# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.


__all__ = ["makeTestRepo", "makeTestCollection", "addDatasetType", "expandUniqueId", "DatastoreMock"]

import random
from typing import (
    Any,
    Iterable,
    Mapping,
    Optional,
    Tuple,
)
from unittest.mock import MagicMock

from lsst.daf.butler import (
    Butler,
    Config,
    DatasetRef,
    DatasetType,
    FileDataset,
)


def makeTestRepo(root, dataIds, *, config=None, **kwargs):
    """Create an empty repository with dummy data IDs.

    Parameters
    ----------
    root : `str`
        The location of the root directory for the repository.
    dataIds : `~collections.abc.Mapping` [`str`, `iterable`]
        A mapping keyed by the dimensions used in the test. Each value
        is an iterable of names for that dimension (e.g., detector IDs for
        `"detector"`). Related dimensions (e.g., instruments and detectors)
        are linked arbitrarily.
    config : `lsst.daf.butler.Config`, optional
        A configuration for the repository (for details, see
        `lsst.daf.butler.Butler.makeRepo`). If omitted, creates a repository
        with default dataset and storage types, but optimized for speed.
        The defaults set ``.datastore.cls``, ``.datastore.checksum`` and
        ``.registry.db``. If a supplied config does not specify these values
        the internal defaults will be used to ensure that we have a usable
        configuration.
    **kwargs
        Extra arguments to `lsst.daf.butler.Butler.makeRepo`.

    Returns
    -------
    butler : `lsst.daf.butler.Butler`
        A Butler referring to the new repository. This Butler is provided only
        for additional setup; to keep test cases isolated, it is highly
        recommended that each test create its own Butler with a
        unique run/collection. See `makeTestCollection`.

    Notes
    -----
    This function provides a "quick and dirty" repository for simple unit
    tests that don't depend on complex data relationships. Because it assigns
    dimension relationships and other metadata arbitrarily, it is ill-suited
    for tests where the structure of the data matters. If you need such a
    dataset, create it directly or use a saved test dataset.

    Since the values in ``dataIds`` uniquely determine the repository's
    data IDs, the fully linked IDs can be recovered by calling
    `expandUniqueId`, so long as no other code has inserted dimensions into
    the repository registry.

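    Examples
    --------
    A minimal sketch of typical use (the directory name and data ID values
    are illustrative):

    .. code-block:: py

       butler = makeTestRepo(
           "testdir", {"instrument": ["notACam"], "detector": [1]})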
    """
    defaults = Config()
    defaults["datastore", "cls"] = "lsst.daf.butler.datastores.inMemoryDatastore.InMemoryDatastore"
    defaults["datastore", "checksum"] = False  # In case of future changes
    defaults["registry", "db"] = "sqlite:///<butlerRoot>/gen3.sqlite3"

    if config:
        defaults.update(config)

    # Disable config root by default so that our registry override will
    # not be ignored.
    # newConfig guards against location-related keywords like outfile
    newConfig = Butler.makeRepo(root, config=defaults, forceConfigRoot=False, **kwargs)
    butler = Butler(newConfig, writeable=True)
    dimensionRecords = _makeRecords(dataIds, butler.registry.dimensions)
    for dimension, records in dimensionRecords.items():
        butler.registry.insertDimensionData(dimension, *records)
    return butler


def makeTestCollection(repo):
    """Create a read/write Butler to a fresh collection.

    Parameters
    ----------
    repo : `lsst.daf.butler.Butler`
        A previously existing Butler to a repository, such as that returned by
        `~lsst.daf.butler.Butler.makeRepo` or `makeTestRepo`.

    Returns
    -------
    butler : `lsst.daf.butler.Butler`
        A Butler referring to a new collection in ``repo``. The collection
        is (almost) guaranteed to be new.

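    Examples
    --------
    A minimal sketch of typical use (``repo`` is a Butler from
    `makeTestRepo`, typically shared across the tests in a suite):

    .. code-block:: py

       butler = makeTestCollection(repo)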
    """
    # Create a "random" collection name
    # Speed matters more than cryptographic guarantees
    collection = "test" + str(random.randrange(1_000_000_000))
    return Butler(butler=repo, run=collection)


def _makeRecords(dataIds, universe):
    """Create cross-linked dimension records from a collection of
    data ID values.

    Parameters
    ----------
    dataIds : `~collections.abc.Mapping` [`str`, `iterable`]
        A mapping keyed by the dimensions of interest. Each value is an
        iterable of names for that dimension (e.g., detector IDs for
        `"detector"`).
    universe : `lsst.daf.butler.DimensionUniverse`
        Set of all known dimensions and their relationships.

    Returns
    -------
    dataIds : `~collections.abc.Mapping` [`str`, `iterable`]
        A mapping keyed by the dimensions of interest, giving one
        `~lsst.daf.butler.DimensionRecord` for each input name. Related
        dimensions (e.g., instruments and detectors) are linked arbitrarily.

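    Examples
    --------
    A sketch of the expected call (data ID values are illustrative); the
    result maps each dimension name to a list of records, one per input
    name:

    .. code-block:: py

       records = _makeRecords({"instrument": ["notACam"], "detector": [1]},
                              butler.registry.dimensions)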
    """
    expandedIds = {}
    # Provide alternate keys like detector names
    for name, values in dataIds.items():
        expandedIds[name] = []
        dimension = universe[name]
        for value in values:
            expandedValue = {}
            for key in dimension.uniqueKeys:
                if key.nbytes:
                    castType = bytes
                else:
                    castType = key.dtype().python_type
                try:
                    castValue = castType(value)
                except TypeError:
                    castValue = castType()
                expandedValue[key.name] = castValue
            for key in dimension.metadata:
                if not key.nullable:
                    expandedValue[key.name] = key.dtype().python_type(value)
            expandedIds[name].append(expandedValue)

    # Pick cross-relationships arbitrarily
    for name, values in expandedIds.items():
        dimension = universe[name]
        for value in values:
            for other in dimension.required:
                if other != dimension:
                    relation = expandedIds[other.name][0]
                    value[other.name] = relation[other.primaryKey.name]
            # Do not recurse, to keep the user from having to provide
            # irrelevant dimensions
            for other in dimension.implied:
                if other != dimension and other.name in expandedIds and other.viewOf is None:
                    relation = expandedIds[other.name][0]
                    value[other.name] = relation[other.primaryKey.name]

    return {dimension: [universe[dimension].RecordClass(**value) for value in values]
            for dimension, values in expandedIds.items()}


def expandUniqueId(butler, partialId):
    """Return a complete data ID matching some criterion.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The repository to query.
    partialId : `~collections.abc.Mapping` [`str`, any]
        A mapping of known dimensions and values.

    Returns
    -------
    dataId : `lsst.daf.butler.DataCoordinate`
        The unique data ID that matches ``partialId``.

    Raises
    ------
    ValueError
        Raised if ``partialId`` does not uniquely identify a data ID.

    Notes
    -----
    This method will only work correctly if all dimensions attached to the
    target dimension (e.g., "physical_filter" for "visit") are known to the
    repository, even if they're not needed to identify a dataset.

    Examples
    --------
    .. code-block:: py

       >>> butler = makeTestRepo(
               "testdir", {"instrument": ["notACam"], "detector": [1]})
       >>> expandUniqueId(butler, {"detector": 1})
       DataCoordinate({instrument, detector}, ('notACam', 1))
    """
    # The example is *not* a doctest because it requires dangerous I/O
    registry = butler.registry
    dimensions = registry.dimensions.extract(partialId.keys()).required

    query = " AND ".join(f"{dimension} = {value!r}" for dimension, value in partialId.items())

    # Much of the purpose of this function is to do something we explicitly
    # reject most of the time: query for a governor dimension (e.g. instrument)
    # given something that depends on it (e.g. visit), hence check=False.
    dataId = list(registry.queryDataIds(dimensions, where=query, check=False))
    if len(dataId) == 1:
        return dataId[0]
    else:
        raise ValueError(f"Found {len(dataId)} matches for {partialId}, expected 1.")


def addDatasetType(butler, name, dimensions, storageClass):
    """Add a new dataset type to a repository.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The repository to update.
    name : `str`
        The name of the dataset type.
    dimensions : `set` [`str`]
        The dimensions of the new dataset type.
    storageClass : `str`
        The storage class the dataset will use.

    Returns
    -------
    datasetType : `lsst.daf.butler.DatasetType`
        The new type.

    Raises
    ------
    ValueError
        Raised if the dimensions or storage class is invalid.

    Notes
    -----
    Dataset types are shared across all collections in a repository, so this
    function does not need to be run for each collection.

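    Examples
    --------
    A minimal sketch (the dataset type name and storage class are
    illustrative; the storage class must be one known to the repository):

    .. code-block:: py

       datasetType = addDatasetType(
           butler, "DataType1", {"instrument", "detector"},
           "StructuredDataDict")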
    """
    try:
        datasetType = DatasetType(name, dimensions, storageClass,
                                  universe=butler.registry.dimensions)
        butler.registry.registerDatasetType(datasetType)
        return datasetType
    except KeyError as e:
        raise ValueError from e


class DatastoreMock:
    """Mocks a butler datastore.

    Has functions that mock the datastore in a butler. Provides an `apply`
    function to replace the relevant butler datastore functions with the mock
    functions.

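    Examples
    --------
    A minimal sketch of typical use (``butler`` is any test Butler, e.g. one
    returned by `makeTestCollection`):

    .. code-block:: py

       DatastoreMock.apply(butler)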
    """

    @staticmethod
    def apply(butler):
        """Apply datastore mocks to a butler."""
        butler.datastore.export = DatastoreMock._mock_export
        butler.datastore.get = DatastoreMock._mock_get
        butler.datastore.ingest = MagicMock()

    @staticmethod
    def _mock_export(refs: Iterable[DatasetRef], *,
                     directory: Optional[str] = None,
                     transfer: Optional[str] = None) -> Iterable[FileDataset]:
        """A mock of `Datastore.export` that satisfies the requirement that
        the refs passed in are included in the `FileDataset` objects
        returned.

        This can be used to construct a `Datastore` mock suitable for use
        in repository export via::

            datastore = unittest.mock.Mock(spec=Datastore)
            datastore.export = DatastoreMock._mock_export

        """
        for ref in refs:
            yield FileDataset(refs=[ref],
                              path="mock/path",
                              formatter="lsst.daf.butler.formatters.json.JsonFormatter")

    @staticmethod
    def _mock_get(ref: DatasetRef, parameters: Optional[Mapping[str, Any]] = None
                  ) -> Tuple[int, Optional[Mapping[str, Any]]]:
        """A mock of `Datastore.get` that just returns the integer dataset ID
        value and parameters it was given.
        """
        return (ref.id, parameters)