# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.


__all__ = ["makeTestRepo", "makeTestCollection", "addDatasetType", "expandUniqueId", "DatastoreMock",
           "addDataIdValue",
           ]

import random
from typing import (
    Any,
    Iterable,
    Mapping,
    Optional,
    Set,
    Tuple,
)
from unittest.mock import MagicMock

import sqlalchemy

from lsst.daf.butler import (
    Butler,
    Config,
    DataCoordinate,
    DatasetRef,
    DatasetType,
    Dimension,
    DimensionUniverse,
    FileDataset,
    Registry,
)


def makeTestRepo(root: str,
                 dataIds: Optional[Mapping[str, Iterable]] = None, *,
                 config: Optional[Config] = None,
                 **kwargs) -> Butler:
    """Create an empty test repository.

    Parameters
    ----------
    root : `str`
        The location of the root directory for the repository.
    dataIds : `~collections.abc.Mapping` [`str`, `iterable`], optional
        A mapping keyed by the dimensions used in the test. Each value
        is an iterable of names for that dimension (e.g., detector IDs for
        `"detector"`). Related dimensions (e.g., instruments and detectors)
        are linked arbitrarily. This parameter is provided for compatibility
        with old code; newer code should make the repository, then call
        `~lsst.daf.butler.tests.addDataIdValue`.
    config : `lsst.daf.butler.Config`, optional
        A configuration for the repository (for details, see
        `lsst.daf.butler.Butler.makeRepo`). If omitted, creates a repository
        with default dataset and storage types, but optimized for speed.
        The defaults set ``.datastore.cls``, ``.datastore.checksum`` and
        ``.registry.db``. If a supplied config does not specify these values
        the internal defaults will be used to ensure that we have a usable
        configuration.
    **kwargs
        Extra arguments to `lsst.daf.butler.Butler.makeRepo`.

    Returns
    -------
    butler : `lsst.daf.butler.Butler`
        A Butler referring to the new repository. This Butler is provided
        only for additional setup; to keep test cases isolated, it is highly
        recommended that each test create its own Butler with a unique
        run/collection. See `makeTestCollection`.

    Notes
    -----
    This function provides a "quick and dirty" repository for simple unit
    tests that don't depend on complex data relationships. It is ill-suited
    for tests where the structure of the data matters. If you need such a
    dataset, create it directly or use a saved test dataset.
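
    Examples
    --------
    A minimal sketch of typical use (not a doctest, because it writes to
    disk); the repository root ``"testdir"`` is illustrative:

    .. code-block:: py

        butler = makeTestRepo("testdir")
        # Each test then works in its own collection:
        testButler = makeTestCollection(butler)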

95 """ 

96 defaults = Config() 

97 defaults["datastore", "cls"] = "lsst.daf.butler.datastores.inMemoryDatastore.InMemoryDatastore" 

98 defaults["datastore", "checksum"] = False # In case of future changes 

99 defaults["registry", "db"] = "sqlite:///<butlerRoot>/gen3.sqlite3" 

100 

101 if config: 

102 defaults.update(config) 

103 

104 if not dataIds: 

105 dataIds = {} 

106 

107 # Disable config root by default so that our registry override will 

108 # not be ignored. 

109 # newConfig guards against location-related keywords like outfile 

110 newConfig = Butler.makeRepo(root, config=defaults, forceConfigRoot=False, **kwargs) 

111 butler = Butler(newConfig, writeable=True) 

112 dimensionRecords = _makeRecords(dataIds, butler.registry.dimensions) 

113 for dimension, records in dimensionRecords.items(): 

114 butler.registry.insertDimensionData(dimension, *records) 

115 return butler 


def makeTestCollection(repo: Butler) -> Butler:
    """Create a read/write Butler to a fresh collection.

    Parameters
    ----------
    repo : `lsst.daf.butler.Butler`
        A previously existing Butler to a repository, such as that returned
        by `~lsst.daf.butler.Butler.makeRepo` or `makeTestRepo`.

    Returns
    -------
    butler : `lsst.daf.butler.Butler`
        A Butler referring to a new collection in the given repository.
        The collection is (almost) guaranteed to be new.

    Notes
    -----
    This function creates a single run collection that does not necessarily
    conform to any repository conventions. It is only suitable for creating
    an isolated test area, and not for repositories intended for real data
    processing or analysis.
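
    Examples
    --------
    A minimal sketch (``"testdir"`` is illustrative):

    .. code-block:: py

        repo = makeTestRepo("testdir")
        butler = makeTestCollection(repo)
        # butler writes to a run collection with a randomized name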

139 """ 

140 # Create a "random" collection name 

141 # Speed matters more than cryptographic guarantees 

142 collection = "test" + str(random.randrange(1_000_000_000)) 

143 return Butler(butler=repo, run=collection) 


def _makeRecords(dataIds: Mapping[str, Iterable],
                 universe: DimensionUniverse) -> Mapping[str, Iterable]:
    """Create cross-linked dimension records from a collection of
    data ID values.

    Parameters
    ----------
    dataIds : `~collections.abc.Mapping` [`str`, `iterable`]
        A mapping keyed by the dimensions of interest. Each value is an
        iterable of names for that dimension (e.g., detector IDs for
        `"detector"`).
    universe : `lsst.daf.butler.DimensionUniverse`
        Set of all known dimensions and their relationships.

    Returns
    -------
    dataIds : `~collections.abc.Mapping` [`str`, `iterable`]
        A mapping keyed by the dimensions of interest, giving one
        `~lsst.daf.butler.DimensionRecord` for each input name. Related
        dimensions (e.g., instruments and detectors) are linked arbitrarily.
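
    Examples
    --------
    A minimal sketch of the expected shape (the record contents in the
    comment are illustrative, not exact):

    .. code-block:: py

        records = _makeRecords({"instrument": ["notACam"], "detector": [1]},
                               universe)
        # {"instrument": [<record for "notACam">],
        #  "detector": [<record for detector 1, linked to "notACam">]}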

166 """ 

167 expandedIds = {} 

168 # Provide alternate keys like detector names 

169 for name, values in dataIds.items(): 

170 expandedIds[name] = [] 

171 dimension = universe[name] 

172 for value in values: 

173 expandedIds[name].append(_fillAllKeys(dimension, value)) 

174 

175 # Pick cross-relationships arbitrarily 

176 for name, values in expandedIds.items(): 

177 dimension = universe[name] 

178 for value in values: 

179 for other in dimension.required: 

180 if other != dimension: 

181 relation = expandedIds[other.name][0] 

182 value[other.name] = relation[other.primaryKey.name] 

183 # Do not recurse, to keep the user from having to provide 

184 # irrelevant dimensions 

185 for other in dimension.implied: 

186 if other != dimension and other.name in expandedIds and other.viewOf is None: 

187 relation = expandedIds[other.name][0] 

188 value[other.name] = relation[other.primaryKey.name] 

189 

190 return {dimension: [universe[dimension].RecordClass(**value) for value in values] 

191 for dimension, values in expandedIds.items()} 


def _fillAllKeys(dimension: Dimension, value: Any) -> Mapping[str, Any]:
    """Create an arbitrary mapping of all required keys for a given
    dimension that do not refer to other dimensions.

    Parameters
    ----------
    dimension : `lsst.daf.butler.Dimension`
        The dimension for which to generate a set of keys (e.g., detector).
    value
        The value assigned to ``dimension`` (e.g., detector ID).

    Returns
    -------
    expandedValue : `dict` [`str`]
        A mapping of dimension keys to values. The primary key of
        ``dimension`` maps to ``value``, but all other mappings (e.g.,
        detector name) are arbitrary.
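
    Examples
    --------
    A minimal sketch (the exact keys depend on the dimension universe;
    those shown are illustrative):

    .. code-block:: py

        _fillAllKeys(universe["detector"], 42)
        # e.g. {"id": 42, "full_name": "42", ...}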

211 """ 

212 expandedValue = {} 

213 for key in dimension.uniqueKeys: 

214 if key.nbytes: 

215 castType = bytes 

216 else: 

217 castType = key.dtype().python_type 

218 try: 

219 castValue = castType(value) 

220 except TypeError: 

221 castValue = castType() 

222 expandedValue[key.name] = castValue 

223 for key in dimension.metadata: 

224 if not key.nullable: 

225 expandedValue[key.name] = key.dtype().python_type(value) 

226 return expandedValue 


def _matchAnyDataId(record: Mapping[str, Any], registry: Registry, dimension: Dimension):
    """Match a partial dimension record to an existing record along a
    specific dimension, modifying ``record`` in place.

    Parameters
    ----------
    record : `dict` [`str`]
        A mapping representing the record to be matched.
    registry : `lsst.daf.butler.Registry`
        The registry with all known dimension records.
    dimension : `lsst.daf.butler.Dimension`
        The dimension on which to find a match for ``record``.

    Raises
    ------
    RuntimeError
        Raised if there are no existing records for ``dimension``.
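
    Examples
    --------
    A minimal sketch; the matched value (here ``"notACam"``) is whatever
    record the registry happens to return first:

    .. code-block:: py

        record = {"id": 42}
        _matchAnyDataId(record, butler.registry, universe["instrument"])
        # record == {"id": 42, "instrument": "notACam"}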

246 """ 

247 matches = list(registry.queryDimensionRecords(dimension.name)) 

248 if matches: 

249 record[dimension.name] = matches[0].dataId[dimension.name] 

250 else: 

251 raise RuntimeError(f"No matching values for {dimension.name} found.") 


def _fillRelationships(dimension: Dimension,
                       dimensionInfo: Mapping[str, Any],
                       existing: Registry) -> Mapping[str, Any]:
    """Create arbitrary mappings from one dimension to all dimensions it
    depends on.

    Parameters
    ----------
    dimension : `lsst.daf.butler.Dimension`
        The dimension for which to generate relationships.
    dimensionInfo : `dict` [`str`]
        A mapping of dimension keys to values.
    existing : `lsst.daf.butler.Registry`
        The registry with all previously registered dimensions.

    Returns
    -------
    filledInfo : `dict` [`str`]
        A version of ``dimensionInfo`` with extra mappings for any
        relationships required by ``dimension``. Any relationships already
        defined in ``dimensionInfo`` are preserved.

    Raises
    ------
    RuntimeError
        Raised if ``dimension`` depends on a dimension for which no values
        exist yet.
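
    Examples
    --------
    A minimal sketch (assumes the registry already has an instrument
    record; the values shown are illustrative):

    .. code-block:: py

        filled = _fillRelationships(universe["detector"], {"id": 42},
                                    butler.registry)
        # e.g. {"id": 42, "instrument": "notACam"}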

281 """ 

282 filledInfo = dimensionInfo.copy() 

283 for other in dimension.required: 

284 if other != dimension and other.name not in filledInfo: 

285 _matchAnyDataId(filledInfo, existing, other) 

286 # Do not recurse, to keep the user from having to provide 

287 # irrelevant dimensions. 

288 for other in dimension.implied: 

289 toUpdate = other != dimension and other.name not in filledInfo 

290 updatable = other.viewOf is None 

291 # Do not run query if either toUpdate or updatable is false 

292 if toUpdate and updatable and list(existing.queryDimensionRecords(other)): 

293 _matchAnyDataId(filledInfo, existing, other) 

294 return filledInfo 


def expandUniqueId(butler: Butler, partialId: Mapping[str, Any]) -> DataCoordinate:
    """Return a complete data ID matching some criterion.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The repository to query.
    partialId : `~collections.abc.Mapping` [`str`]
        A mapping of known dimensions and values.

    Returns
    -------
    dataId : `lsst.daf.butler.DataCoordinate`
        The unique data ID that matches ``partialId``.

    Raises
    ------
    ValueError
        Raised if ``partialId`` does not uniquely identify a data ID.

    Notes
    -----
    This function will only work correctly if all dimensions attached to
    the target dimension (e.g., "physical_filter" for "visit") are known
    to the repository, even if they're not needed to identify a dataset.
    It is only suitable for certain kinds of test repositories, and not
    for repositories intended for real data processing or analysis.

    Examples
    --------
    .. code-block:: py

        >>> butler = makeTestRepo(
                "testdir", {"instrument": ["notACam"], "detector": [1]})
        >>> expandUniqueId(butler, {"detector": 1})
        DataCoordinate({instrument, detector}, ('notACam', 1))
    """
    # The example is *not* a doctest because it requires dangerous I/O
    registry = butler.registry
    dimensions = registry.dimensions.extract(partialId.keys()).required

    query = " AND ".join(f"{dimension} = {value!r}" for dimension, value in partialId.items())

    # Much of the purpose of this function is to do something we explicitly
    # reject most of the time: query for a governor dimension (e.g. instrument)
    # given something that depends on it (e.g. visit), hence check=False.
    dataId = list(registry.queryDataIds(dimensions, where=query, check=False))
    if len(dataId) == 1:
        return dataId[0]
    else:
        raise ValueError(f"Found {len(dataId)} matches for {partialId}, expected 1.")


def addDataIdValue(butler: Butler, dimension: str, value: Any, **related: Any):
    """Add a new data ID to a repository.

    Related dimensions (e.g., the instrument associated with a detector)
    may be specified using ``related``. While these keywords are sometimes
    needed to get self-consistent repositories, you do not need to define
    relationships you do not use. Any unspecified dimensions will be linked
    arbitrarily.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The repository to update.
    dimension : `str`
        The name of the dimension to gain a new value.
    value
        The value to register for the dimension.
    **related
        Any existing dimensions to be linked to ``value``.

    Notes
    -----
    Because this function creates filler data, it is only suitable for test
    repositories. It should not be used for repositories intended for real
    data processing or analysis, which have known dimension values.

    Examples
    --------
    See the guide on :ref:`using-butler-in-tests-make-repo` for usage
    examples.
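
    A minimal sketch (the dimension values are illustrative):

    .. code-block:: py

        addDataIdValue(butler, "instrument", "notACam")
        addDataIdValue(butler, "detector", 42, instrument="notACam")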

380 """ 

381 # Example is not doctest, because it's probably unsafe to create even an 

382 # in-memory butler in that environment. 

383 try: 

384 fullDimension = butler.registry.dimensions[dimension] 

385 except KeyError as e: 

386 raise ValueError from e 

387 # Bad keys ignored by registry code 

388 extraKeys = related.keys() - (fullDimension.required | fullDimension.implied) 

389 if extraKeys: 

390 raise ValueError(f"Unexpected keywords {extraKeys} not found " 

391 f"in {fullDimension.required | fullDimension.implied}") 

392 

393 # Define secondary keys (e.g., detector name given detector id) 

394 expandedValue = _fillAllKeys(fullDimension, value) 

395 expandedValue.update(**related) 

396 completeValue = _fillRelationships(fullDimension, expandedValue, butler.registry) 

397 

398 dimensionRecord = fullDimension.RecordClass(**completeValue) 

399 try: 

400 butler.registry.syncDimensionData(dimension, dimensionRecord) 

401 except sqlalchemy.exc.IntegrityError as e: 

402 raise RuntimeError("Could not create data ID value. Automatic relationship generation " 

403 "may have failed; try adding keywords to assign a specific instrument, " 

404 "physical_filter, etc. based on the nested exception message.") from e 


def addDatasetType(butler: Butler, name: str, dimensions: Set[str], storageClass: str) -> DatasetType:
    """Add a new dataset type to a repository.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The repository to update.
    name : `str`
        The name of the dataset type.
    dimensions : `set` [`str`]
        The dimensions of the new dataset type.
    storageClass : `str`
        The storage class the dataset will use.

    Returns
    -------
    datasetType : `lsst.daf.butler.DatasetType`
        The new type.

    Raises
    ------
    ValueError
        Raised if the dimensions or storage class is invalid.

    Notes
    -----
    Dataset types are shared across all collections in a repository, so
    this function does not need to be run for each collection.
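
    Examples
    --------
    A minimal sketch (the dataset type name and storage class are
    illustrative):

    .. code-block:: py

        datasetType = addDatasetType(butler, "dataset_sketch",
                                     {"instrument", "detector"},
                                     "StructuredDataDict")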

435 """ 

436 try: 

437 datasetType = DatasetType(name, dimensions, storageClass, 

438 universe=butler.registry.dimensions) 

439 butler.registry.registerDatasetType(datasetType) 

440 return datasetType 

441 except KeyError as e: 

442 raise ValueError from e 


class DatastoreMock:
    """Mocks a butler datastore.

    Has functions that mock the datastore in a butler. Provides an `apply`
    function to replace the relevant butler datastore functions with the
    mock functions.
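
    Examples
    --------
    A minimal sketch of intended use:

    .. code-block:: py

        butler = makeTestCollection(makeTestRepo("testdir"))
        DatastoreMock.apply(butler)
        # datastore reads now return (dataset ID, parameters) tuples
        # instead of real datasets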

451 """ 

452 

453 @staticmethod 

454 def apply(butler): 

455 """Apply datastore mocks to a butler.""" 

456 butler.datastore.export = DatastoreMock._mock_export 

457 butler.datastore.get = DatastoreMock._mock_get 

458 butler.datastore.ingest = MagicMock() 

    @staticmethod
    def _mock_export(refs: Iterable[DatasetRef], *,
                     directory: Optional[str] = None,
                     transfer: Optional[str] = None) -> Iterable[FileDataset]:
        """A mock of `Datastore.export` that satisfies the requirement that
        the refs passed in are included in the `FileDataset` objects
        returned.

        This can be used to construct a `Datastore` mock that can be used
        in repository export via::

            datastore = unittest.mock.Mock(spec=Datastore)
            datastore.export = DatastoreMock._mock_export

        """
        for ref in refs:
            yield FileDataset(refs=[ref],
                              path="mock/path",
                              formatter="lsst.daf.butler.formatters.json.JsonFormatter")

    @staticmethod
    def _mock_get(ref: DatasetRef, parameters: Optional[Mapping[str, Any]] = None
                  ) -> Tuple[int, Optional[Mapping[str, Any]]]:
        """A mock of `Datastore.get` that just returns the integer dataset
        ID value and parameters it was given.
        """
        return (ref.id, parameters)