# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.



__all__ = ["makeTestRepo", "makeTestCollection", "addDatasetType", "expandUniqueId", "DatastoreMock",
           "addDataIdValue",
           ]

import random
from typing import (
    Any,
    Iterable,
    Mapping,
    Optional,
    Set,
    Tuple,
)
from unittest.mock import MagicMock

import sqlalchemy

from lsst.daf.butler import (
    Butler,
    Config,
    DataCoordinate,
    DatasetRef,
    DatasetType,
    Dimension,
    DimensionUniverse,
    FileDataset,
    Registry,
)



def makeTestRepo(root: str,
                 dataIds: Optional[Mapping[str, Iterable]] = None, *,
                 config: Optional[Config] = None,
                 **kwargs) -> Butler:

57 """Create an empty test repository. 

58 

59 Parameters 

60 ---------- 

61 root : `str` 

62 The location of the root directory for the repository. 

63 dataIds : `~collections.abc.Mapping` [`str`, `iterable`], optional 

64 A mapping keyed by the dimensions used in the test. Each value 

65 is an iterable of names for that dimension (e.g., detector IDs for 

66 `"detector"`). Related dimensions (e.g., instruments and detectors) 

67 are linked arbitrarily. This parameter is provided for compatibility 

68 with old code; newer code should make the repository, then call 

69 `~lsst.daf.butler.tests.addDataIdValue`. 

70 config : `lsst.daf.butler.Config`, optional 

71 A configuration for the repository (for details, see 

72 `lsst.daf.butler.Butler.makeRepo`). If omitted, creates a repository 

73 with default dataset and storage types, but optimized for speed. 

74 The defaults set ``.datastore.cls``, ``.datastore.checksum`` and 

75 ``.registry.db``. If a supplied config does not specify these values 

76 the internal defaults will be used to ensure that we have a usable 

77 configuration. 

78 **kwargs 

79 Extra arguments to `lsst.daf.butler.Butler.makeRepo`. 

80 

81 Returns 

82 ------- 

83 butler : `lsst.daf.butler.Butler` 

84 A Butler referring to the new repository. This Butler is provided only 

85 for additional setup; to keep test cases isolated, it is highly 

86 recommended that each test create its own Butler with a 

87 unique run/collection. See `makeTestCollection`. 

88 

89 Notes 

90 ----- 

91 This function provides a "quick and dirty" repository for simple unit 

92 tests that don't depend on complex data relationships. It is ill-suited 

93 for tests where the structure of the data matters. If you need such a 

94 dataset, create it directly or use a saved test dataset. 
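
    Examples
    --------
    A minimal sketch (not a doctest, because it writes to disk); the
    directory name is illustrative:

    .. code-block:: py

        butler = makeTestRepo("testdir")
        addDataIdValue(butler, "instrument", "notACam")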

95 """ 

96 defaults = Config() 

97 defaults["datastore", "cls"] = "lsst.daf.butler.datastores.inMemoryDatastore.InMemoryDatastore" 

98 defaults["datastore", "checksum"] = False # In case of future changes 

99 defaults["registry", "db"] = "sqlite:///<butlerRoot>/gen3.sqlite3" 

100 

101 if config: 

102 defaults.update(config) 

103 

104 if not dataIds: 

105 dataIds = {} 

106 

107 # Disable config root by default so that our registry override will 

108 # not be ignored. 

109 # newConfig guards against location-related keywords like outfile 

110 newConfig = Butler.makeRepo(root, config=defaults, forceConfigRoot=False, **kwargs) 

111 butler = Butler(newConfig, writeable=True) 

112 dimensionRecords = _makeRecords(dataIds, butler.registry.dimensions) 

113 for dimension, records in dimensionRecords.items(): 

114 butler.registry.insertDimensionData(dimension, *records) 

115 return butler 



def makeTestCollection(repo: Butler, uniqueId: Optional[str] = None) -> Butler:
    """Create a read/write Butler to a fresh collection.

    Parameters
    ----------
    repo : `lsst.daf.butler.Butler`
        A previously existing Butler to a repository, such as that returned
        by `~lsst.daf.butler.Butler.makeRepo` or `makeTestRepo`.
    uniqueId : `str`, optional
        A collection ID guaranteed by external code to be unique across all
        calls to ``makeTestCollection`` for the same repository.

    Returns
    -------
    butler : `lsst.daf.butler.Butler`
        A Butler referring to a new collection in ``repo``. The collection
        is (almost) guaranteed to be new.

    Notes
    -----
    This function creates a single run collection that does not necessarily
    conform to any repository conventions. It is only suitable for creating
    an isolated test area, and not for repositories intended for real data
    processing or analysis.
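
    Examples
    --------
    A minimal sketch, assuming ``repo`` was created with `makeTestRepo`:

    .. code-block:: py

        butler = makeTestCollection(repo, uniqueId="my_test")
        # ``butler`` writes to the new run collection "test_my_test".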

142 """ 

143 if not uniqueId: 

144 # Create a "random" collection name 

145 # Speed matters more than cryptographic guarantees 

146 uniqueId = str(random.randrange(1_000_000_000)) 

147 collection = "test_" + uniqueId 

148 return Butler(butler=repo, run=collection) 



def _makeRecords(dataIds: Mapping[str, Iterable],
                 universe: DimensionUniverse) -> Mapping[str, Iterable]:
    """Create cross-linked dimension records from a collection of
    data ID values.

    Parameters
    ----------
    dataIds : `~collections.abc.Mapping` [`str`, `iterable`]
        A mapping keyed by the dimensions of interest. Each value is an
        iterable of names for that dimension (e.g., detector IDs for
        `"detector"`).
    universe : `lsst.daf.butler.DimensionUniverse`
        Set of all known dimensions and their relationships.

    Returns
    -------
    dataIds : `~collections.abc.Mapping` [`str`, `iterable`]
        A mapping keyed by the dimensions of interest, giving one
        `~lsst.daf.butler.DimensionRecord` for each input name. Related
        dimensions (e.g., instruments and detectors) are linked arbitrarily.
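
    Examples
    --------
    A sketch of the intended call pattern, mirroring `makeTestRepo`; the
    dimension values are illustrative:

    .. code-block:: py

        records = _makeRecords({"instrument": ["notACam"], "detector": [1]},
                               butler.registry.dimensions)
        for dimension, recs in records.items():
            butler.registry.insertDimensionData(dimension, *recs)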

171 """ 

172 expandedIds = {} 

173 # Provide alternate keys like detector names 

174 for name, values in dataIds.items(): 

175 expandedIds[name] = [] 

176 dimension = universe[name] 

177 for value in values: 

178 expandedIds[name].append(_fillAllKeys(dimension, value)) 

179 

180 # Pick cross-relationships arbitrarily 

181 for name, values in expandedIds.items(): 

182 dimension = universe[name] 

183 for value in values: 

184 for other in dimension.required: 

185 if other != dimension: 

186 relation = expandedIds[other.name][0] 

187 value[other.name] = relation[other.primaryKey.name] 

188 # Do not recurse, to keep the user from having to provide 

189 # irrelevant dimensions 

190 for other in dimension.implied: 

191 if other != dimension and other.name in expandedIds and other.viewOf is None: 

192 relation = expandedIds[other.name][0] 

193 value[other.name] = relation[other.primaryKey.name] 

194 

195 return {dimension: [universe[dimension].RecordClass(**value) for value in values] 

196 for dimension, values in expandedIds.items()} 



def _fillAllKeys(dimension: Dimension, value: Any) -> Mapping[str, Any]:
    """Create an arbitrary mapping of all required keys for a given dimension
    that do not refer to other dimensions.

    Parameters
    ----------
    dimension : `lsst.daf.butler.Dimension`
        The dimension for which to generate a set of keys (e.g., detector).
    value
        The value assigned to ``dimension`` (e.g., detector ID).

    Returns
    -------
    expandedValue : `dict` [`str`]
        A mapping of dimension keys to values. The primary key of
        ``dimension`` maps to ``value``, but all other mappings (e.g.,
        detector name) are arbitrary.
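
    Examples
    --------
    A hypothetical sketch; the exact key names depend on the configured
    dimension universe:

    .. code-block:: py

        expanded = _fillAllKeys(universe["detector"], 42)
        # The primary key maps to 42; any alternate keys (e.g., a string
        # detector name) are filled with arbitrary casts of 42.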

216 """ 

217 expandedValue = {} 

218 for key in dimension.uniqueKeys: 

219 if key.nbytes: 

220 castType = bytes 

221 else: 

222 castType = key.dtype().python_type 

223 try: 

224 castValue = castType(value) 

225 except TypeError: 

226 castValue = castType() 

227 expandedValue[key.name] = castValue 

228 for key in dimension.metadata: 

229 if not key.nullable: 

230 expandedValue[key.name] = key.dtype().python_type(value) 

231 return expandedValue 



def _matchAnyDataId(record: Mapping[str, Any], registry: Registry, dimension: Dimension):
    """Match a partial dimension record to an existing record along a
    specific dimension, modifying ``record`` in place.

    Parameters
    ----------
    record : `dict` [`str`]
        A mapping representing the record to be matched.
    registry : `lsst.daf.butler.Registry`
        The registry with all known dimension records.
    dimension : `lsst.daf.butler.Dimension`
        The dimension on which to find a match for ``record``.

    Raises
    ------
    RuntimeError
        Raised if there are no existing records for ``dimension``.
    """
    matches = list(registry.queryDimensionRecords(dimension.name))
    if matches:
        record[dimension.name] = matches[0].dataId[dimension.name]
    else:
        raise RuntimeError(f"No matching values for {dimension.name} found.")



def _fillRelationships(dimension: Dimension,
                       dimensionInfo: Mapping[str, Any],
                       existing: Registry) -> Mapping[str, Any]:
    """Create arbitrary mappings from one dimension to all dimensions it
    depends on.

    Parameters
    ----------
    dimension : `lsst.daf.butler.Dimension`
        The dimension for which to generate relationships.
    dimensionInfo : `dict` [`str`]
        A mapping of dimension keys to values.
    existing : `lsst.daf.butler.Registry`
        The registry with all previously registered dimensions.

    Returns
    -------
    filledInfo : `dict` [`str`]
        A version of ``dimensionInfo`` with extra mappings for any
        relationships required by ``dimension``. Any relationships already
        defined in ``dimensionInfo`` are preserved.

    Raises
    ------
    RuntimeError
        Raised if ``dimension`` depends on a dimension for which no values
        exist yet.
    """

    filledInfo = dimensionInfo.copy()
    for other in dimension.required:
        if other != dimension and other.name not in filledInfo:
            _matchAnyDataId(filledInfo, existing, other)
    # Do not recurse, to keep the user from having to provide
    # irrelevant dimensions.
    for other in dimension.implied:
        toUpdate = other != dimension and other.name not in filledInfo
        updatable = other.viewOf is None
        # Do not run the query if either toUpdate or updatable is false
        if toUpdate and updatable and list(existing.queryDimensionRecords(other)):
            _matchAnyDataId(filledInfo, existing, other)
    return filledInfo



def expandUniqueId(butler: Butler, partialId: Mapping[str, Any]) -> DataCoordinate:
    """Return a complete data ID matching some criterion.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The repository to query.
    partialId : `~collections.abc.Mapping` [`str`]
        A mapping of known dimensions and values.

    Returns
    -------
    dataId : `lsst.daf.butler.DataCoordinate`
        The unique data ID that matches ``partialId``.

    Raises
    ------
    ValueError
        Raised if ``partialId`` does not uniquely identify a data ID.

    Notes
    -----
    This method will only work correctly if all dimensions attached to the
    target dimension (e.g., "physical_filter" for "visit") are known to the
    repository, even if they're not needed to identify a dataset. This
    function is only suitable for certain kinds of test repositories, and
    not for repositories intended for real data processing or analysis.

    Examples
    --------
    .. code-block:: py

        >>> butler = makeTestRepo(
                "testdir", {"instrument": ["notACam"], "detector": [1]})
        >>> expandUniqueId(butler, {"detector": 1})
        DataCoordinate({instrument, detector}, ('notACam', 1))
    """

    # The example is *not* a doctest because it requires dangerous I/O
    registry = butler.registry
    dimensions = registry.dimensions.extract(partialId.keys()).required

    query = " AND ".join(f"{dimension} = {value!r}" for dimension, value in partialId.items())

    # Much of the purpose of this function is to do something we explicitly
    # reject most of the time: query for a governor dimension (e.g. instrument)
    # given something that depends on it (e.g. visit), hence check=False.
    dataId = list(registry.queryDataIds(dimensions, where=query, check=False))
    if len(dataId) == 1:
        return dataId[0]
    else:
        raise ValueError(f"Found {len(dataId)} matches for {partialId}, expected 1.")



def addDataIdValue(butler: Butler, dimension: str, value: Any, **related: Any):
    """Add a new data ID to a repository.

    Related dimensions (e.g., the instrument associated with a detector) may
    be specified using ``related``. While these keywords are sometimes needed
    to get self-consistent repositories, you do not need to define
    relationships you do not use. Any unspecified dimensions will be
    linked arbitrarily.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The repository to update.
    dimension : `str`
        The name of the dimension to gain a new value.
    value
        The value to register for the dimension.
    **related
        Any existing dimensions to be linked to ``value``.

    Notes
    -----
    Because this function creates filler data, it is only suitable for test
    repositories. It should not be used for repositories intended for real
    data processing or analysis, which have known dimension values.

    Examples
    --------
    See the guide on :ref:`using-butler-in-tests-make-repo` for usage
    examples.
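
    In outline, a minimal sketch (the dimension values are illustrative,
    and ``butler`` is assumed to come from `makeTestRepo`):

    .. code-block:: py

        addDataIdValue(butler, "instrument", "notACam")
        addDataIdValue(butler, "detector", 1, instrument="notACam")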

385 """ 

386 # Example is not doctest, because it's probably unsafe to create even an 

387 # in-memory butler in that environment. 

388 try: 

389 fullDimension = butler.registry.dimensions[dimension] 

390 except KeyError as e: 

391 raise ValueError from e 

392 # Bad keys ignored by registry code 

393 extraKeys = related.keys() - (fullDimension.required | fullDimension.implied) 

394 if extraKeys: 

395 raise ValueError(f"Unexpected keywords {extraKeys} not found " 

396 f"in {fullDimension.required | fullDimension.implied}") 

397 

398 # Define secondary keys (e.g., detector name given detector id) 

399 expandedValue = _fillAllKeys(fullDimension, value) 

400 expandedValue.update(**related) 

401 completeValue = _fillRelationships(fullDimension, expandedValue, butler.registry) 

402 

403 dimensionRecord = fullDimension.RecordClass(**completeValue) 

404 try: 

405 butler.registry.syncDimensionData(dimension, dimensionRecord) 

406 except sqlalchemy.exc.IntegrityError as e: 

407 raise RuntimeError("Could not create data ID value. Automatic relationship generation " 

408 "may have failed; try adding keywords to assign a specific instrument, " 

409 "physical_filter, etc. based on the nested exception message.") from e 



def addDatasetType(butler: Butler, name: str, dimensions: Set[str], storageClass: str) -> DatasetType:
    """Add a new dataset type to a repository.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The repository to update.
    name : `str`
        The name of the dataset type.
    dimensions : `set` [`str`]
        The dimensions of the new dataset type.
    storageClass : `str`
        The storage class the dataset will use.

    Returns
    -------
    datasetType : `lsst.daf.butler.DatasetType`
        The new type.

    Raises
    ------
    ValueError
        Raised if the dimensions or storage class is invalid.

    Notes
    -----
    Dataset types are shared across all collections in a repository, so this
    function does not need to be run for each collection.
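
    Examples
    --------
    A minimal sketch; the dataset type name and storage class are
    illustrative:

    .. code-block:: py

        datasetType = addDatasetType(butler, "DataType1",
                                     {"instrument", "detector"},
                                     "StructuredDataDict")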

440 """ 

441 try: 

442 datasetType = DatasetType(name, dimensions, storageClass, 

443 universe=butler.registry.dimensions) 

444 butler.registry.registerDatasetType(datasetType) 

445 return datasetType 

446 except KeyError as e: 

447 raise ValueError from e 



class DatastoreMock:
    """Mocks a butler datastore.

    Has functions that mock the datastore in a butler. Provides an `apply`
    function to replace the relevant butler datastore functions with the
    mock functions.
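
    Examples
    --------
    A minimal sketch:

    .. code-block:: py

        butler = makeTestCollection(repo)
        DatastoreMock.apply(butler)
        # Dataset reads now go through the mocked ``Datastore.get``, which
        # returns ``(ref.id, parameters)`` instead of stored data.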

456 """ 

457 

458 @staticmethod 

459 def apply(butler): 

460 """Apply datastore mocks to a butler.""" 

461 butler.datastore.export = DatastoreMock._mock_export 

462 butler.datastore.get = DatastoreMock._mock_get 

463 butler.datastore.ingest = MagicMock() 

464 

465 @staticmethod 

466 def _mock_export(refs: Iterable[DatasetRef], *, 

467 directory: Optional[str] = None, 

468 transfer: Optional[str] = None) -> Iterable[FileDataset]: 

469 """A mock of `Datastore.export` that satisfies the requirement that 

470 the refs passed in are included in the `FileDataset` objects 

471 returned. 

472 

473 This can be used to construct a `Datastore` mock that can be used 

474 in repository export via:: 

475 

476 datastore = unittest.mock.Mock(spec=Datastore) 

477 datastore.export = DatastoreMock._mock_export 

478 

479 """ 

480 for ref in refs: 

481 yield FileDataset(refs=[ref], 

482 path="mock/path", 

483 formatter="lsst.daf.butler.formatters.json.JsonFormatter") 

484 

485 @staticmethod 

486 def _mock_get(ref: DatasetRef, parameters: Optional[Mapping[str, Any]] = None 

487 ) -> Tuple[int, Optional[Mapping[str, Any]]]: 

488 """A mock of `Datastore.get` that just returns the integer dataset ID 

489 value and parameters it was given. 

490 """ 

491 return (ref.id, parameters)