Coverage for python/lsst/daf/butler/tests/_testRepo.py: 14%


# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.


__all__ = [
    "makeTestRepo",
    "makeTestCollection",
    "addDatasetType",
    "expandUniqueId",
    "DatastoreMock",
    "addDataIdValue",
]

import random
from typing import Any, Iterable, Mapping, Optional, Set, Tuple
from unittest.mock import MagicMock

import sqlalchemy
from lsst.daf.butler import (
    Butler,
    Config,
    DataCoordinate,
    DatasetRef,
    DatasetType,
    Dimension,
    DimensionUniverse,
    FileDataset,
    Registry,
)


def makeTestRepo(
    root: str, dataIds: Optional[Mapping[str, Iterable]] = None, *, config: Optional[Config] = None, **kwargs
) -> Butler:
    """Create an empty test repository.

    Parameters
    ----------
    root : `str`
        The location of the root directory for the repository.
    dataIds : `~collections.abc.Mapping` [`str`, `iterable`], optional
        A mapping keyed by the dimensions used in the test. Each value
        is an iterable of names for that dimension (e.g., detector IDs for
        `"detector"`). Related dimensions (e.g., instruments and detectors)
        are linked arbitrarily. This parameter is provided for compatibility
        with old code; newer code should make the repository, then call
        `~lsst.daf.butler.tests.addDataIdValue`.
    config : `lsst.daf.butler.Config`, optional
        A configuration for the repository (for details, see
        `lsst.daf.butler.Butler.makeRepo`). If omitted, creates a repository
        with default dataset and storage types, but optimized for speed.
        The defaults set ``.datastore.cls``, ``.datastore.checksum`` and
        ``.registry.db``. If a supplied config does not specify these values
        the internal defaults will be used to ensure that we have a usable
        configuration.
    **kwargs
        Extra arguments to `lsst.daf.butler.Butler.makeRepo`.

    Returns
    -------
    butler : `lsst.daf.butler.Butler`
        A Butler referring to the new repository. This Butler is provided
        only for additional setup; to keep test cases isolated, it is highly
        recommended that each test create its own Butler with a unique
        run/collection. See `makeTestCollection`.

    Notes
    -----
    This function provides a "quick and dirty" repository for simple unit
    tests that don't depend on complex data relationships. It is ill-suited
    for tests where the structure of the data matters. If you need such a
    dataset, create it directly or use a saved test dataset.

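    Examples
    --------
    A minimal sketch, not run as a doctest because it creates files on
    disk; the directory name and dimension values are purely illustrative:

    .. code-block:: py

        >>> butler = makeTestRepo(
        ...     "testdir", {"instrument": ["notACam"], "detector": [1]})
        >>> testButler = makeTestCollection(butler, uniqueId="test1")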
    """
    defaults = Config()
    defaults["datastore", "cls"] = "lsst.daf.butler.datastores.inMemoryDatastore.InMemoryDatastore"
    defaults["datastore", "checksum"] = False  # In case of future changes
    defaults["registry", "db"] = "sqlite:///<butlerRoot>/gen3.sqlite3"

    if config:
        defaults.update(config)

    if not dataIds:
        dataIds = {}

    # Disable config root by default so that our registry override will
    # not be ignored.
    # newConfig guards against location-related keywords like outfile.
    newConfig = Butler.makeRepo(root, config=defaults, forceConfigRoot=False, **kwargs)
    butler = Butler(newConfig, writeable=True)
    dimensionRecords = _makeRecords(dataIds, butler.registry.dimensions)
    for dimension, records in dimensionRecords.items():
        butler.registry.insertDimensionData(dimension, *records)
    return butler


def makeTestCollection(repo: Butler, uniqueId: Optional[str] = None) -> Butler:
    """Create a read/write Butler to a fresh collection.

    Parameters
    ----------
    repo : `lsst.daf.butler.Butler`
        A previously existing Butler to a repository, such as that returned
        by `makeTestRepo`, or one constructed from the config returned by
        `~lsst.daf.butler.Butler.makeRepo`.
    uniqueId : `str`, optional
        A collection ID guaranteed by external code to be unique across all
        calls to ``makeTestCollection`` for the same repository.

    Returns
    -------
    butler : `lsst.daf.butler.Butler`
        A Butler referring to a new collection in the given repository.
        The collection is (almost) guaranteed to be new.

    Notes
    -----
    This function creates a single run collection that does not necessarily
    conform to any repository conventions. It is only suitable for creating
    an isolated test area, and not for repositories intended for real data
    processing or analysis.

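    Examples
    --------
    A minimal sketch, assuming a repository created by `makeTestRepo`; the
    ``uniqueId`` shown is illustrative, and without it the collection name
    is chosen randomly:

    .. code-block:: py

        >>> repo = makeTestRepo("testdir")
        >>> butler = makeTestCollection(repo, uniqueId="unittest_1")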
    """
    if not uniqueId:
        # Create a "random" collection name
        # Speed matters more than cryptographic guarantees
        uniqueId = str(random.randrange(1_000_000_000))
    collection = "test_" + uniqueId
    return Butler(butler=repo, run=collection)


def _makeRecords(dataIds: Mapping[str, Iterable], universe: DimensionUniverse) -> Mapping[str, Iterable]:
    """Create cross-linked dimension records from a collection of
    data ID values.

    Parameters
    ----------
    dataIds : `~collections.abc.Mapping` [`str`, `iterable`]
        A mapping keyed by the dimensions of interest. Each value is an
        iterable of names for that dimension (e.g., detector IDs for
        `"detector"`).
    universe : `lsst.daf.butler.DimensionUniverse`
        Set of all known dimensions and their relationships.

    Returns
    -------
    dataIds : `~collections.abc.Mapping` [`str`, `iterable`]
        A mapping keyed by the dimensions of interest, giving one
        `~lsst.daf.butler.DimensionRecord` for each input name. Related
        dimensions (e.g., instruments and detectors) are linked arbitrarily.

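    Examples
    --------
    A rough sketch of the input/output shape, assuming ``universe`` is a
    `~lsst.daf.butler.DimensionUniverse`; the placeholders stand in for
    real `~lsst.daf.butler.DimensionRecord` objects:

    .. code-block:: py

        >>> _makeRecords({"instrument": ["notACam"], "detector": [1, 2]},
        ...              universe)
        {"instrument": [<instrument record>],
         "detector": [<detector record 1>, <detector record 2>]}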
    """
    expandedIds = {}
    # Provide alternate keys like detector names
    for name, values in dataIds.items():
        expandedIds[name] = []
        dimension = universe[name]
        for value in values:
            expandedIds[name].append(_fillAllKeys(dimension, value))

    # Pick cross-relationships arbitrarily
    for name, values in expandedIds.items():
        dimension = universe[name]
        for value in values:
            for other in dimension.required:
                if other != dimension:
                    relation = expandedIds[other.name][0]
                    value[other.name] = relation[other.primaryKey.name]
            # Do not recurse, to keep the user from having to provide
            # irrelevant dimensions
            for other in dimension.implied:
                if other != dimension and other.name in expandedIds and other.viewOf is None:
                    relation = expandedIds[other.name][0]
                    value[other.name] = relation[other.primaryKey.name]

    return {
        dimension: [universe[dimension].RecordClass(**value) for value in values]
        for dimension, values in expandedIds.items()
    }


def _fillAllKeys(dimension: Dimension, value: Any) -> Mapping[str, Any]:
    """Create an arbitrary mapping of all required keys for a given dimension
    that do not refer to other dimensions.

    Parameters
    ----------
    dimension : `lsst.daf.butler.Dimension`
        The dimension for which to generate a set of keys (e.g., detector).
    value
        The value assigned to ``dimension`` (e.g., detector ID).

    Returns
    -------
    expandedValue : `dict` [`str`]
        A mapping of dimension keys to values. The primary key of
        ``dimension`` maps to ``value``, but all other mappings (e.g.,
        detector name) are arbitrary.

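    Examples
    --------
    A rough sketch, assuming a universe in which a detector has an integer
    ``id`` primary key and a string ``full_name`` alternate key; the exact
    keys depend on the configured universe:

    .. code-block:: py

        >>> _fillAllKeys(universe["detector"], 42)
        {"id": 42, "full_name": "42", ...}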
    """
    expandedValue = {}
    for key in dimension.uniqueKeys:
        if key.nbytes:
            castType = bytes
        else:
            castType = key.dtype().python_type
        try:
            castValue = castType(value)
        except TypeError:
            castValue = castType()
        expandedValue[key.name] = castValue
    for key in dimension.metadata:
        if not key.nullable:
            expandedValue[key.name] = key.dtype().python_type(value)
    return expandedValue


def _matchAnyDataId(record: Mapping[str, Any], registry: Registry, dimension: Dimension):
    """Match a partial dimension record to an existing record along a
    specific dimension.

    Parameters
    ----------
    record : `dict` [`str`]
        A mapping representing the record to be matched.
    registry : `lsst.daf.butler.Registry`
        The registry with all known dimension records.
    dimension : `lsst.daf.butler.Dimension`
        The dimension on which to find a match for ``record``.

    Raises
    ------
    RuntimeError
        Raised if there are no existing records for ``dimension``.
    """
    matches = list(registry.queryDimensionRecords(dimension.name))
    if matches:
        record[dimension.name] = matches[0].dataId[dimension.name]
    else:
        raise RuntimeError(f"No matching values for {dimension.name} found.")


def _fillRelationships(
    dimension: Dimension, dimensionInfo: Mapping[str, Any], existing: Registry
) -> Mapping[str, Any]:
    """Create arbitrary mappings from one dimension to all dimensions it
    depends on.

    Parameters
    ----------
    dimension : `lsst.daf.butler.Dimension`
        The dimension for which to generate relationships.
    dimensionInfo : `dict` [`str`]
        A mapping of dimension keys to values.
    existing : `lsst.daf.butler.Registry`
        The registry with all previously registered dimensions.

    Returns
    -------
    filledInfo : `dict` [`str`]
        A version of ``dimensionInfo`` with extra mappings for any
        relationships required by ``dimension``. Any relationships already
        defined in ``dimensionInfo`` are preserved.

    Raises
    ------
    RuntimeError
        Raised if ``dimension`` depends on a dimension for which no values
        exist yet.
    """
    filledInfo = dimensionInfo.copy()
    for other in dimension.required:
        if other != dimension and other.name not in filledInfo:
            _matchAnyDataId(filledInfo, existing, other)
    # Do not recurse, to keep the user from having to provide
    # irrelevant dimensions.
    for other in dimension.implied:
        toUpdate = other != dimension and other.name not in filledInfo
        updatable = other.viewOf is None
        # Do not run the query if either toUpdate or updatable is false
        if toUpdate and updatable and list(existing.queryDimensionRecords(other)):
            _matchAnyDataId(filledInfo, existing, other)
    return filledInfo


def expandUniqueId(butler: Butler, partialId: Mapping[str, Any]) -> DataCoordinate:
    """Return a complete data ID matching some criterion.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The repository to query.
    partialId : `~collections.abc.Mapping` [`str`]
        A mapping of known dimensions and values.

    Returns
    -------
    dataId : `lsst.daf.butler.DataCoordinate`
        The unique data ID that matches ``partialId``.

    Raises
    ------
    ValueError
        Raised if ``partialId`` does not uniquely identify a data ID.

    Notes
    -----
    This function will only work correctly if all dimensions attached to the
    target dimension (e.g., "physical_filter" for "visit") are known to the
    repository, even if they're not needed to identify a dataset. This
    function is only suitable for certain kinds of test repositories, and
    not for repositories intended for real data processing or analysis.

    Examples
    --------
    .. code-block:: py

        >>> butler = makeTestRepo(
        ...     "testdir", {"instrument": ["notACam"], "detector": [1]})
        >>> expandUniqueId(butler, {"detector": 1})
        DataCoordinate({instrument, detector}, ('notACam', 1))
    """
    # The example is *not* a doctest because it requires dangerous I/O
    registry = butler.registry
    dimensions = registry.dimensions.extract(partialId.keys()).required

    query = " AND ".join(f"{dimension} = {value!r}" for dimension, value in partialId.items())

    # Much of the purpose of this function is to do something we explicitly
    # reject most of the time: query for a governor dimension (e.g. instrument)
    # given something that depends on it (e.g. visit), hence check=False.
    dataId = list(registry.queryDataIds(dimensions, where=query, check=False))
    if len(dataId) == 1:
        return dataId[0]
    else:
        raise ValueError(f"Found {len(dataId)} matches for {partialId}, expected 1.")


def addDataIdValue(butler: Butler, dimension: str, value: Any, **related: Any):
    """Add a new data ID to a repository.

    Related dimensions (e.g., the instrument associated with a detector) may
    be specified using ``related``. While these keywords are sometimes
    needed to get self-consistent repositories, you do not need to define
    relationships you do not use. Any unspecified dimensions will be linked
    arbitrarily.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The repository to update.
    dimension : `str`
        The name of the dimension to gain a new value.
    value
        The value to register for the dimension.
    **related
        Any existing dimensions to be linked to ``value``.

    Notes
    -----
    Because this function creates filler data, it is only suitable for test
    repositories. It should not be used for repositories intended for real
    data processing or analysis, which have known dimension values.

    Examples
    --------
    See the guide on :ref:`using-butler-in-tests-make-repo` for usage
    examples.

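    A hypothetical minimal sequence (the instrument and detector values are
    illustrative):

    .. code-block:: py

        >>> butler = makeTestRepo("testdir")
        >>> addDataIdValue(butler, "instrument", "notACam")
        >>> addDataIdValue(butler, "detector", 1, instrument="notACam")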
    """
    # Example is not a doctest, because it's probably unsafe to create even
    # an in-memory butler in that environment.
    try:
        fullDimension = butler.registry.dimensions[dimension]
    except KeyError as e:
        raise ValueError from e
    # Bad keys are ignored by the registry code, so check for them here
    extraKeys = related.keys() - (fullDimension.required | fullDimension.implied)
    if extraKeys:
        raise ValueError(
            f"Unexpected keywords {extraKeys} not found "
            f"in {fullDimension.required | fullDimension.implied}"
        )

    # Define secondary keys (e.g., detector name given detector id)
    expandedValue = _fillAllKeys(fullDimension, value)
    expandedValue.update(**related)
    completeValue = _fillRelationships(fullDimension, expandedValue, butler.registry)

    dimensionRecord = fullDimension.RecordClass(**completeValue)
    try:
        butler.registry.syncDimensionData(dimension, dimensionRecord)
    except sqlalchemy.exc.IntegrityError as e:
        raise RuntimeError(
            "Could not create data ID value. Automatic relationship generation "
            "may have failed; try adding keywords to assign a specific instrument, "
            "physical_filter, etc. based on the nested exception message."
        ) from e


def addDatasetType(butler: Butler, name: str, dimensions: Set[str], storageClass: str) -> DatasetType:
    """Add a new dataset type to a repository.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The repository to update.
    name : `str`
        The name of the dataset type.
    dimensions : `set` [`str`]
        The dimensions of the new dataset type.
    storageClass : `str`
        The storage class the dataset will use.

    Returns
    -------
    datasetType : `lsst.daf.butler.DatasetType`
        The new type.

    Raises
    ------
    ValueError
        Raised if the dimensions or storage class is invalid.

    Notes
    -----
    Dataset types are shared across all collections in a repository, so this
    function does not need to be run for each collection.

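    Examples
    --------
    A minimal sketch; the type name, dimensions, and storage class shown
    are illustrative and assume a repository whose universe defines them:

    .. code-block:: py

        >>> datasetType = addDatasetType(
        ...     butler, "DataType1", {"instrument", "visit"}, "NumpyArray")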
    """
    try:
        datasetType = DatasetType(name, dimensions, storageClass, universe=butler.registry.dimensions)
        butler.registry.registerDatasetType(datasetType)
        return datasetType
    except KeyError as e:
        raise ValueError from e


class DatastoreMock:
    """Mocks a butler datastore.

    Has functions that mock the datastore in a butler. Provides an `apply`
    function to replace the relevant butler datastore functions with the
    mock functions.

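    A minimal usage sketch, assuming ``butler`` is a test butler whose
    datasets need no real storage:

    .. code-block:: py

        >>> DatastoreMock.apply(butler)
        >>> # datastore reads now return (dataset ID, parameters)
        >>> data = butler.get(datasetRef)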
    """

    @staticmethod
    def apply(butler):
        """Apply datastore mocks to a butler."""
        butler.datastore.export = DatastoreMock._mock_export
        butler.datastore.get = DatastoreMock._mock_get
        butler.datastore.ingest = MagicMock()

    @staticmethod
    def _mock_export(
        refs: Iterable[DatasetRef], *, directory: Optional[str] = None, transfer: Optional[str] = None
    ) -> Iterable[FileDataset]:
        """A mock of `Datastore.export` that satisfies the requirement that
        the refs passed in are included in the `FileDataset` objects
        returned.

        This can be used to construct a `Datastore` mock that can be used
        in repository export via::

            datastore = unittest.mock.Mock(spec=Datastore)
            datastore.export = DatastoreMock._mock_export

        """
        for ref in refs:
            yield FileDataset(
                refs=[ref], path="mock/path", formatter="lsst.daf.butler.formatters.json.JsonFormatter"
            )

    @staticmethod
    def _mock_get(
        ref: DatasetRef, parameters: Optional[Mapping[str, Any]] = None
    ) -> Tuple[int, Optional[Mapping[str, Any]]]:
        """A mock of `Datastore.get` that just returns the integer dataset ID
        value and parameters it was given.
        """
        return (ref.id, parameters)