# This file is part of pipe_base.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = (
    "MockDataset",
    "MockStorageClass",
    "MockDatasetQuantum",
    "MockStorageClassDelegate",
    "get_mock_name",
    "get_original_name",
    "is_mock_name",
)

import sys
import uuid
from collections.abc import Callable, Iterable, Mapping
from typing import Any, cast

import pydantic
from lsst.daf.butler import (
    DataIdValue,
    DatasetComponent,
    DatasetRef,
    DatasetType,
    Formatter,
    FormatterFactory,
    LookupKey,
    SerializedDatasetType,
    StorageClass,
    StorageClassDelegate,
    StorageClassFactory,
)
from lsst.daf.butler.formatters.json import JsonFormatter
from lsst.utils.introspection import get_full_type_name

_NAME_PREFIX: str = "_mock_"


def get_mock_name(original: str) -> str:
    """Return the name of the mock storage class, dataset type, or task label
    for the given original name.

    Parameters
    ----------
    original : `str`
        Original name.

    Returns
    -------
    name : `str`
        The name of the mocked version.
    """
    return _NAME_PREFIX + original


def get_original_name(mock: str) -> str:
    """Return the name of the original storage class, dataset type, or task
    label that corresponds to the given mock name.

    Parameters
    ----------
    mock : `str`
        The mocked name.

    Returns
    -------
    original : `str`
        The original name.
    """
    assert mock.startswith(_NAME_PREFIX)
    return mock.removeprefix(_NAME_PREFIX)


def is_mock_name(name: str) -> bool:
    """Return whether the given name is that of a mock storage class, dataset
    type, or task label.

    Parameters
    ----------
    name : `str`
        The given name to check.

    Returns
    -------
    is_mock : `bool`
        Whether the name is for a mock or not.
    """
    return name.startswith(_NAME_PREFIX)
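
# Comment-only sketch of the naming helpers above ("StructuredDataDict" is
# just an illustrative storage class name):
#
#     get_mock_name("StructuredDataDict")            # "_mock_StructuredDataDict"
#     get_original_name("_mock_StructuredDataDict")  # "StructuredDataDict"
#     is_mock_name("_mock_StructuredDataDict")       # True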


# Tests for this module are in the ci_middleware package, where we have easy
# access to complex real storage classes (and their pytypes) to test against.


class MockDataset(pydantic.BaseModel):
    """The in-memory dataset type used by `MockStorageClass`."""

    dataset_id: uuid.UUID | None
    """Universal unique identifier for this dataset."""

    dataset_type: SerializedDatasetType
    """Butler dataset type of this dataset.

    See the documentation for ``data_id`` for why this is a
    `~lsst.daf.butler.SerializedDatasetType` instead of a "real" one.
    """

    data_id: dict[str, DataIdValue]
    """Butler data ID for this dataset.

    This is a dictionary of data ID values instead of a "real"
    `~lsst.daf.butler.DataCoordinate` for two reasons:

    - the mock dataset may need to be read from disk in a context in which a
      `~lsst.daf.butler.DimensionUniverse` is unavailable;
    - we don't want the complexity of having a separate
      ``SerializedMockDataCoordinate``.
    """

    run: str | None
    """`~lsst.daf.butler.CollectionType.RUN` collection this dataset belongs
    to.
    """

    quantum: MockDatasetQuantum | None = None
    """Description of the quantum that produced this dataset."""

    output_connection_name: str | None = None
    """The name of the PipelineTask output connection that produced this
    dataset.
    """

    converted_from: MockDataset | None = None
    """Another `MockDataset` that underwent a storage class conversion to
    produce this one.
    """

    parent: MockDataset | None = None
    """Another `MockDataset` from which a component was extracted to form this
    one.
    """

    parameters: dict[str, str] | None = None
    """`repr` of all parameters applied when reading this dataset."""

    @property
    def storage_class(self) -> str:
        return cast(str, self.dataset_type.storageClass)

    def make_derived(self, **kwargs: Any) -> MockDataset:
        """Return a new MockDataset that represents applying some storage
        class operation to this one.

        Parameters
        ----------
        **kwargs : `~typing.Any`
            Keyword arguments are fields of `MockDataset` or
            `~lsst.daf.butler.SerializedDatasetType` to override in the result.

        Returns
        -------
        derived : `MockDataset`
            The newly-mocked dataset.
        """
        dataset_type_updates = {
            k: kwargs.pop(k) for k in list(kwargs) if k in SerializedDatasetType.model_fields
        }
        kwargs.setdefault("dataset_type", self.dataset_type.model_copy(update=dataset_type_updates))
        # Fields below are those that should not be propagated to the derived
        # dataset, because they're not about the intrinsic on-disk thing.
        kwargs.setdefault("converted_from", None)
        kwargs.setdefault("parent", None)
        kwargs.setdefault("parameters", None)
        # Also use setdefault on the ref in case the caller wants to override
        # that directly, but this is expected to be rare enough that it's not
        # worth it to try to optimize out the work above to make derived_ref.
        return self.model_copy(update=kwargs)

    # Work around the fact that Sphinx chokes on Pydantic docstring formatting,
    # when we inherit those docstrings in our public classes.
    if "sphinx" in sys.modules:

        def copy(self, *args: Any, **kwargs: Any) -> Any:
            """See `pydantic.BaseModel.copy`."""
            return super().copy(*args, **kwargs)

        def model_dump(self, *args: Any, **kwargs: Any) -> Any:
            """See `pydantic.BaseModel.model_dump`."""
            return super().model_dump(*args, **kwargs)

        def model_dump_json(self, *args: Any, **kwargs: Any) -> Any:
            """See `pydantic.BaseModel.model_dump_json`."""
            return super().model_dump_json(*args, **kwargs)

        def model_copy(self, *args: Any, **kwargs: Any) -> Any:
            """See `pydantic.BaseModel.model_copy`."""
            return super().model_copy(*args, **kwargs)

        @classmethod
        def model_json_schema(cls, *args: Any, **kwargs: Any) -> Any:
            """See `pydantic.BaseModel.model_json_schema`."""
            return super().model_json_schema(*args, **kwargs)
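
# Comment-only sketch of `MockDataset.make_derived`, as used by the storage
# class machinery below (`d` stands for a hypothetical MockDataset):
#
#     converted = d.make_derived(storageClass="_mock_Other", converted_from=d)
#     # converted.dataset_type.storageClass == "_mock_Other"
#     # converted.parent and converted.parameters are reset to None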


class MockDatasetQuantum(pydantic.BaseModel):
    """Description of the quantum that produced a mock dataset.

    This is also used to represent task-init operations for init-output mock
    datasets.
    """

    task_label: str
    """Label of the producing PipelineTask in its pipeline."""

    data_id: dict[str, DataIdValue]
    """Data ID for the quantum."""

    inputs: dict[str, list[MockDataset]]
    """Mock datasets provided as input to the quantum.

    Keys are task-internal connection names, not dataset type names.
    """

    # Work around the fact that Sphinx chokes on Pydantic docstring formatting,
    # when we inherit those docstrings in our public classes.
    if "sphinx" in sys.modules:

        def copy(self, *args: Any, **kwargs: Any) -> Any:
            """See `pydantic.BaseModel.copy`."""
            return super().copy(*args, **kwargs)

        def model_dump(self, *args: Any, **kwargs: Any) -> Any:
            """See `pydantic.BaseModel.model_dump`."""
            return super().model_dump(*args, **kwargs)

        def model_dump_json(self, *args: Any, **kwargs: Any) -> Any:
            """See `pydantic.BaseModel.model_dump_json`."""
            return super().model_dump_json(*args, **kwargs)

        def model_copy(self, *args: Any, **kwargs: Any) -> Any:
            """See `pydantic.BaseModel.model_copy`."""
            return super().model_copy(*args, **kwargs)

        @classmethod
        def model_json_schema(cls, *args: Any, **kwargs: Any) -> Any:
            """See `pydantic.BaseModel.model_json_schema`."""
            return super().model_json_schema(*args, **kwargs)


MockDataset.model_rebuild()


class MockStorageClassDelegate(StorageClassDelegate):
    """Implementation of the StorageClassDelegate interface for mock datasets.

    This class does not implement assembly and disassembly, simply because
    they're not needed right now. They could be added in the future with some
    additional tracking attributes in `MockDataset`.
    """

    def assemble(self, components: dict[str, Any], pytype: type | None = None) -> MockDataset:
        # Docstring inherited.
        raise NotImplementedError("Mock storage classes do not implement assembly.")

    def getComponent(self, composite: Any, componentName: str) -> Any:
        # Docstring inherited.
        assert isinstance(
            composite, MockDataset
        ), f"MockStorageClassDelegate given a non-mock dataset {composite!r}."
        return composite.make_derived(
            name=f"{composite.dataset_type.name}.{componentName}",
            storageClass=self.storageClass.allComponents()[componentName].name,
            parentStorageClass=self.storageClass.name,
            parent=composite,
        )

    def disassemble(
        self, composite: Any, subset: Iterable | None = None, override: Any | None = None
    ) -> dict[str, DatasetComponent]:
        # Docstring inherited.
        raise NotImplementedError("Mock storage classes do not implement disassembly.")

    def handleParameters(self, inMemoryDataset: Any, parameters: Mapping[str, Any] | None = None) -> Any:
        # Docstring inherited.
        assert isinstance(
            inMemoryDataset, MockDataset
        ), f"MockStorageClassDelegate given a non-mock dataset {inMemoryDataset!r}."
        if not parameters:
            return inMemoryDataset
        return inMemoryDataset.make_derived(parameters={k: repr(v) for k, v in parameters.items()})
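
# Comment-only sketch of the delegate's parameter handling: parameters are
# recorded as reprs rather than applied (`delegate` and `d` are hypothetical
# instances):
#
#     out = delegate.handleParameters(d, {"slice": slice(10)})
#     # out.parameters == {"slice": "slice(None, 10, None)"}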


class MockStorageClass(StorageClass):
    """A reimplementation of `lsst.daf.butler.StorageClass` for mock datasets.

    Parameters
    ----------
    original : `~lsst.daf.butler.StorageClass`
        The original storage class.
    factory : `~lsst.daf.butler.StorageClassFactory` or `None`, optional
        Storage class factory to use. If `None` the default factory is used.

    Notes
    -----
    Each `MockStorageClass` instance corresponds to a real "original" storage
    class, with components and conversions that are mocks of the original's
    components and conversions. The ``pytype`` for all `MockStorageClass`
    instances is `MockDataset`.
    """

    def __init__(self, original: StorageClass, factory: StorageClassFactory | None = None):
        name = get_mock_name(original.name)
        if factory is None:
            factory = StorageClassFactory()
        super().__init__(
            name=name,
            pytype=MockDataset,
            components={
                k: self.get_or_register_mock(v.name, factory) for k, v in original.components.items()
            },
            derivedComponents={
                k: self.get_or_register_mock(v.name, factory) for k, v in original.derivedComponents.items()
            },
            parameters=frozenset(original.parameters),
            delegate=get_full_type_name(MockStorageClassDelegate),
            # Conversions work differently for mock storage classes, since they
            # all have the same pytype: we use the original storage class being
            # mocked to see if we can convert, then just make a new MockDataset
            # that points back to the original.
            converters={},
        )
        self.original = original
        # Make certain no one tries to use the converters.
        self._converters = None  # type: ignore

    def _get_converters_by_type(self) -> dict[type, Callable[[Any], Any]]:
        # Docstring inherited.
        raise NotImplementedError("MockStorageClass does not use converters.")

    @classmethod
    def get_or_register_mock(
        cls, original: str, factory: StorageClassFactory | None = None
    ) -> MockStorageClass:
        """Return a mock storage class for the given original storage class,
        creating and registering it if necessary.

        Parameters
        ----------
        original : `str`
            Name of the original storage class to be mocked.
        factory : `~lsst.daf.butler.StorageClassFactory`, optional
            Storage class factory singleton instance.

        Returns
        -------
        mock : `MockStorageClass`
            New storage class that mocks ``original``.
        """
        name = get_mock_name(original)
        if factory is None:
            factory = StorageClassFactory()
        if name in factory:
            return cast(MockStorageClass, factory.getStorageClass(name))
        else:
            result = cls(factory.getStorageClass(original), factory)
            factory.registerStorageClass(result)
            return result
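
    # Comment-only usage sketch: the first call constructs and registers the
    # mock; later calls return the cached instance from the (singleton)
    # default factory, so repeated calls yield the same object:
    #
    #     sc1 = MockStorageClass.get_or_register_mock("StructuredDataDict")
    #     sc2 = MockStorageClass.get_or_register_mock("StructuredDataDict")
    #     assert sc1 is sc2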

    def allComponents(self) -> Mapping[str, MockStorageClass]:
        # Docstring inherited.
        return cast(Mapping[str, MockStorageClass], super().allComponents())

    @property
    def components(self) -> Mapping[str, MockStorageClass]:
        # Docstring inherited.
        return cast(Mapping[str, MockStorageClass], super().components)

    @property
    def derivedComponents(self) -> Mapping[str, MockStorageClass]:
        # Docstring inherited.
        return cast(Mapping[str, MockStorageClass], super().derivedComponents)

    def can_convert(self, other: StorageClass) -> bool:
        # Docstring inherited.
        if not isinstance(other, MockStorageClass):
            return False
        return self.original.can_convert(other.original)

    def coerce_type(self, incorrect: Any) -> Any:
        # Docstring inherited.
        if not isinstance(incorrect, MockDataset):
            raise TypeError(
                f"Mock storage class {self.name!r} can only convert in-memory datasets "
                f"corresponding to other mock storage classes, not {incorrect!r}."
            )
        factory = StorageClassFactory()
        other_storage_class = factory.getStorageClass(incorrect.storage_class)
        assert isinstance(other_storage_class, MockStorageClass), "Should not get a MockDataset otherwise."
        if other_storage_class.name == self.name:
            return incorrect
        if not self.can_convert(other_storage_class):
            raise TypeError(
                f"Mocked storage class {self.original.name!r} cannot convert from "
                f"{other_storage_class.original.name!r}."
            )
        return incorrect.make_derived(storageClass=self.name, converted_from=incorrect)
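
    # Comment-only sketch of the coercion bookkeeping: a dataset that already
    # has this storage class is returned unchanged, while a compatible mock is
    # re-wrapped with the conversion recorded (`d` is hypothetical):
    #
    #     same = mock_sc.coerce_type(d)       # d itself, if classes match
    #     conv = other_sc.coerce_type(d)      # derived, with converted_from=d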

    @staticmethod
    def mock_dataset_type(original_type: DatasetType) -> DatasetType:
        """Replace a dataset type with a version that uses a mock storage
        class and name.

        Parameters
        ----------
        original_type : `lsst.daf.butler.DatasetType`
            Original dataset type to be mocked.

        Returns
        -------
        mock_type : `lsst.daf.butler.DatasetType`
            A mock version of the dataset type, with name and storage class
            changed and everything else unchanged.
        """
        mock_storage_class = MockStorageClass.get_or_register_mock(original_type.storageClass_name)
        mock_parent_storage_class = None
        if original_type.parentStorageClass is not None:
            mock_parent_storage_class = MockStorageClass.get_or_register_mock(
                original_type.parentStorageClass.name
            )
        return DatasetType(
            get_mock_name(original_type.name),
            original_type.dimensions,
            mock_storage_class,
            isCalibration=original_type.isCalibration(),
            parentStorageClass=mock_parent_storage_class,
        )

    @staticmethod
    def mock_dataset_refs(original_refs: Iterable[DatasetRef]) -> list[DatasetRef]:
        """Replace dataset references with versions that use a mock storage
        class and dataset type name.

        Parameters
        ----------
        original_refs : `~collections.abc.Iterable` [ \
                `lsst.daf.butler.DatasetRef` ]
            Original dataset references to be mocked.

        Returns
        -------
        mock_refs : `list` [ `lsst.daf.butler.DatasetRef` ]
            Mocked versions of the dataset references, with dataset type name
            and storage class changed and everything else unchanged.
        """
        original_refs = list(original_refs)
        if not original_refs:
            return original_refs
        dataset_type = MockStorageClass.mock_dataset_type(original_refs[0].datasetType)
        return [
            DatasetRef(dataset_type, original_ref.dataId, run=original_ref.run, id=original_ref.id)
            for original_ref in original_refs
        ]

    @staticmethod
    def unmock_dataset_type(mock_type: DatasetType) -> DatasetType:
        """Replace a mock dataset type with the original one it was created
        from.

        Parameters
        ----------
        mock_type : `lsst.daf.butler.DatasetType`
            A dataset type with a mocked name and storage class.

        Returns
        -------
        original_type : `lsst.daf.butler.DatasetType`
            The original dataset type.
        """
        storage_class = mock_type.storageClass
        parent_storage_class = mock_type.parentStorageClass
        if isinstance(storage_class, MockStorageClass):
            storage_class = storage_class.original
        if parent_storage_class is not None and isinstance(parent_storage_class, MockStorageClass):
            parent_storage_class = parent_storage_class.original
        return DatasetType(
            get_original_name(mock_type.name),
            mock_type.dimensions,
            storage_class,
            isCalibration=mock_type.isCalibration(),
            parentStorageClass=parent_storage_class,
        )

    @staticmethod
    def unmock_dataset_refs(mock_refs: Iterable[DatasetRef]) -> list[DatasetRef]:
        """Replace dataset references with versions that do not use a mock
        storage class and dataset type name.

        Parameters
        ----------
        mock_refs : `~collections.abc.Iterable` [ \
                `lsst.daf.butler.DatasetRef` ]
            Dataset references that use a mocked dataset type name and storage
            class.

        Returns
        -------
        original_refs : `list` [ `lsst.daf.butler.DatasetRef` ]
            The original dataset references.
        """
        mock_refs = list(mock_refs)
        if not mock_refs:
            return mock_refs
        dataset_type = MockStorageClass.unmock_dataset_type(mock_refs[0].datasetType)
        return [
            DatasetRef(dataset_type, mock_ref.dataId, run=mock_ref.run, id=mock_ref.id)
            for mock_ref in mock_refs
        ]
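
# Comment-only round-trip sketch (``dt`` is a hypothetical regular DatasetType
# whose storage class is not itself a mock):
#
#     mocked = MockStorageClass.mock_dataset_type(dt)
#     assert is_mock_name(mocked.name)
#     # Unmocking should recover a dataset type equal to the original:
#     assert MockStorageClass.unmock_dataset_type(mocked) == dt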


def _monkeypatch_daf_butler() -> None:
    """Replace methods in daf_butler's StorageClassFactory and FormatterFactory
    classes to automatically recognize mock storage classes.

    This monkey-patching is executed when the `lsst.pipe.base.tests.mocks`
    package is imported, and it affects all butler instances created before or
    after that import.
    """
    original_get_storage_class = StorageClassFactory.getStorageClass

    def new_get_storage_class(self: StorageClassFactory, storageClassName: str) -> StorageClass:
        try:
            return original_get_storage_class(self, storageClassName)
        except KeyError:
            if is_mock_name(storageClassName):
                return MockStorageClass.get_or_register_mock(get_original_name(storageClassName))
            raise

    StorageClassFactory.getStorageClass = new_get_storage_class  # type: ignore

    del new_get_storage_class

    original_get_formatter_class_with_match = FormatterFactory.getFormatterClassWithMatch

    def new_get_formatter_class_with_match(
        self: FormatterFactory, entity: Any
    ) -> tuple[LookupKey, type[Formatter], dict[str, Any]]:
        try:
            return original_get_formatter_class_with_match(self, entity)
        except KeyError:
            lookup_keys = (LookupKey(name=entity),) if isinstance(entity, str) else entity._lookupNames()
            for key in lookup_keys:
                # This matches mock dataset type names before mock storage
                # classes, and it would even match some regular dataset types
                # that are automatic connections (logs, configs, metadata) of
                # mocked tasks. The latter would be a problem, except that
                # those should have already matched in the try block above.
                if is_mock_name(key.name):
                    return (key, JsonFormatter, {})
            raise

    FormatterFactory.getFormatterClassWithMatch = new_get_formatter_class_with_match  # type: ignore

    del new_get_formatter_class_with_match

    original_get_formatter_with_match = FormatterFactory.getFormatterWithMatch

    def new_get_formatter_with_match(
        self: FormatterFactory, entity: Any, *args: Any, **kwargs: Any
    ) -> tuple[LookupKey, Formatter]:
        try:
            return original_get_formatter_with_match(self, entity, *args, **kwargs)
        except KeyError:
            lookup_keys = (LookupKey(name=entity),) if isinstance(entity, str) else entity._lookupNames()
            for key in lookup_keys:
                if is_mock_name(key.name):
                    return (key, JsonFormatter(*args, **kwargs))
            raise

    FormatterFactory.getFormatterWithMatch = new_get_formatter_with_match  # type: ignore

    del new_get_formatter_with_match
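
# With the monkey-patching applied below, butler code can resolve mock names
# lazily even when they were never explicitly registered. Comment-only sketch
# ("StructuredDataDict" is an illustrative real storage class name):
#
#     sc = StorageClassFactory().getStorageClass("_mock_StructuredDataDict")
#     assert isinstance(sc, MockStorageClass)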


_monkeypatch_daf_butler()