Coverage for python/lsst/pipe/base/tests/mocks/_storage_class.py: 40%

194 statements  

coverage.py v7.4.4, created at 2024-03-30 10:01 +0000

# This file is part of pipe_base.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = (
    "MockDataset",
    "MockStorageClass",
    "MockDatasetQuantum",
    "MockStorageClassDelegate",
    "get_mock_name",
    "get_original_name",
    "is_mock_name",
)

import sys
import uuid
from collections.abc import Callable, Iterable, Mapping
from typing import Any, cast

import pydantic
from lsst.daf.butler import (
    DataIdValue,
    DatasetComponent,
    DatasetRef,
    DatasetType,
    Formatter,
    FormatterFactory,
    LookupKey,
    SerializedDatasetType,
    StorageClass,
    StorageClassDelegate,
    StorageClassFactory,
)
from lsst.daf.butler.formatters.json import JsonFormatter
from lsst.utils.introspection import get_full_type_name

_NAME_PREFIX: str = "_mock_"


def get_mock_name(original: str) -> str:
    """Return the name of the mock storage class, dataset type, or task label
    for the given original name.

    Parameters
    ----------
    original : `str`
        Original name.

    Returns
    -------
    name : `str`
        The name of the mocked version.
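
    Examples
    --------
    The mock name is a pure string transform that prepends the fixed
    ``_mock_`` prefix; the storage class name here is illustrative:

    >>> get_mock_name("StructuredDataDict")
    '_mock_StructuredDataDict'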

    """
    return _NAME_PREFIX + original


def get_original_name(mock: str) -> str:
    """Return the name of the original storage class, dataset type, or task
    label that corresponds to the given mock name.

    Parameters
    ----------
    mock : `str`
        The mocked name.

    Returns
    -------
    original : `str`
        The original name.
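
    Examples
    --------
    This is the inverse of `get_mock_name`; the name is illustrative:

    >>> get_original_name("_mock_StructuredDataDict")
    'StructuredDataDict'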

    """
    assert mock.startswith(_NAME_PREFIX)
    return mock.removeprefix(_NAME_PREFIX)


def is_mock_name(name: str) -> bool:
    """Return whether the given name is that of a mock storage class, dataset
    type, or task label.

    Parameters
    ----------
    name : `str`
        The given name to check.

    Returns
    -------
    is_mock : `bool`
        Whether the name is for a mock or not.
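
    Examples
    --------
    A simple prefix check; the names are illustrative:

    >>> is_mock_name("_mock_StructuredDataDict")
    True
    >>> is_mock_name("StructuredDataDict")
    False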

    """
    return name.startswith(_NAME_PREFIX)


# Tests for this module are in the ci_middleware package, where we have easy
# access to complex real storage classes (and their pytypes) to test against.


class MockDataset(pydantic.BaseModel):
    """The in-memory dataset type used by `MockStorageClass`."""

    dataset_id: uuid.UUID | None
    """Universally unique identifier for this dataset."""

    dataset_type: SerializedDatasetType
    """Butler dataset type of this dataset.

    See the documentation for ``data_id`` for why this is a
    `~lsst.daf.butler.SerializedDatasetType` instead of a "real" one.
    """

    data_id: dict[str, DataIdValue]
    """Butler data ID for this dataset.

    This is stored in serialized form (as in
    `~lsst.daf.butler.SerializedDataCoordinate`) instead of as a "real" data
    coordinate for two reasons:

    - the mock dataset may need to be read from disk in a context in which a
      `~lsst.daf.butler.DimensionUniverse` is unavailable;
    - we don't want the complexity of having a separate
      ``SerializedMockDataCoordinate``.
    """

    run: str | None
    """`~lsst.daf.butler.CollectionType.RUN` collection this dataset belongs
    to.
    """

    quantum: MockDatasetQuantum | None = None
    """Description of the quantum that produced this dataset."""

    output_connection_name: str | None = None
    """The name of the PipelineTask output connection that produced this
    dataset.
    """

    converted_from: MockDataset | None = None
    """Another `MockDataset` that underwent a storage class conversion to
    produce this one.
    """

    parent: MockDataset | None = None
    """Another `MockDataset` from which a component was extracted to form
    this one.
    """

    parameters: dict[str, str] | None = None
    """`repr` of all parameters applied when reading this dataset."""

    @property
    def storage_class(self) -> str:
        return cast(str, self.dataset_type.storageClass)

    def make_derived(self, **kwargs: Any) -> MockDataset:
        """Return a new MockDataset that represents applying some storage
        class operation to this one.

        Parameters
        ----------
        **kwargs : `~typing.Any`
            Keyword arguments are fields of `MockDataset` or
            `~lsst.daf.butler.SerializedDatasetType` to override in the
            result.

        Returns
        -------
        derived : `MockDataset`
            The newly-mocked dataset.
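
        Examples
        --------
        A sketch of a component extraction, mirroring how
        `MockStorageClassDelegate.getComponent` uses this method; ``mock`` is
        an existing `MockDataset` and the component and storage class names
        are illustrative:

        .. code-block:: python

            derived = mock.make_derived(
                name=f"{mock.dataset_type.name}.component",
                storageClass="_mock_StructuredDataDict",
                parent=mock,
            )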

        """
        dataset_type_updates = {
            k: kwargs.pop(k) for k in list(kwargs) if k in SerializedDatasetType.model_fields
        }
        kwargs.setdefault("dataset_type", self.dataset_type.model_copy(update=dataset_type_updates))
        # Fields below are those that should not be propagated to the derived
        # dataset, because they're not about the intrinsic on-disk thing.
        kwargs.setdefault("converted_from", None)
        kwargs.setdefault("parent", None)
        kwargs.setdefault("parameters", None)
        # Also use setdefault on the ref in case caller wants to override that
        # directly, but this is expected to be rare enough that it's not worth
        # it to try to optimize out the work above to make derived_ref.
        return self.model_copy(update=kwargs)

    # Work around the fact that Sphinx chokes on Pydantic docstring formatting,
    # when we inherit those docstrings in our public classes.
    if "sphinx" in sys.modules:

        def copy(self, *args: Any, **kwargs: Any) -> Any:
            """See `pydantic.BaseModel.copy`."""
            return super().copy(*args, **kwargs)

        def model_dump(self, *args: Any, **kwargs: Any) -> Any:
            """See `pydantic.BaseModel.model_dump`."""
            return super().model_dump(*args, **kwargs)

        def model_copy(self, *args: Any, **kwargs: Any) -> Any:
            """See `pydantic.BaseModel.model_copy`."""
            return super().model_copy(*args, **kwargs)

        @classmethod
        def model_json_schema(cls, *args: Any, **kwargs: Any) -> Any:
            """See `pydantic.BaseModel.model_json_schema`."""
            return super().model_json_schema(*args, **kwargs)


class MockDatasetQuantum(pydantic.BaseModel):
    """Description of the quantum that produced a mock dataset.

    This is also used to represent task-init operations for init-output mock
    datasets.
    """

    task_label: str
    """Label of the producing PipelineTask in its pipeline."""

    data_id: dict[str, DataIdValue]
    """Data ID for the quantum."""

    inputs: dict[str, list[MockDataset]]
    """Mock datasets provided as input to the quantum.

    Keys are task-internal connection names, not dataset type names.
    """

    # Work around the fact that Sphinx chokes on Pydantic docstring formatting,
    # when we inherit those docstrings in our public classes.
    if "sphinx" in sys.modules:

        def copy(self, *args: Any, **kwargs: Any) -> Any:
            """See `pydantic.BaseModel.copy`."""
            return super().copy(*args, **kwargs)

        def model_dump(self, *args: Any, **kwargs: Any) -> Any:
            """See `pydantic.BaseModel.model_dump`."""
            return super().model_dump(*args, **kwargs)

        def model_copy(self, *args: Any, **kwargs: Any) -> Any:
            """See `pydantic.BaseModel.model_copy`."""
            return super().model_copy(*args, **kwargs)

        @classmethod
        def model_json_schema(cls, *args: Any, **kwargs: Any) -> Any:
            """See `pydantic.BaseModel.model_json_schema`."""
            return super().model_json_schema(*args, **kwargs)


MockDataset.model_rebuild()


class MockStorageClassDelegate(StorageClassDelegate):
    """Implementation of the StorageClassDelegate interface for mock datasets.

    This class does not implement assembly and disassembly just because it's
    not needed right now. That could be added in the future with some
    additional tracking attributes in `MockDataset`.
    """

    def assemble(self, components: dict[str, Any], pytype: type | None = None) -> MockDataset:
        # Docstring inherited.
        raise NotImplementedError("Mock storage classes do not implement assembly.")

    def getComponent(self, composite: Any, componentName: str) -> Any:
        # Docstring inherited.
        assert isinstance(
            composite, MockDataset
        ), f"MockStorageClassDelegate given a non-mock dataset {composite!r}."
        return composite.make_derived(
            name=f"{composite.dataset_type.name}.{componentName}",
            storageClass=self.storageClass.allComponents()[componentName].name,
            parentStorageClass=self.storageClass.name,
            parent=composite,
        )

    def disassemble(
        self, composite: Any, subset: Iterable | None = None, override: Any | None = None
    ) -> dict[str, DatasetComponent]:
        # Docstring inherited.
        raise NotImplementedError("Mock storage classes do not implement disassembly.")

    def handleParameters(self, inMemoryDataset: Any, parameters: Mapping[str, Any] | None = None) -> Any:
        # Docstring inherited.
        assert isinstance(
            inMemoryDataset, MockDataset
        ), f"MockStorageClassDelegate given a non-mock dataset {inMemoryDataset!r}."
        if not parameters:
            return inMemoryDataset
        return inMemoryDataset.make_derived(parameters={k: repr(v) for k, v in parameters.items()})


class MockStorageClass(StorageClass):
    """A reimplementation of `lsst.daf.butler.StorageClass` for mock datasets.

    Parameters
    ----------
    original : `~lsst.daf.butler.StorageClass`
        The original storage class.
    factory : `~lsst.daf.butler.StorageClassFactory` or `None`, optional
        Storage class factory to use. If `None` the default factory is used.

    Notes
    -----
    Each `MockStorageClass` instance corresponds to a real "original" storage
    class, with components and conversions that are mocks of the original's
    components and conversions. The ``pytype`` for all `MockStorageClass`
    instances is `MockDataset`.
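
    Conversion checks delegate to the original storage classes; a minimal
    sketch (the storage class names are illustrative and assumed to be
    registered):

    .. code-block:: python

        a = MockStorageClass.get_or_register_mock("StructuredDataDict")
        b = MockStorageClass.get_or_register_mock("StructuredDataList")
        # Same answer as a.original.can_convert(b.original).
        a.can_convert(b)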

    """

    def __init__(self, original: StorageClass, factory: StorageClassFactory | None = None):
        name = get_mock_name(original.name)
        if factory is None:
            factory = StorageClassFactory()
        super().__init__(
            name=name,
            pytype=MockDataset,
            components={
                k: self.get_or_register_mock(v.name, factory) for k, v in original.components.items()
            },
            derivedComponents={
                k: self.get_or_register_mock(v.name, factory) for k, v in original.derivedComponents.items()
            },
            parameters=frozenset(original.parameters),
            delegate=get_full_type_name(MockStorageClassDelegate),
            # Conversions work differently for mock storage classes, since they
            # all have the same pytype: we use the original storage class being
            # mocked to see if we can convert, then just make a new MockDataset
            # that points back to the original.
            converters={},
        )
        self.original = original
        # Make certain no one tries to use the converters.
        self._converters = None  # type: ignore

    def _get_converters_by_type(self) -> dict[type, Callable[[Any], Any]]:
        # Docstring inherited.
        raise NotImplementedError("MockStorageClass does not use converters.")

    @classmethod
    def get_or_register_mock(
        cls, original: str, factory: StorageClassFactory | None = None
    ) -> MockStorageClass:
        """Return a mock storage class for the given original storage class,
        creating and registering it if necessary.

        Parameters
        ----------
        original : `str`
            Name of the original storage class to be mocked.
        factory : `~lsst.daf.butler.StorageClassFactory`, optional
            Storage class factory singleton instance. If `None`, the default
            factory is used.

        Returns
        -------
        mock : `MockStorageClass`
            New storage class that mocks ``original``.
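
        Examples
        --------
        A sketch, assuming ``StructuredDataDict`` is a storage class
        registered in the default factory:

        .. code-block:: python

            mock = MockStorageClass.get_or_register_mock("StructuredDataDict")
            assert mock.name == "_mock_StructuredDataDict"
            assert mock.original.name == "StructuredDataDict"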

        """
        name = get_mock_name(original)
        if factory is None:
            factory = StorageClassFactory()
        if name in factory:
            return cast(MockStorageClass, factory.getStorageClass(name))
        else:
            result = cls(factory.getStorageClass(original), factory)
            factory.registerStorageClass(result)
            return result

    def allComponents(self) -> Mapping[str, MockStorageClass]:
        # Docstring inherited.
        return cast(Mapping[str, MockStorageClass], super().allComponents())

    @property
    def components(self) -> Mapping[str, MockStorageClass]:
        # Docstring inherited.
        return cast(Mapping[str, MockStorageClass], super().components)

    @property
    def derivedComponents(self) -> Mapping[str, MockStorageClass]:
        # Docstring inherited.
        return cast(Mapping[str, MockStorageClass], super().derivedComponents)

    def can_convert(self, other: StorageClass) -> bool:
        # Docstring inherited.
        if not isinstance(other, MockStorageClass):
            return False
        return self.original.can_convert(other.original)

    def coerce_type(self, incorrect: Any) -> Any:
        # Docstring inherited.
        if not isinstance(incorrect, MockDataset):
            raise TypeError(
                f"Mock storage class {self.name!r} can only convert in-memory datasets "
                f"corresponding to other mock storage classes, not {incorrect!r}."
            )
        factory = StorageClassFactory()
        other_storage_class = factory.getStorageClass(incorrect.storage_class)
        assert isinstance(other_storage_class, MockStorageClass), "Should not get a MockDataset otherwise."
        if other_storage_class.name == self.name:
            return incorrect
        if not self.can_convert(other_storage_class):
            raise TypeError(
                f"Mocked storage class {self.original.name!r} cannot convert from "
                f"{other_storage_class.original.name!r}."
            )
        return incorrect.make_derived(storageClass=self.name, converted_from=incorrect)

    @staticmethod
    def mock_dataset_type(original_type: DatasetType) -> DatasetType:
        """Replace a dataset type with a version that uses a mock storage
        class and name.

        Parameters
        ----------
        original_type : `lsst.daf.butler.DatasetType`
            Original dataset type to be mocked.

        Returns
        -------
        mock_type : `lsst.daf.butler.DatasetType`
            A mock version of the dataset type, with name and storage class
            changed and everything else unchanged.
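
        Examples
        --------
        A sketch of the round trip with `unmock_dataset_type`; ``original``
        is assumed to be an existing `~lsst.daf.butler.DatasetType`:

        .. code-block:: python

            mocked = MockStorageClass.mock_dataset_type(original)
            assert is_mock_name(mocked.name)
            restored = MockStorageClass.unmock_dataset_type(mocked)
            assert restored.name == original.name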

        """
        mock_storage_class = MockStorageClass.get_or_register_mock(original_type.storageClass_name)
        mock_parent_storage_class = None
        if original_type.parentStorageClass is not None:
            mock_parent_storage_class = MockStorageClass.get_or_register_mock(
                original_type.parentStorageClass.name
            )
        return DatasetType(
            get_mock_name(original_type.name),
            original_type.dimensions,
            mock_storage_class,
            isCalibration=original_type.isCalibration(),
            parentStorageClass=mock_parent_storage_class,
        )

    @staticmethod
    def mock_dataset_refs(original_refs: Iterable[DatasetRef]) -> list[DatasetRef]:
        """Replace dataset references with versions that use a mock storage
        class and dataset type name.

        Parameters
        ----------
        original_refs : `~collections.abc.Iterable` [ \
                `lsst.daf.butler.DatasetRef` ]
            Original dataset references to be mocked. All references are
            assumed to have the same dataset type, which is mocked once from
            the first reference.

        Returns
        -------
        mock_refs : `list` [ `lsst.daf.butler.DatasetRef` ]
            Mocked versions of the dataset references, with dataset type name
            and storage class changed and everything else unchanged.
        """
        original_refs = list(original_refs)
        if not original_refs:
            return original_refs
        dataset_type = MockStorageClass.mock_dataset_type(original_refs[0].datasetType)
        return [
            DatasetRef(dataset_type, original_ref.dataId, run=original_ref.run, id=original_ref.id)
            for original_ref in original_refs
        ]

    @staticmethod
    def unmock_dataset_type(mock_type: DatasetType) -> DatasetType:
        """Replace a mock dataset type with the original one it was created
        from.

        Parameters
        ----------
        mock_type : `lsst.daf.butler.DatasetType`
            A dataset type with a mocked name and storage class.

        Returns
        -------
        original_type : `lsst.daf.butler.DatasetType`
            The original dataset type.
        """
        storage_class = mock_type.storageClass
        parent_storage_class = mock_type.parentStorageClass
        if isinstance(storage_class, MockStorageClass):
            storage_class = storage_class.original
        if parent_storage_class is not None and isinstance(parent_storage_class, MockStorageClass):
            parent_storage_class = parent_storage_class.original
        return DatasetType(
            get_original_name(mock_type.name),
            mock_type.dimensions,
            storage_class,
            isCalibration=mock_type.isCalibration(),
            parentStorageClass=parent_storage_class,
        )

    @staticmethod
    def unmock_dataset_refs(mock_refs: Iterable[DatasetRef]) -> list[DatasetRef]:
        """Replace dataset references with versions that do not use a mock
        storage class and dataset type name.

        Parameters
        ----------
        mock_refs : `~collections.abc.Iterable` [ \
                `lsst.daf.butler.DatasetRef` ]
            Dataset references that use a mocked dataset type name and storage
            class. All references are assumed to have the same dataset type,
            which is restored once from the first reference.

        Returns
        -------
        original_refs : `list` [ `lsst.daf.butler.DatasetRef` ]
            The original dataset references.
        """
        mock_refs = list(mock_refs)
        if not mock_refs:
            return mock_refs
        dataset_type = MockStorageClass.unmock_dataset_type(mock_refs[0].datasetType)
        return [
            DatasetRef(dataset_type, mock_ref.dataId, run=mock_ref.run, id=mock_ref.id)
            for mock_ref in mock_refs
        ]


def _monkeypatch_daf_butler() -> None:
    """Replace methods in daf_butler's StorageClassFactory and
    FormatterFactory classes to automatically recognize mock storage classes.

    This monkey-patching is executed when the `lsst.pipe.base.tests.mocks`
    package is imported, and it affects all butler instances created before
    or after that import.
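
    For example, after ``import lsst.pipe.base.tests.mocks`` the default
    factory resolves mock storage class names on the fly; a sketch, with an
    illustrative original storage class name that is assumed to be
    registered:

    .. code-block:: python

        factory = StorageClassFactory()
        mock = factory.getStorageClass("_mock_StructuredDataDict")
        assert isinstance(mock, MockStorageClass)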

    """
    original_get_storage_class = StorageClassFactory.getStorageClass

    def new_get_storage_class(self: StorageClassFactory, storageClassName: str) -> StorageClass:
        try:
            return original_get_storage_class(self, storageClassName)
        except KeyError:
            if is_mock_name(storageClassName):
                return MockStorageClass.get_or_register_mock(get_original_name(storageClassName))
            raise

    StorageClassFactory.getStorageClass = new_get_storage_class  # type: ignore

    del new_get_storage_class

    original_get_formatter_class_with_match = FormatterFactory.getFormatterClassWithMatch

    def new_get_formatter_class_with_match(
        self: FormatterFactory, entity: Any
    ) -> tuple[LookupKey, type[Formatter], dict[str, Any]]:
        try:
            return original_get_formatter_class_with_match(self, entity)
        except KeyError:
            lookup_keys = (LookupKey(name=entity),) if isinstance(entity, str) else entity._lookupNames()
            for key in lookup_keys:
                # This matches mock dataset type names before mock storage
                # classes, and it would even match some regular dataset types
                # that are automatic connections (logs, configs, metadata) of
                # mocked tasks. The latter would be a problem, except that
                # those should have already matched in the try block above.
                if is_mock_name(key.name):
                    return (key, JsonFormatter, {})
            raise

    FormatterFactory.getFormatterClassWithMatch = new_get_formatter_class_with_match  # type: ignore

    del new_get_formatter_class_with_match

    original_get_formatter_with_match = FormatterFactory.getFormatterWithMatch

    def new_get_formatter_with_match(
        self: FormatterFactory, entity: Any, *args: Any, **kwargs: Any
    ) -> tuple[LookupKey, Formatter]:
        try:
            return original_get_formatter_with_match(self, entity, *args, **kwargs)
        except KeyError:
            lookup_keys = (LookupKey(name=entity),) if isinstance(entity, str) else entity._lookupNames()
            for key in lookup_keys:
                if is_mock_name(key.name):
                    return (key, JsonFormatter(*args, **kwargs))
            raise

    FormatterFactory.getFormatterWithMatch = new_get_formatter_with_match  # type: ignore

    del new_get_formatter_with_match


_monkeypatch_daf_butler()