# This file is part of pipe_base.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = (
    "MockDataset",
    "MockStorageClass",
    "MockDatasetQuantum",
    "MockStorageClassDelegate",
    "get_mock_name",
    "get_original_name",
    "is_mock_name",
)

import uuid
from collections.abc import Callable, Iterable, Mapping
from typing import Any, cast

from lsst.daf.butler import (
    DataIdValue,
    DatasetComponent,
    DatasetRef,
    DatasetType,
    Formatter,
    FormatterFactory,
    LookupKey,
    SerializedDatasetType,
    StorageClass,
    StorageClassDelegate,
    StorageClassFactory,
)
from lsst.daf.butler._compat import _BaseModelCompat
from lsst.daf.butler.formatters.json import JsonFormatter
from lsst.utils.introspection import get_full_type_name

_NAME_PREFIX: str = "_mock_"


def get_mock_name(original: str) -> str:
    """Return the name of the mock storage class, dataset type, or task label
    for the given original name.
    """
    return _NAME_PREFIX + original


def get_original_name(mock: str) -> str:
    """Return the name of the original storage class, dataset type, or task
    label that corresponds to the given mock name.
    """
    assert mock.startswith(_NAME_PREFIX)
    return mock.removeprefix(_NAME_PREFIX)


def is_mock_name(name: str) -> bool:
    """Return whether the given name is that of a mock storage class, dataset
    type, or task label.
    """
    return name.startswith(_NAME_PREFIX)
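
# A minimal round-trip sketch of the naming convention defined above (the
# storage class name "ArrowTable" is purely illustrative):
#
#     get_mock_name("ArrowTable")            # "_mock_ArrowTable"
#     is_mock_name("_mock_ArrowTable")       # True
#     get_original_name("_mock_ArrowTable")  # "ArrowTable"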


# Tests for this module are in the ci_middleware package, where we have easy
# access to complex real storage classes (and their pytypes) to test against.


class MockDataset(_BaseModelCompat):
    """The in-memory dataset type used by `MockStorageClass`."""

    dataset_id: uuid.UUID | None
    """Universally unique identifier for this dataset."""

    dataset_type: SerializedDatasetType
    """Butler dataset type of this dataset.

    See the documentation for ``data_id`` for why this is a
    `~lsst.daf.butler.SerializedDatasetType` instead of a "real" one.
    """

    data_id: dict[str, DataIdValue]
    """Butler data ID for this dataset.

    This is a `~lsst.daf.butler.SerializedDataCoordinate` instead of a "real"
    one for two reasons:

    - the mock dataset may need to be read from disk in a context in which a
      `~lsst.daf.butler.DimensionUniverse` is unavailable;
    - we don't want the complexity of having a separate
      ``SerializedMockDataCoordinate``.
    """

    run: str | None
    """`~lsst.daf.butler.CollectionType.RUN` collection this dataset belongs
    to.
    """

    quantum: MockDatasetQuantum | None = None
    """Description of the quantum that produced this dataset."""

    output_connection_name: str | None = None
    """The name of the PipelineTask output connection that produced this
    dataset.
    """

    converted_from: MockDataset | None = None
    """Another `MockDataset` that underwent a storage class conversion to
    produce this one.
    """

    parent: MockDataset | None = None
    """Another `MockDataset` from which a component was extracted to form
    this one.
    """

    parameters: dict[str, str] | None = None
    """`repr` of all parameters applied when reading this dataset."""

    @property
    def storage_class(self) -> str:
        return cast(str, self.dataset_type.storageClass)

    def make_derived(self, **kwargs: Any) -> MockDataset:
        """Return a new MockDataset that represents applying some storage
        class operation to this one.

        Keyword arguments are fields of `MockDataset` or
        `~lsst.daf.butler.SerializedDatasetType` to override in the result.
        """
        dataset_type_updates = {
            k: kwargs.pop(k) for k in list(kwargs) if k in SerializedDatasetType.model_fields  # type: ignore
        }
        kwargs.setdefault("dataset_type", self.dataset_type.copy(update=dataset_type_updates))
        # Fields below are those that should not be propagated to the derived
        # dataset, because they're not about the intrinsic on-disk thing.
        kwargs.setdefault("converted_from", None)
        kwargs.setdefault("parent", None)
        kwargs.setdefault("parameters", None)
        # Also use setdefault on the ref in case the caller wants to override
        # that directly, but this is expected to be rare enough that it's not
        # worth trying to optimize out the work above to make derived_ref.
        return self.copy(update=kwargs)
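
    # Sketch of typical make_derived usage: keyword overrides are split
    # between MockDataset fields and nested SerializedDatasetType fields
    # ("dataset" is an assumed existing MockDataset; the component name is
    # illustrative):
    #
    #     component = dataset.make_derived(
    #         name=f"{dataset.dataset_type.name}.image",  # dataset type field
    #         parent=dataset,                             # MockDataset field
    #     )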


class MockDatasetQuantum(_BaseModelCompat):
    """Description of the quantum that produced a mock dataset.

    This is also used to represent task-init operations for init-output mock
    datasets.
    """

    task_label: str
    """Label of the producing PipelineTask in its pipeline."""

    data_id: dict[str, DataIdValue]
    """Data ID for the quantum."""

    inputs: dict[str, list[MockDataset]]
    """Mock datasets provided as input to the quantum.

    Keys are task-internal connection names, not dataset type names.
    """


MockDataset.model_rebuild()


class MockStorageClassDelegate(StorageClassDelegate):
    """Implementation of the StorageClassDelegate interface for mock datasets.

    This class does not implement assembly and disassembly only because they
    are not needed right now. That could be added in the future with some
    additional tracking attributes in `MockDataset`.
    """

    def assemble(self, components: dict[str, Any], pytype: type | None = None) -> MockDataset:
        # Docstring inherited.
        raise NotImplementedError("Mock storage classes do not implement assembly.")

    def getComponent(self, composite: Any, componentName: str) -> Any:
        # Docstring inherited.
        assert isinstance(
            composite, MockDataset
        ), f"MockStorageClassDelegate given a non-mock dataset {composite!r}."
        return composite.make_derived(
            name=f"{composite.dataset_type.name}.{componentName}",
            storageClass=self.storageClass.allComponents()[componentName].name,
            parentStorageClass=self.storageClass.name,
            parent=composite,
        )

    def disassemble(
        self, composite: Any, subset: Iterable | None = None, override: Any | None = None
    ) -> dict[str, DatasetComponent]:
        # Docstring inherited.
        raise NotImplementedError("Mock storage classes do not implement disassembly.")

    def handleParameters(self, inMemoryDataset: Any, parameters: Mapping[str, Any] | None = None) -> Any:
        # Docstring inherited.
        assert isinstance(
            inMemoryDataset, MockDataset
        ), f"MockStorageClassDelegate given a non-mock dataset {inMemoryDataset!r}."
        if not parameters:
            return inMemoryDataset
        return inMemoryDataset.make_derived(parameters={k: repr(v) for k, v in parameters.items()})
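
    # Sketch of the delegate's parameter handling ("delegate" and "dataset"
    # are assumed instances of MockStorageClassDelegate and MockDataset):
    #
    #     derived = delegate.handleParameters(dataset, {"columns": ["a", "b"]})
    #     derived.parameters  # {"columns": "['a', 'b']"}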


class MockStorageClass(StorageClass):
    """A reimplementation of `lsst.daf.butler.StorageClass` for mock datasets.

    Each `MockStorageClass` instance corresponds to a real "original" storage
    class, with components and conversions that are mocks of the original's
    components and conversions. The `pytype` for all `MockStorageClass`
    instances is `MockDataset`.
    """

    def __init__(self, original: StorageClass, factory: StorageClassFactory | None = None):
        name = get_mock_name(original.name)
        if factory is None:
            factory = StorageClassFactory()
        super().__init__(
            name=name,
            pytype=MockDataset,
            components={
                k: self.get_or_register_mock(v.name, factory) for k, v in original.components.items()
            },
            derivedComponents={
                k: self.get_or_register_mock(v.name, factory) for k, v in original.derivedComponents.items()
            },
            parameters=frozenset(original.parameters),
            delegate=get_full_type_name(MockStorageClassDelegate),
            # Conversions work differently for mock storage classes, since
            # they all have the same pytype: we use the original storage class
            # being mocked to see if we can convert, then just make a new
            # MockDataset that points back to the original.
            converters={},
        )
        self.original = original
        # Make certain no one tries to use the converters.
        self._converters = None  # type: ignore

    def _get_converters_by_type(self) -> dict[type, Callable[[Any], Any]]:
        # Docstring inherited.
        raise NotImplementedError("MockStorageClass does not use converters.")

    @classmethod
    def get_or_register_mock(
        cls, original: str, factory: StorageClassFactory | None = None
    ) -> MockStorageClass:
        """Return a mock storage class for the given original storage class,
        creating and registering it if necessary.

        Parameters
        ----------
        original : `str`
            Name of the original storage class to be mocked.
        factory : `~lsst.daf.butler.StorageClassFactory`, optional
            Storage class factory singleton instance.

        Returns
        -------
        mock : `MockStorageClass`
            New storage class that mocks ``original``.
        """
        name = get_mock_name(original)
        if factory is None:
            factory = StorageClassFactory()
        if name in factory:
            return cast(MockStorageClass, factory.getStorageClass(name))
        else:
            result = cls(factory.getStorageClass(original), factory)
            factory.registerStorageClass(result)
            return result
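
    # Typical usage sketch ("StructuredDataDict" is an illustrative name for
    # a storage class already registered with the singleton factory):
    #
    #     mock = MockStorageClass.get_or_register_mock("StructuredDataDict")
    #     mock.name                   # "_mock_StructuredDataDict"
    #     mock.pytype is MockDataset  # True
    #
    # A second call with the same name returns the already-registered
    # instance.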

    def allComponents(self) -> Mapping[str, MockStorageClass]:
        # Docstring inherited.
        return cast(Mapping[str, MockStorageClass], super().allComponents())

    @property
    def components(self) -> Mapping[str, MockStorageClass]:
        # Docstring inherited.
        return cast(Mapping[str, MockStorageClass], super().components)

    @property
    def derivedComponents(self) -> Mapping[str, MockStorageClass]:
        # Docstring inherited.
        return cast(Mapping[str, MockStorageClass], super().derivedComponents)

    def can_convert(self, other: StorageClass) -> bool:
        # Docstring inherited.
        if not isinstance(other, MockStorageClass):
            return False
        return self.original.can_convert(other.original)

    def coerce_type(self, incorrect: Any) -> Any:
        # Docstring inherited.
        if not isinstance(incorrect, MockDataset):
            raise TypeError(
                f"Mock storage class {self.name!r} can only convert in-memory datasets "
                f"corresponding to other mock storage classes, not {incorrect!r}."
            )
        factory = StorageClassFactory()
        other_storage_class = factory.getStorageClass(incorrect.storage_class)
        assert isinstance(other_storage_class, MockStorageClass), "Should not get a MockDataset otherwise."
        if other_storage_class.name == self.name:
            return incorrect
        if not self.can_convert(other_storage_class):
            raise TypeError(
                f"Mocked storage class {self.original.name!r} cannot convert from "
                f"{other_storage_class.original.name!r}."
            )
        return incorrect.make_derived(storageClass=self.name, converted_from=incorrect)
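
    # Conversion sketch: coercion keeps the MockDataset pytype and records
    # the conversion instead ("ArrowAstropy" is illustrative, "mock_dataset"
    # is an assumed MockDataset, and the underlying original storage classes
    # are assumed to be convertible):
    #
    #     target = MockStorageClass.get_or_register_mock("ArrowAstropy")
    #     converted = target.coerce_type(mock_dataset)
    #     converted.storage_class                   # "_mock_ArrowAstropy"
    #     converted.converted_from is mock_dataset  # True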

    @staticmethod
    def mock_dataset_type(original_type: DatasetType) -> DatasetType:
        """Replace a dataset type with a version that uses a mock storage
        class and name.

        Parameters
        ----------
        original_type : `lsst.daf.butler.DatasetType`
            Original dataset type to be mocked.

        Returns
        -------
        mock_type : `lsst.daf.butler.DatasetType`
            A mock version of the dataset type, with name and storage class
            changed and everything else unchanged.
        """
        mock_storage_class = MockStorageClass.get_or_register_mock(original_type.storageClass_name)
        mock_parent_storage_class = None
        if original_type.parentStorageClass is not None:
            mock_parent_storage_class = MockStorageClass.get_or_register_mock(
                original_type.parentStorageClass.name
            )
        return DatasetType(
            get_mock_name(original_type.name),
            original_type.dimensions,
            mock_storage_class,
            isCalibration=original_type.isCalibration(),
            parentStorageClass=mock_parent_storage_class,
        )

    @staticmethod
    def mock_dataset_refs(original_refs: Iterable[DatasetRef]) -> list[DatasetRef]:
        """Replace dataset references with versions that use a mock storage
        class and dataset type name.

        Parameters
        ----------
        original_refs : `~collections.abc.Iterable` [ \
                `lsst.daf.butler.DatasetRef` ]
            Original dataset references to be mocked. All references are
            assumed to have the same dataset type.

        Returns
        -------
        mock_refs : `list` [ `lsst.daf.butler.DatasetRef` ]
            Mocked versions of the dataset references, with dataset type name
            and storage class changed and everything else unchanged.
        """
        original_refs = list(original_refs)
        if not original_refs:
            return original_refs
        # All refs share one dataset type, so mock it once from the first ref.
        dataset_type = MockStorageClass.mock_dataset_type(original_refs[0].datasetType)
        return [
            DatasetRef(dataset_type, original_ref.dataId, run=original_ref.run, id=original_ref.id)
            for original_ref in original_refs
        ]
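
    # Usage sketch ("refs" is an assumed homogeneous list of DatasetRefs):
    #
    #     mocked = MockStorageClass.mock_dataset_refs(refs)
    #     mocked[0].datasetType.name  # "_mock_" + refs[0].datasetType.name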

    @staticmethod
    def unmock_dataset_type(mock_type: DatasetType) -> DatasetType:
        """Replace a mock dataset type with the original one it was created
        from.

        Parameters
        ----------
        mock_type : `lsst.daf.butler.DatasetType`
            A dataset type with a mocked name and storage class.

        Returns
        -------
        original_type : `lsst.daf.butler.DatasetType`
            The original dataset type.
        """
        storage_class = mock_type.storageClass
        parent_storage_class = mock_type.parentStorageClass
        if isinstance(storage_class, MockStorageClass):
            storage_class = storage_class.original
        if parent_storage_class is not None and isinstance(parent_storage_class, MockStorageClass):
            parent_storage_class = parent_storage_class.original
        return DatasetType(
            get_original_name(mock_type.name),
            mock_type.dimensions,
            storage_class,
            isCalibration=mock_type.isCalibration(),
            parentStorageClass=parent_storage_class,
        )

    @staticmethod
    def unmock_dataset_refs(mock_refs: Iterable[DatasetRef]) -> list[DatasetRef]:
        """Replace dataset references with versions that do not use a mock
        storage class and dataset type name.

        Parameters
        ----------
        mock_refs : `~collections.abc.Iterable` [ \
                `lsst.daf.butler.DatasetRef` ]
            Dataset references that use a mocked dataset type name and storage
            class. All references are assumed to have the same dataset type.

        Returns
        -------
        original_refs : `list` [ `lsst.daf.butler.DatasetRef` ]
            The original dataset references.
        """
        mock_refs = list(mock_refs)
        if not mock_refs:
            return mock_refs
        # All refs share one dataset type, so unmock it once from the first
        # ref.
        dataset_type = MockStorageClass.unmock_dataset_type(mock_refs[0].datasetType)
        return [
            DatasetRef(dataset_type, mock_ref.dataId, run=mock_ref.run, id=mock_ref.id)
            for mock_ref in mock_refs
        ]
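
    # The unmock helpers invert the mock helpers; as a sketch, for any
    # "original_type" DatasetType backed by a real storage class:
    #
    #     mock_type = MockStorageClass.mock_dataset_type(original_type)
    #     MockStorageClass.unmock_dataset_type(mock_type) == original_type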


def _monkeypatch_daf_butler() -> None:
    """Replace methods in daf_butler's StorageClassFactory and
    FormatterFactory classes to automatically recognize mock storage classes.

    This monkey-patching is executed when the `lsst.pipe.base.tests.mocks`
    package is imported, and it affects all butler instances created before
    or after that import.
    """
    original_get_storage_class = StorageClassFactory.getStorageClass

    def new_get_storage_class(self: StorageClassFactory, storageClassName: str) -> StorageClass:
        try:
            return original_get_storage_class(self, storageClassName)
        except KeyError:
            if is_mock_name(storageClassName):
                return MockStorageClass.get_or_register_mock(get_original_name(storageClassName))
            raise

    StorageClassFactory.getStorageClass = new_get_storage_class  # type: ignore

    del new_get_storage_class

    original_get_formatter_class_with_match = FormatterFactory.getFormatterClassWithMatch

    def new_get_formatter_class_with_match(
        self: FormatterFactory, entity: Any
    ) -> tuple[LookupKey, type[Formatter], dict[str, Any]]:
        try:
            return original_get_formatter_class_with_match(self, entity)
        except KeyError:
            lookup_keys = (LookupKey(name=entity),) if isinstance(entity, str) else entity._lookupNames()
            for key in lookup_keys:
                # This matches mock dataset type names before mock storage
                # classes, and it would even match some regular dataset types
                # that are automatic connections (logs, configs, metadata) of
                # mocked tasks. The latter would be a problem, except that
                # those should have already matched in the try block above.
                if is_mock_name(key.name):
                    return (key, JsonFormatter, {})
            raise

    FormatterFactory.getFormatterClassWithMatch = new_get_formatter_class_with_match  # type: ignore

    del new_get_formatter_class_with_match

    original_get_formatter_with_match = FormatterFactory.getFormatterWithMatch

    def new_get_formatter_with_match(
        self: FormatterFactory, entity: Any, *args: Any, **kwargs: Any
    ) -> tuple[LookupKey, Formatter]:
        try:
            return original_get_formatter_with_match(self, entity, *args, **kwargs)
        except KeyError:
            lookup_keys = (LookupKey(name=entity),) if isinstance(entity, str) else entity._lookupNames()
            for key in lookup_keys:
                if is_mock_name(key.name):
                    return (key, JsonFormatter(*args, **kwargs))
            raise

    FormatterFactory.getFormatterWithMatch = new_get_formatter_with_match  # type: ignore

    del new_get_formatter_with_match


_monkeypatch_daf_butler()
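
# Once this module has been imported, the patched factories resolve mock
# names transparently; a sketch ("StructuredDataDict" is illustrative):
#
#     factory = StorageClassFactory()
#     sc = factory.getStorageClass("_mock_StructuredDataDict")
#     isinstance(sc, MockStorageClass)  # True; registered on first lookup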