Coverage for python/lsst/pipe/base/tests/mocks/_storage_class.py: 43%

171 statements  

« prev     ^ index     » next       coverage.py v7.3.0, created at 2023-08-31 09:39 +0000

1# This file is part of pipe_base. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ( 

25 "MockDataset", 

26 "MockStorageClass", 

27 "MockDatasetQuantum", 

28 "MockStorageClassDelegate", 

29 "get_mock_name", 

30 "get_original_name", 

31 "is_mock_name", 

32) 

33 

34import uuid 

35from collections.abc import Callable, Iterable, Mapping 

36from typing import Any, cast 

37 

38from lsst.daf.butler import ( 

39 DataIdValue, 

40 DatasetComponent, 

41 DatasetRef, 

42 DatasetType, 

43 Formatter, 

44 FormatterFactory, 

45 LookupKey, 

46 SerializedDatasetType, 

47 StorageClass, 

48 StorageClassDelegate, 

49 StorageClassFactory, 

50) 

51from lsst.daf.butler._compat import _BaseModelCompat 

52from lsst.daf.butler.formatters.json import JsonFormatter 

53from lsst.utils.introspection import get_full_type_name 

54 

# Prefix prepended to every mocked storage-class / dataset-type / task name.
_NAME_PREFIX: str = "_mock_"


def get_mock_name(original: str) -> str:
    """Return the mock name derived from an original storage class, dataset
    type, or task label name.
    """
    return f"{_NAME_PREFIX}{original}"

63 

64 

def get_original_name(mock: str) -> str:
    """Return the name of the original storage class, dataset type, or task
    label that corresponds to the given mock name.

    Parameters
    ----------
    mock : `str`
        Mock name, as produced by `get_mock_name`.

    Returns
    -------
    original : `str`
        The original name, with the mock prefix removed.

    Raises
    ------
    ValueError
        Raised if ``mock`` does not carry the mock-name prefix.
    """
    # The original used a bare ``assert`` here, which is stripped under
    # ``python -O`` and would then silently return the input unchanged;
    # raise an explicit error instead.
    if not mock.startswith(_NAME_PREFIX):
        raise ValueError(f"{mock!r} is not a mock name; expected prefix {_NAME_PREFIX!r}.")
    return mock.removeprefix(_NAME_PREFIX)

71 

72 

def is_mock_name(name: str) -> bool:
    """Report whether ``name`` is a mock storage class, dataset type, or task
    label name (i.e. whether it carries the mock-name prefix).
    """
    return name.startswith(_NAME_PREFIX)

78 

79 

80# Tests for this module are in the ci_middleware package, where we have easy 

81# access to complex real storage classes (and their pytypes) to test against. 

82 

83 

class MockDataset(_BaseModelCompat):
    """The in-memory dataset type used by `MockStorageClass`."""

    dataset_id: uuid.UUID | None
    """Universally unique identifier for this dataset."""

    dataset_type: SerializedDatasetType
    """Butler dataset type for this dataset.

    See the documentation for ``data_id`` for why this is a
    `~lsst.daf.butler.SerializedDatasetType` instead of a "real" one.
    """

    data_id: dict[str, DataIdValue]
    """Butler data ID for this dataset.

    This is a `~lsst.daf.butler.SerializedDataCoordinate` instead of a "real"
    one for two reasons:

    - the mock dataset may need to be read from disk in a context in which a
      `~lsst.daf.butler.DimensionUniverse` is unavailable;
    - we don't want the complexity of having a separate
      ``SerializedMockDataCoordinate``.
    """

    run: str | None
    """`~lsst.daf.butler.CollectionType.RUN` collection this dataset belongs
    to.
    """

    quantum: MockDatasetQuantum | None = None
    """Description of the quantum that produced this dataset."""

    output_connection_name: str | None = None
    """Name of the PipelineTask output connection that produced this
    dataset.
    """

    converted_from: MockDataset | None = None
    """Another `MockDataset` that underwent a storage class conversion to
    produce this one.
    """

    parent: MockDataset | None = None
    """Another `MockDataset` from which a component was extracted to form
    this one.
    """

    parameters: dict[str, str] | None = None
    """`repr` of all parameters applied when reading this dataset."""

    @property
    def storage_class(self) -> str:
        # Name of this dataset's (mock) storage class.
        return cast(str, self.dataset_type.storageClass)

    def make_derived(self, **kwargs: Any) -> MockDataset:
        """Return a new `MockDataset` that represents applying some storage
        class operation to this one.

        Keyword arguments are fields of `MockDataset` or
        `~lsst.daf.butler.SerializedDatasetType` to override in the result.
        """
        # Split out the keyword arguments that belong to the serialized
        # dataset type rather than to this model.
        type_overrides = {
            field: kwargs.pop(field)
            for field in tuple(kwargs)
            if field in SerializedDatasetType.model_fields  # type: ignore
        }
        kwargs.setdefault("dataset_type", self.dataset_type.copy(update=type_overrides))
        # These fields describe how this particular in-memory object came to
        # be, not the intrinsic on-disk thing, so they are not propagated to
        # the derived dataset unless explicitly overridden.
        for transient_field in ("converted_from", "parent", "parameters"):
            kwargs.setdefault(transient_field, None)
        # setdefault is used throughout so a caller may override anything
        # directly; optimizing away the work above for that rare case is not
        # worthwhile.
        return self.copy(update=kwargs)

160 

161 

class MockDatasetQuantum(_BaseModelCompat):
    """Description of the quantum that produced a mock dataset.

    This is also used to represent task-init operations for init-output mock
    datasets.
    """

    task_label: str
    """Label of the producing PipelineTask in its pipeline."""

    data_id: dict[str, DataIdValue]
    """Data ID for the quantum."""

    inputs: dict[str, list[MockDataset]]
    """Mock datasets provided as input to the quantum.

    Keys are task-internal connection names, not dataset type names.
    """

180 

181 

# Resolve the forward reference to MockDatasetQuantum in MockDataset's
# ``quantum`` field, now that MockDatasetQuantum is defined.
MockDataset.model_rebuild()

183 

184 

class MockStorageClassDelegate(StorageClassDelegate):
    """Implementation of the StorageClassDelegate interface for mock datasets.

    Assembly and disassembly are deliberately unimplemented because nothing
    needs them right now; support could be added in the future with some
    additional tracking attributes in `MockDataset`.
    """

    def assemble(self, components: dict[str, Any], pytype: type | None = None) -> MockDataset:
        # Docstring inherited.
        raise NotImplementedError("Mock storage classes do not implement assembly.")

    def getComponent(self, composite: Any, componentName: str) -> Any:
        # Docstring inherited.
        assert isinstance(
            composite, MockDataset
        ), f"MockStorageClassDelegate given a non-mock dataset {composite!r}."
        # A component read is modeled as a derived dataset whose dataset type
        # points at the component storage class and back at the parent.
        component_classes = self.storageClass.allComponents()
        return composite.make_derived(
            name=f"{composite.dataset_type.name}.{componentName}",
            storageClass=component_classes[componentName].name,
            parentStorageClass=self.storageClass.name,
            parent=composite,
        )

    def disassemble(
        self, composite: Any, subset: Iterable | None = None, override: Any | None = None
    ) -> dict[str, DatasetComponent]:
        # Docstring inherited.
        raise NotImplementedError("Mock storage classes do not implement disassembly.")

    def handleParameters(self, inMemoryDataset: Any, parameters: Mapping[str, Any] | None = None) -> Any:
        # Docstring inherited.
        assert isinstance(
            inMemoryDataset, MockDataset
        ), f"MockStorageClassDelegate given a non-mock dataset {inMemoryDataset!r}."
        if parameters:
            # Record the parameters (as reprs) on a derived dataset rather
            # than actually applying them.
            return inMemoryDataset.make_derived(
                parameters={name: repr(value) for name, value in parameters.items()}
            )
        return inMemoryDataset

223 

224 

class MockStorageClass(StorageClass):
    """A reimplementation of `lsst.daf.butler.StorageClass` for mock datasets.

    Each `MockStorageClass` instance corresponds to a real "original" storage
    class, with components and conversions that are mocks of the original's
    components and conversions. The `pytype` for all `MockStorageClass`
    instances is `MockDataset`.
    """

    def __init__(self, original: StorageClass, factory: StorageClassFactory | None = None):
        if factory is None:
            factory = StorageClassFactory()
        super().__init__(
            name=get_mock_name(original.name),
            pytype=MockDataset,
            components={
                k: self.get_or_register_mock(v.name, factory) for k, v in original.components.items()
            },
            derivedComponents={
                k: self.get_or_register_mock(v.name, factory) for k, v in original.derivedComponents.items()
            },
            parameters=frozenset(original.parameters),
            delegate=get_full_type_name(MockStorageClassDelegate),
            # Conversions work differently for mock storage classes, since
            # they all have the same pytype: we use the original storage class
            # being mocked to see if we can convert, then just make a new
            # MockDataset that points back to the original.
            converters={},
        )
        self.original = original
        # Poison the converter table so any accidental use fails loudly.
        self._converters = None  # type: ignore

    def _get_converters_by_type(self) -> dict[type, Callable[[Any], Any]]:
        # Docstring inherited.
        raise NotImplementedError("MockStorageClass does not use converters.")

    @classmethod
    def get_or_register_mock(
        cls, original: str, factory: StorageClassFactory | None = None
    ) -> MockStorageClass:
        """Return a mock storage class for the given original storage class,
        creating and registering it if necessary.

        Parameters
        ----------
        original : `str`
            Name of the original storage class to be mocked.
        factory : `~lsst.daf.butler.StorageClassFactory`, optional
            Storage class factory singleton instance.

        Returns
        -------
        mock : `MockStorageClass`
            New storage class that mocks ``original``.
        """
        if factory is None:
            factory = StorageClassFactory()
        mock_name = get_mock_name(original)
        if mock_name not in factory:
            mock = cls(factory.getStorageClass(original), factory)
            factory.registerStorageClass(mock)
            return mock
        return cast(MockStorageClass, factory.getStorageClass(mock_name))

    def allComponents(self) -> Mapping[str, MockStorageClass]:
        # Docstring inherited.
        return cast(Mapping[str, MockStorageClass], super().allComponents())

    @property
    def components(self) -> Mapping[str, MockStorageClass]:
        # Docstring inherited.
        return cast(Mapping[str, MockStorageClass], super().components)

    @property
    def derivedComponents(self) -> Mapping[str, MockStorageClass]:
        # Docstring inherited.
        return cast(Mapping[str, MockStorageClass], super().derivedComponents)

    def can_convert(self, other: StorageClass) -> bool:
        # Docstring inherited. Delegate the decision to the mocked originals;
        # only mocks can convert to mocks.
        return isinstance(other, MockStorageClass) and self.original.can_convert(other.original)

    def coerce_type(self, incorrect: Any) -> Any:
        # Docstring inherited.
        if not isinstance(incorrect, MockDataset):
            raise TypeError(
                f"Mock storage class {self.name!r} can only convert in-memory datasets "
                f"corresponding to other mock storage classes, not {incorrect!r}."
            )
        source_class = StorageClassFactory().getStorageClass(incorrect.storage_class)
        assert isinstance(source_class, MockStorageClass), "Should not get a MockDataset otherwise."
        if source_class.name == self.name:
            # Already the right storage class; nothing to do.
            return incorrect
        if not self.can_convert(source_class):
            raise TypeError(
                f"Mocked storage class {self.original.name!r} cannot convert from "
                f"{source_class.original.name!r}."
            )
        # Model the conversion as a derived dataset that remembers its source.
        return incorrect.make_derived(storageClass=self.name, converted_from=incorrect)

    @staticmethod
    def mock_dataset_type(original_type: DatasetType) -> DatasetType:
        """Replace a dataset type with a version that uses a mock storage
        class and name.

        Parameters
        ----------
        original_type : `lsst.daf.butler.DatasetType`
            Original dataset type to be mocked.

        Returns
        -------
        mock_type : `lsst.daf.butler.DatasetType`
            A mock version of the dataset type, with name and storage class
            changed and everything else unchanged.
        """
        parent = original_type.parentStorageClass
        return DatasetType(
            get_mock_name(original_type.name),
            original_type.dimensions,
            MockStorageClass.get_or_register_mock(original_type.storageClass_name),
            isCalibration=original_type.isCalibration(),
            parentStorageClass=(
                None if parent is None else MockStorageClass.get_or_register_mock(parent.name)
            ),
        )

    @staticmethod
    def mock_dataset_refs(original_refs: Iterable[DatasetRef]) -> list[DatasetRef]:
        """Replace dataset references with versions that use a mock storage
        class and dataset type name.

        Parameters
        ----------
        original_refs : `~collections.abc.Iterable` [ \
                `lsst.daf.butler.DatasetRef` ]
            Original dataset references to be mocked.

        Returns
        -------
        mock_refs : `list` [ `lsst.daf.butler.DatasetRef` ]
            Mocked version of the dataset references, with dataset type name
            and storage class changed and everything else unchanged.
        """
        refs = list(original_refs)
        if not refs:
            return refs
        # All refs are assumed to share one dataset type; mock it once.
        mock_type = MockStorageClass.mock_dataset_type(refs[0].datasetType)
        return [DatasetRef(mock_type, ref.dataId, run=ref.run, id=ref.id) for ref in refs]

    @staticmethod
    def unmock_dataset_type(mock_type: DatasetType) -> DatasetType:
        """Replace a mock dataset type with the original one it was created
        from.

        Parameters
        ----------
        mock_type : `lsst.daf.butler.DatasetType`
            A dataset type with a mocked name and storage class.

        Returns
        -------
        original_type : `lsst.daf.butler.DatasetType`
            The original dataset type.
        """
        parent = mock_type.parentStorageClass
        return DatasetType(
            get_original_name(mock_type.name),
            mock_type.dimensions,
            cast(MockStorageClass, mock_type.storageClass).original,
            isCalibration=mock_type.isCalibration(),
            parentStorageClass=(None if parent is None else cast(MockStorageClass, parent).original),
        )

    @staticmethod
    def unmock_dataset_refs(mock_refs: Iterable[DatasetRef]) -> list[DatasetRef]:
        """Replace dataset references with versions that do not use a mock
        storage class and dataset type name.

        Parameters
        ----------
        mock_refs : `~collections.abc.Iterable` [ \
                `lsst.daf.butler.DatasetRef` ]
            Dataset references that use a mocked dataset type name and storage
            class.

        Returns
        -------
        original_refs : `list` [ `lsst.daf.butler.DatasetRef` ]
            The original dataset references.
        """
        refs = list(mock_refs)
        if not refs:
            return refs
        # All refs are assumed to share one dataset type; unmock it once.
        original_type = MockStorageClass.unmock_dataset_type(refs[0].datasetType)
        return [DatasetRef(original_type, ref.dataId, run=ref.run, id=ref.id) for ref in refs]

439 

440 

def _monkeypatch_daf_butler() -> None:
    """Replace methods in daf_butler's StorageClassFactory and FormatterFactory
    classes to automatically recognize mock storage classes.

    This monkey-patching is executed when the `lsst.pipe.base.tests.mocks`
    package is imported, and it affects all butler instances created before or
    after that import.
    """
    _original_get_storage_class = StorageClassFactory.getStorageClass

    def _get_storage_class(self: StorageClassFactory, storageClassName: str) -> StorageClass:
        # Fall back to registering a mock on demand when the name is a mock
        # name the factory has not seen yet.
        try:
            return _original_get_storage_class(self, storageClassName)
        except KeyError:
            if not is_mock_name(storageClassName):
                raise
            return MockStorageClass.get_or_register_mock(get_original_name(storageClassName))

    StorageClassFactory.getStorageClass = _get_storage_class  # type: ignore
    del _get_storage_class

    _original_class_with_match = FormatterFactory.getFormatterClassWithMatch

    def _class_with_match(
        self: FormatterFactory, entity: Any
    ) -> tuple[LookupKey, type[Formatter], dict[str, Any]]:
        try:
            return _original_class_with_match(self, entity)
        except KeyError:
            keys = (LookupKey(name=entity),) if isinstance(entity, str) else entity._lookupNames()
            # This matches mock dataset type names before mock storage
            # classes, and it would even match some regular dataset types
            # that are automatic connections (logs, configs, metadata) of
            # mocked tasks. The latter would be a problem, except that
            # those should have already matched in the try block above.
            for key in keys:
                if is_mock_name(key.name):
                    return (key, JsonFormatter, {})
            raise

    FormatterFactory.getFormatterClassWithMatch = _class_with_match  # type: ignore
    del _class_with_match

    _original_with_match = FormatterFactory.getFormatterWithMatch

    def _with_match(
        self: FormatterFactory, entity: Any, *args: Any, **kwargs: Any
    ) -> tuple[LookupKey, Formatter]:
        try:
            return _original_with_match(self, entity, *args, **kwargs)
        except KeyError:
            keys = (LookupKey(name=entity),) if isinstance(entity, str) else entity._lookupNames()
            for key in keys:
                if is_mock_name(key.name):
                    # Mock datasets are always stored as JSON.
                    return (key, JsonFormatter(*args, **kwargs))
            raise

    FormatterFactory.getFormatterWithMatch = _with_match  # type: ignore
    del _with_match

503 

504 

# Install the daf_butler monkey-patches as a module-import side effect.
_monkeypatch_daf_butler()