Coverage for python/lsst/pipe/base/tests/mocks/_storage_class.py: 43%
173 statements
coverage.py v7.3.2, created at 2023-12-06 10:56 +0000
# This file is part of pipe_base.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = (
    "MockDataset",
    "MockStorageClass",
    "MockDatasetQuantum",
    "MockStorageClassDelegate",
    "get_mock_name",
    "get_original_name",
    "is_mock_name",
)

import uuid
from collections.abc import Callable, Iterable, Mapping
from typing import Any, cast

from lsst.daf.butler import (
    DataIdValue,
    DatasetComponent,
    DatasetRef,
    DatasetType,
    Formatter,
    FormatterFactory,
    LookupKey,
    SerializedDatasetType,
    StorageClass,
    StorageClassDelegate,
    StorageClassFactory,
)
from lsst.daf.butler._compat import _BaseModelCompat
from lsst.daf.butler.formatters.json import JsonFormatter
from lsst.utils.introspection import get_full_type_name

_NAME_PREFIX: str = "_mock_"


def get_mock_name(original: str) -> str:
    """Return the name of the mock storage class, dataset type, or task label
    for the given original name.
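
    Examples
    --------
    The mock name is the fixed ``_mock_`` prefix plus the original name:

    >>> get_mock_name("ArrowTable")
    '_mock_ArrowTable'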
67 """
68 return _NAME_PREFIX + original


def get_original_name(mock: str) -> str:
    """Return the name of the original storage class, dataset type, or task
    label that corresponds to the given mock name.
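
    Examples
    --------
    This is the inverse of `get_mock_name`, stripping the fixed prefix:

    >>> get_original_name("_mock_ArrowTable")
    'ArrowTable'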
74 """
75 assert mock.startswith(_NAME_PREFIX)
76 return mock.removeprefix(_NAME_PREFIX)


def is_mock_name(name: str) -> bool:
    """Return whether the given name is that of a mock storage class, dataset
    type, or task label.
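
    Examples
    --------
    Only names carrying the fixed ``_mock_`` prefix qualify:

    >>> is_mock_name(get_mock_name("ArrowTable"))
    True
    >>> is_mock_name("ArrowTable")
    False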
82 """
83 return name.startswith(_NAME_PREFIX)


# Tests for this module are in the ci_middleware package, where we have easy
# access to complex real storage classes (and their pytypes) to test against.


class MockDataset(_BaseModelCompat):
    """The in-memory dataset type used by `MockStorageClass`."""

    dataset_id: uuid.UUID | None
    """Universal unique identifier for this dataset."""

    dataset_type: SerializedDatasetType
    """Butler dataset type for this dataset.

    See the documentation for ``data_id`` for why this is a
    `~lsst.daf.butler.SerializedDatasetType` instead of a "real" one.
    """

    data_id: dict[str, DataIdValue]
    """Butler data ID for this dataset.

    This is a simple dictionary of data ID values instead of a "real"
    `~lsst.daf.butler.DataCoordinate` for two reasons:

    - the mock dataset may need to be read from disk in a context in which a
      `~lsst.daf.butler.DimensionUniverse` is unavailable;
    - we don't want the complexity of having a separate
      ``SerializedMockDataCoordinate``.
    """

    run: str | None
    """`~lsst.daf.butler.CollectionType.RUN` collection this dataset belongs
    to.
    """

    quantum: MockDatasetQuantum | None = None
    """Description of the quantum that produced this dataset."""

    output_connection_name: str | None = None
    """The name of the PipelineTask output connection that produced this
    dataset.
    """

    converted_from: MockDataset | None = None
    """Another `MockDataset` that underwent a storage class conversion to
    produce this one.
    """

    parent: MockDataset | None = None
    """Another `MockDataset` from which a component was extracted to form this
    one.
    """

    parameters: dict[str, str] | None = None
    """`repr` of all parameters applied when reading this dataset."""

    @property
    def storage_class(self) -> str:
        return cast(str, self.dataset_type.storageClass)

    def make_derived(self, **kwargs: Any) -> MockDataset:
        """Return a new MockDataset that represents applying some storage
        class operation to this one.

        Keyword arguments are fields of `MockDataset` or
        `~lsst.daf.butler.SerializedDatasetType` to override in the result.
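
        Examples
        --------
        A minimal sketch; ``mock_dataset`` is assumed to be an existing
        `MockDataset`, and the field values here are illustrative only:

        >>> derived = mock_dataset.make_derived(
        ...     storageClass="_mock_ArrowAstropy",
        ...     parameters={"columns": repr(["a", "b"])},
        ... )  # doctest: +SKIP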
152 """
153 dataset_type_updates = {
154 k: kwargs.pop(k) for k in list(kwargs) if k in SerializedDatasetType.model_fields # type: ignore
155 }
156 kwargs.setdefault("dataset_type", self.dataset_type.copy(update=dataset_type_updates))
157 # Fields below are those that should not be propagated to the derived
158 # dataset, because they're not about the intrinsic on-disk thing.
159 kwargs.setdefault("converted_from", None)
160 kwargs.setdefault("parent", None)
161 kwargs.setdefault("parameters", None)
162 # Also use setdefault on the ref in case caller wants to override that
163 # directly, but this is expected to be rare enough that it's not worth
164 # it to try to optimize out the work above to make derived_ref.
165 return self.copy(update=kwargs)


class MockDatasetQuantum(_BaseModelCompat):
    """Description of the quantum that produced a mock dataset.

    This is also used to represent task-init operations for init-output mock
    datasets.
    """

    task_label: str
    """Label of the producing PipelineTask in its pipeline."""

    data_id: dict[str, DataIdValue]
    """Data ID for the quantum."""

    inputs: dict[str, list[MockDataset]]
    """Mock datasets provided as input to the quantum.

    Keys are task-internal connection names, not dataset type names.
    """


MockDataset.model_rebuild()


class MockStorageClassDelegate(StorageClassDelegate):
    """Implementation of the StorageClassDelegate interface for mock datasets.

    This class does not implement assembly and disassembly only because they
    are not needed right now. Support could be added in the future with some
    additional tracking attributes in `MockDataset`.
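
    Examples
    --------
    A sketch of component extraction, assuming ``delegate`` is an instance of
    this class and ``composite`` is a `MockDataset` whose storage class
    defines an ``image`` component (both names are illustrative):

    >>> component = delegate.getComponent(composite, "image")  # doctest: +SKIP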
197 """
199 def assemble(self, components: dict[str, Any], pytype: type | None = None) -> MockDataset:
200 # Docstring inherited.
201 raise NotImplementedError("Mock storage classes do not implement assembly.")
203 def getComponent(self, composite: Any, componentName: str) -> Any:
204 # Docstring inherited.
205 assert isinstance(
206 composite, MockDataset
207 ), f"MockStorageClassDelegate given a non-mock dataset {composite!r}."
208 return composite.make_derived(
209 name=f"{composite.dataset_type.name}.{componentName}",
210 storageClass=self.storageClass.allComponents()[componentName].name,
211 parentStorageClass=self.storageClass.name,
212 parent=composite,
213 )
215 def disassemble(
216 self, composite: Any, subset: Iterable | None = None, override: Any | None = None
217 ) -> dict[str, DatasetComponent]:
218 # Docstring inherited.
219 raise NotImplementedError("Mock storage classes do not implement disassembly.")
221 def handleParameters(self, inMemoryDataset: Any, parameters: Mapping[str, Any] | None = None) -> Any:
222 # Docstring inherited.
223 assert isinstance(
224 inMemoryDataset, MockDataset
225 ), f"MockStorageClassDelegate given a non-mock dataset {inMemoryDataset!r}."
226 if not parameters:
227 return inMemoryDataset
228 return inMemoryDataset.make_derived(parameters={k: repr(v) for k, v in parameters.items()})


class MockStorageClass(StorageClass):
    """A reimplementation of `lsst.daf.butler.StorageClass` for mock datasets.

    Each `MockStorageClass` instance corresponds to a real "original" storage
    class, with components and conversions that are mocks of the original's
    components and conversions. The `pytype` for all `MockStorageClass`
    instances is `MockDataset`.
    """

    def __init__(self, original: StorageClass, factory: StorageClassFactory | None = None):
        name = get_mock_name(original.name)
        if factory is None:
            factory = StorageClassFactory()
        super().__init__(
            name=name,
            pytype=MockDataset,
            components={
                k: self.get_or_register_mock(v.name, factory) for k, v in original.components.items()
            },
            derivedComponents={
                k: self.get_or_register_mock(v.name, factory) for k, v in original.derivedComponents.items()
            },
            parameters=frozenset(original.parameters),
            delegate=get_full_type_name(MockStorageClassDelegate),
            # Conversions work differently for mock storage classes, since they
            # all have the same pytype: we use the original storage class being
            # mocked to see if we can convert, then just make a new MockDataset
            # that points back to the original.
            converters={},
        )
        self.original = original
        # Make certain no one tries to use the converters.
        self._converters = None  # type: ignore

    def _get_converters_by_type(self) -> dict[type, Callable[[Any], Any]]:
        # Docstring inherited.
        raise NotImplementedError("MockStorageClass does not use converters.")

    @classmethod
    def get_or_register_mock(
        cls, original: str, factory: StorageClassFactory | None = None
    ) -> MockStorageClass:
        """Return a mock storage class for the given original storage class,
        creating and registering it if necessary.

        Parameters
        ----------
        original : `str`
            Name of the original storage class to be mocked.
        factory : `~lsst.daf.butler.StorageClassFactory`, optional
            Storage class factory singleton instance.

        Returns
        -------
        mock : `MockStorageClass`
            New storage class that mocks ``original``.
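
        Examples
        --------
        A minimal sketch, assuming the default daf_butler configuration
        defines a ``StructuredDataDict`` storage class:

        >>> mock = MockStorageClass.get_or_register_mock("StructuredDataDict")
        >>> mock.name
        '_mock_StructuredDataDict'
        >>> mock.original.name
        'StructuredDataDict'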
287 """
288 name = get_mock_name(original)
289 if factory is None:
290 factory = StorageClassFactory()
291 if name in factory:
292 return cast(MockStorageClass, factory.getStorageClass(name))
293 else:
294 result = cls(factory.getStorageClass(original), factory)
295 factory.registerStorageClass(result)
296 return result

    def allComponents(self) -> Mapping[str, MockStorageClass]:
        # Docstring inherited.
        return cast(Mapping[str, MockStorageClass], super().allComponents())

    @property
    def components(self) -> Mapping[str, MockStorageClass]:
        # Docstring inherited.
        return cast(Mapping[str, MockStorageClass], super().components)

    @property
    def derivedComponents(self) -> Mapping[str, MockStorageClass]:
        # Docstring inherited.
        return cast(Mapping[str, MockStorageClass], super().derivedComponents)

    def can_convert(self, other: StorageClass) -> bool:
        # Docstring inherited.
        if not isinstance(other, MockStorageClass):
            return False
        return self.original.can_convert(other.original)

    def coerce_type(self, incorrect: Any) -> Any:
        # Docstring inherited.
        if not isinstance(incorrect, MockDataset):
            raise TypeError(
                f"Mock storage class {self.name!r} can only convert in-memory datasets "
                f"corresponding to other mock storage classes, not {incorrect!r}."
            )
        factory = StorageClassFactory()
        other_storage_class = factory.getStorageClass(incorrect.storage_class)
        assert isinstance(other_storage_class, MockStorageClass), "Should not get a MockDataset otherwise."
        if other_storage_class.name == self.name:
            return incorrect
        if not self.can_convert(other_storage_class):
            raise TypeError(
                f"Mocked storage class {self.original.name!r} cannot convert from "
                f"{other_storage_class.original.name!r}."
            )
        return incorrect.make_derived(storageClass=self.name, converted_from=incorrect)

    @staticmethod
    def mock_dataset_type(original_type: DatasetType) -> DatasetType:
        """Replace a dataset type with a version that uses a mock storage
        class and name.

        Parameters
        ----------
        original_type : `lsst.daf.butler.DatasetType`
            Original dataset type to be mocked.

        Returns
        -------
        mock_type : `lsst.daf.butler.DatasetType`
            A mock version of the dataset type, with name and storage class
            changed and everything else unchanged.
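
        Examples
        --------
        A sketch, assuming ``original_type`` is an existing
        `lsst.daf.butler.DatasetType` named ``"calexp"`` (the name is
        illustrative):

        >>> mocked = MockStorageClass.mock_dataset_type(original_type)  # doctest: +SKIP
        >>> mocked.name  # doctest: +SKIP
        '_mock_calexp'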
352 """
353 mock_storage_class = MockStorageClass.get_or_register_mock(original_type.storageClass_name)
354 mock_parent_storage_class = None
355 if original_type.parentStorageClass is not None:
356 mock_parent_storage_class = MockStorageClass.get_or_register_mock(
357 original_type.parentStorageClass.name
358 )
359 return DatasetType(
360 get_mock_name(original_type.name),
361 original_type.dimensions,
362 mock_storage_class,
363 isCalibration=original_type.isCalibration(),
364 parentStorageClass=mock_parent_storage_class,
365 )

    @staticmethod
    def mock_dataset_refs(original_refs: Iterable[DatasetRef]) -> list[DatasetRef]:
        """Replace dataset references with versions that use a mock storage
        class and dataset type name.

        Parameters
        ----------
        original_refs : `~collections.abc.Iterable` [ \
                `lsst.daf.butler.DatasetRef` ]
            Original dataset references to be mocked.

        Returns
        -------
        mock_refs : `list` [ `lsst.daf.butler.DatasetRef` ]
            Mocked versions of the dataset references, with dataset type name
            and storage class changed and everything else unchanged.
        """
        original_refs = list(original_refs)
        if not original_refs:
            return original_refs
        dataset_type = MockStorageClass.mock_dataset_type(original_refs[0].datasetType)
        return [
            DatasetRef(dataset_type, original_ref.dataId, run=original_ref.run, id=original_ref.id)
            for original_ref in original_refs
        ]

    @staticmethod
    def unmock_dataset_type(mock_type: DatasetType) -> DatasetType:
        """Replace a mock dataset type with the original one it was created
        from.

        Parameters
        ----------
        mock_type : `lsst.daf.butler.DatasetType`
            A dataset type with a mocked name and storage class.

        Returns
        -------
        original_type : `lsst.daf.butler.DatasetType`
            The original dataset type.
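
        Examples
        --------
        A sketch of the round trip with `mock_dataset_type`; ``mocked`` is
        assumed to be the result of that call on a dataset type named
        ``"calexp"``:

        >>> MockStorageClass.unmock_dataset_type(mocked).name  # doctest: +SKIP
        'calexp'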
407 """
408 storage_class = mock_type.storageClass
409 parent_storage_class = mock_type.parentStorageClass
410 if isinstance(storage_class, MockStorageClass):
411 storage_class = storage_class.original
412 if parent_storage_class is not None and isinstance(parent_storage_class, MockStorageClass):
413 parent_storage_class = parent_storage_class.original
414 return DatasetType(
415 get_original_name(mock_type.name),
416 mock_type.dimensions,
417 storage_class,
418 isCalibration=mock_type.isCalibration(),
419 parentStorageClass=parent_storage_class,
420 )

    @staticmethod
    def unmock_dataset_refs(mock_refs: Iterable[DatasetRef]) -> list[DatasetRef]:
        """Replace dataset references with versions that do not use a mock
        storage class and dataset type name.

        Parameters
        ----------
        mock_refs : `~collections.abc.Iterable` [ \
                `lsst.daf.butler.DatasetRef` ]
            Dataset references that use a mocked dataset type name and storage
            class.

        Returns
        -------
        original_refs : `list` [ `lsst.daf.butler.DatasetRef` ]
            The original dataset references.
        """
        mock_refs = list(mock_refs)
        if not mock_refs:
            return mock_refs
        dataset_type = MockStorageClass.unmock_dataset_type(mock_refs[0].datasetType)
        return [
            DatasetRef(dataset_type, mock_ref.dataId, run=mock_ref.run, id=mock_ref.id)
            for mock_ref in mock_refs
        ]


def _monkeypatch_daf_butler() -> None:
    """Replace methods in daf_butler's StorageClassFactory and FormatterFactory
    classes to automatically recognize mock storage classes.

    This monkey-patching is executed when the `lsst.pipe.base.tests.mocks`
    package is imported, and it affects all butler instances created before or
    after that import.
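
    Examples
    --------
    A sketch of the intended effect: importing the mocks package applies the
    patches, after which mock storage class names resolve like any other
    (``StructuredDataDict`` is assumed to be a registered original):

    >>> import lsst.pipe.base.tests.mocks  # doctest: +SKIP
    >>> from lsst.daf.butler import StorageClassFactory  # doctest: +SKIP
    >>> StorageClassFactory().getStorageClass("_mock_StructuredDataDict").name  # doctest: +SKIP
    '_mock_StructuredDataDict'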
456 """
457 original_get_storage_class = StorageClassFactory.getStorageClass
459 def new_get_storage_class(self: StorageClassFactory, storageClassName: str) -> StorageClass:
460 try:
461 return original_get_storage_class(self, storageClassName)
462 except KeyError:
463 if is_mock_name(storageClassName):
464 return MockStorageClass.get_or_register_mock(get_original_name(storageClassName))
465 raise
467 StorageClassFactory.getStorageClass = new_get_storage_class # type: ignore
469 del new_get_storage_class

    original_get_formatter_class_with_match = FormatterFactory.getFormatterClassWithMatch

    def new_get_formatter_class_with_match(
        self: FormatterFactory, entity: Any
    ) -> tuple[LookupKey, type[Formatter], dict[str, Any]]:
        try:
            return original_get_formatter_class_with_match(self, entity)
        except KeyError:
            lookup_keys = (LookupKey(name=entity),) if isinstance(entity, str) else entity._lookupNames()
            for key in lookup_keys:
                # This matches mock dataset type names before mock storage
                # classes, and it would even match some regular dataset types
                # that are automatic connections (logs, configs, metadata) of
                # mocked tasks. The latter would be a problem, except that
                # those should have already matched in the try block above.
                if is_mock_name(key.name):
                    return (key, JsonFormatter, {})
            raise

    FormatterFactory.getFormatterClassWithMatch = new_get_formatter_class_with_match  # type: ignore

    del new_get_formatter_class_with_match

    original_get_formatter_with_match = FormatterFactory.getFormatterWithMatch

    def new_get_formatter_with_match(
        self: FormatterFactory, entity: Any, *args: Any, **kwargs: Any
    ) -> tuple[LookupKey, Formatter]:
        try:
            return original_get_formatter_with_match(self, entity, *args, **kwargs)
        except KeyError:
            lookup_keys = (LookupKey(name=entity),) if isinstance(entity, str) else entity._lookupNames()
            for key in lookup_keys:
                if is_mock_name(key.name):
                    return (key, JsonFormatter(*args, **kwargs))
            raise

    FormatterFactory.getFormatterWithMatch = new_get_formatter_with_match  # type: ignore

    del new_get_formatter_with_match


_monkeypatch_daf_butler()