# This file is part of pipe_base.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = (
    "MockDataset",
    "MockStorageClass",
    "MockDatasetQuantum",
    "MockStorageClassDelegate",
    "get_mock_name",
    "get_original_name",
    "is_mock_name",
)

import uuid
from collections.abc import Callable, Iterable, Mapping
from typing import Any, cast

from lsst.daf.butler import (
    DataIdValue,
    DatasetComponent,
    DatasetRef,
    DatasetType,
    Formatter,
    FormatterFactory,
    LookupKey,
    SerializedDatasetType,
    StorageClass,
    StorageClassDelegate,
    StorageClassFactory,
)
from lsst.daf.butler._compat import _BaseModelCompat
from lsst.daf.butler.formatters.json import JsonFormatter
from lsst.utils.introspection import get_full_type_name

_NAME_PREFIX: str = "_mock_"


def get_mock_name(original: str) -> str:
    """Return the name of the mock storage class, dataset type, or task label
    for the given original name.
    """
    return _NAME_PREFIX + original


def get_original_name(mock: str) -> str:
    """Return the name of the original storage class, dataset type, or task
    label that corresponds to the given mock name.
    """
    assert mock.startswith(_NAME_PREFIX)
    return mock.removeprefix(_NAME_PREFIX)


def is_mock_name(name: str) -> bool:
    """Return whether the given name is that of a mock storage class, dataset
    type, or task label.
    """
    return name.startswith(_NAME_PREFIX)
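

# A quick round-trip illustration of the helpers above (the storage class
# name is arbitrary and only for illustration):
#
#     get_mock_name("StructuredDataDict")            -> "_mock_StructuredDataDict"
#     get_original_name("_mock_StructuredDataDict")  -> "StructuredDataDict"
#     is_mock_name("_mock_StructuredDataDict")       -> True
#     is_mock_name("StructuredDataDict")             -> False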

# Tests for this module are in the ci_middleware package, where we have easy
# access to complex real storage classes (and their pytypes) to test against.


class MockDataset(_BaseModelCompat):
    """The in-memory dataset type used by `MockStorageClass`."""

    dataset_id: uuid.UUID | None
    """Universal unique identifier for this dataset."""

    dataset_type: SerializedDatasetType
97 """Butler dataset type or this dataset.
99 See the documentation for ``data_id`` for why this is a
100 `~lsst.daf.butler.SerializedDatasetType` instead of a "real" one.
101 """

    data_id: dict[str, DataIdValue]
    """Butler data ID for this dataset.

    This is held in its serialized (simple mapping) form rather than as a
    "real" `~lsst.daf.butler.DataCoordinate` for two reasons:

    - the mock dataset may need to be read from disk in a context in which a
      `~lsst.daf.butler.DimensionUniverse` is unavailable;
    - we don't want the complexity of having a separate
      ``SerializedMockDataCoordinate``.
    """

    run: str | None
    """`~lsst.daf.butler.CollectionType.RUN` collection this dataset belongs
    to.
    """

    quantum: MockDatasetQuantum | None = None
    """Description of the quantum that produced this dataset."""

    output_connection_name: str | None = None
    """The name of the PipelineTask output connection that produced this
    dataset.
    """

    converted_from: MockDataset | None = None
    """Another `MockDataset` that underwent a storage class conversion to
    produce this one.
    """

    parent: MockDataset | None = None
    """Another `MockDataset` from which a component was extracted to form this
    one.
    """

    parameters: dict[str, str] | None = None
    """`repr` of all parameters applied when reading this dataset."""

    @property
    def storage_class(self) -> str:
        return cast(str, self.dataset_type.storageClass)

    def make_derived(self, **kwargs: Any) -> MockDataset:
        """Return a new MockDataset that represents applying some storage
        class operation to this one.

        Keyword arguments are fields of `MockDataset` or
        `~lsst.daf.butler.SerializedDatasetType` to override in the result.
        """
        dataset_type_updates = {
            k: kwargs.pop(k) for k in list(kwargs) if k in SerializedDatasetType.model_fields  # type: ignore
        }
        kwargs.setdefault("dataset_type", self.dataset_type.copy(update=dataset_type_updates))
        # Fields below are those that should not be propagated to the derived
        # dataset, because they're not about the intrinsic on-disk thing.
        kwargs.setdefault("converted_from", None)
        kwargs.setdefault("parent", None)
        kwargs.setdefault("parameters", None)
        # Also use setdefault on the ref in case the caller wants to override
        # that directly, but this is expected to be rare enough that it's not
        # worth trying to optimize out the work above to make derived_ref.
        return self.copy(update=kwargs)
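
    # A sketch of how make_derived is used elsewhere in this module: parameter
    # application (see MockStorageClassDelegate.handleParameters below) is
    # recorded as
    #
    #     derived = mock_dataset.make_derived(
    #         parameters={k: repr(v) for k, v in parameters.items()}
    #     )
    #
    # where `mock_dataset` and `parameters` are assumed to exist; the result
    # is a copy with the non-intrinsic fields above reset.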


class MockDatasetQuantum(_BaseModelCompat):
    """Description of the quantum that produced a mock dataset.

    This is also used to represent task-init operations for init-output mock
    datasets.
    """

    task_label: str
    """Label of the producing PipelineTask in its pipeline."""

    data_id: dict[str, DataIdValue]
    """Data ID for the quantum."""

    inputs: dict[str, list[MockDataset]]
    """Mock datasets provided as input to the quantum.

    Keys are task-internal connection names, not dataset type names.
    """


MockDataset.model_rebuild()
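
# An illustrative (entirely made-up) quantum record, showing that ``inputs``
# is keyed by connection name rather than dataset type name:
#
#     MockDatasetQuantum(
#         task_label="_mock_calibrate",
#         data_id={"instrument": "HSC", "visit": 903334},
#         inputs={"exposure": [input_mock_dataset]},
#     )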


class MockStorageClassDelegate(StorageClassDelegate):
    """Implementation of the StorageClassDelegate interface for mock datasets.

    This class does not implement assembly and disassembly simply because they
    are not needed right now.  They could be added in the future with some
    additional tracking attributes in `MockDataset`.
    """

    def assemble(self, components: dict[str, Any], pytype: type | None = None) -> MockDataset:
        # Docstring inherited.
        raise NotImplementedError("Mock storage classes do not implement assembly.")

    def getComponent(self, composite: Any, componentName: str) -> Any:
        # Docstring inherited.
        assert isinstance(
            composite, MockDataset
        ), f"MockStorageClassDelegate given a non-mock dataset {composite!r}."
        return composite.make_derived(
            name=f"{composite.dataset_type.name}.{componentName}",
            storageClass=self.storageClass.allComponents()[componentName].name,
            parentStorageClass=self.storageClass.name,
            parent=composite,
        )

    def disassemble(
        self, composite: Any, subset: Iterable | None = None, override: Any | None = None
    ) -> dict[str, DatasetComponent]:
        # Docstring inherited.
        raise NotImplementedError("Mock storage classes do not implement disassembly.")

    def handleParameters(self, inMemoryDataset: Any, parameters: Mapping[str, Any] | None = None) -> Any:
        # Docstring inherited.
        assert isinstance(
            inMemoryDataset, MockDataset
        ), f"MockStorageClassDelegate given a non-mock dataset {inMemoryDataset!r}."
        if not parameters:
            return inMemoryDataset
        return inMemoryDataset.make_derived(parameters={k: repr(v) for k, v in parameters.items()})
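
    # Illustrative delegate behavior (a sketch; assumes `composite` is a
    # MockDataset whose mock storage class has an "image" component):
    #
    #     delegate = MockStorageClassDelegate(mock_storage_class)
    #     image = delegate.getComponent(composite, "image")
    #
    # `image` is then a new MockDataset carrying parent=composite and the
    # component's mock storage class; no real pixels are ever touched.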


class MockStorageClass(StorageClass):
    """A reimplementation of `lsst.daf.butler.StorageClass` for mock datasets.

    Each `MockStorageClass` instance corresponds to a real "original" storage
    class, with components and conversions that are mocks of the original's
    components and conversions.  The `pytype` for all `MockStorageClass`
    instances is `MockDataset`.
    """

    def __init__(self, original: StorageClass, factory: StorageClassFactory | None = None):
        name = get_mock_name(original.name)
        if factory is None:
            factory = StorageClassFactory()
        super().__init__(
            name=name,
            pytype=MockDataset,
            components={
                k: self.get_or_register_mock(v.name, factory) for k, v in original.components.items()
            },
            derivedComponents={
                k: self.get_or_register_mock(v.name, factory) for k, v in original.derivedComponents.items()
            },
            parameters=frozenset(original.parameters),
            delegate=get_full_type_name(MockStorageClassDelegate),
            # Conversions work differently for mock storage classes, since
            # they all have the same pytype: we use the original storage class
            # being mocked to see if we can convert, then just make a new
            # MockDataset that points back to the original.
            converters={},
        )
        self.original = original
        # Make certain no one tries to use the converters.
        self._converters = None  # type: ignore

    def _get_converters_by_type(self) -> dict[type, Callable[[Any], Any]]:
        # Docstring inherited.
        raise NotImplementedError("MockStorageClass does not use converters.")

    @classmethod
    def get_or_register_mock(
        cls, original: str, factory: StorageClassFactory | None = None
    ) -> MockStorageClass:
        """Return a mock storage class for the given original storage class,
        creating and registering it if necessary.

        Parameters
        ----------
        original : `str`
            Name of the original storage class to be mocked.
        factory : `~lsst.daf.butler.StorageClassFactory`, optional
            Storage class factory singleton instance.

        Returns
        -------
        mock : `MockStorageClass`
            New storage class that mocks ``original``.
        """
        name = get_mock_name(original)
        if factory is None:
            factory = StorageClassFactory()
        if name in factory:
            return cast(MockStorageClass, factory.getStorageClass(name))
        else:
            result = cls(factory.getStorageClass(original), factory)
            factory.registerStorageClass(result)
            return result
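
    # For example (a sketch; assumes "StructuredDataDict" is a storage class
    # registered with the default StorageClassFactory):
    #
    #     mock_sc = MockStorageClass.get_or_register_mock("StructuredDataDict")
    #     assert mock_sc.name == get_mock_name("StructuredDataDict")
    #     assert mock_sc.pytype is MockDataset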

    def allComponents(self) -> Mapping[str, MockStorageClass]:
        # Docstring inherited.
        return cast(Mapping[str, MockStorageClass], super().allComponents())

    @property
    def components(self) -> Mapping[str, MockStorageClass]:
        # Docstring inherited.
        return cast(Mapping[str, MockStorageClass], super().components)

    @property
    def derivedComponents(self) -> Mapping[str, MockStorageClass]:
        # Docstring inherited.
        return cast(Mapping[str, MockStorageClass], super().derivedComponents)

    def can_convert(self, other: StorageClass) -> bool:
        # Docstring inherited.
        if not isinstance(other, MockStorageClass):
            return False
        return self.original.can_convert(other.original)

    def coerce_type(self, incorrect: Any) -> Any:
        # Docstring inherited.
        if not isinstance(incorrect, MockDataset):
            raise TypeError(
                f"Mock storage class {self.name!r} can only convert in-memory datasets "
                f"corresponding to other mock storage classes, not {incorrect!r}."
            )
        factory = StorageClassFactory()
        other_storage_class = factory.getStorageClass(incorrect.storage_class)
        assert isinstance(other_storage_class, MockStorageClass), "Should not get a MockDataset otherwise."
        if other_storage_class.name == self.name:
            return incorrect
        if not self.can_convert(other_storage_class):
            raise TypeError(
                f"Mocked storage class {self.original.name!r} cannot convert from "
                f"{other_storage_class.original.name!r}."
            )
        return incorrect.make_derived(storageClass=self.name, converted_from=incorrect)
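
    # Coercion never copies any real data; when the storage classes differ it
    # just wraps the input (a sketch):
    #
    #     converted = target_mock_class.coerce_type(mock_dataset)
    #     assert converted.converted_from is mock_dataset
    #     assert converted.storage_class == target_mock_class.name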

    @staticmethod
    def mock_dataset_type(original_type: DatasetType) -> DatasetType:
        """Replace a dataset type with a version that uses a mock storage
        class and name.

        Parameters
        ----------
        original_type : `lsst.daf.butler.DatasetType`
            Original dataset type to be mocked.

        Returns
        -------
        mock_type : `lsst.daf.butler.DatasetType`
            A mock version of the dataset type, with name and storage class
            changed and everything else unchanged.
        """
        mock_storage_class = MockStorageClass.get_or_register_mock(original_type.storageClass_name)
        mock_parent_storage_class = None
        if original_type.parentStorageClass is not None:
            mock_parent_storage_class = MockStorageClass.get_or_register_mock(
                original_type.parentStorageClass.name
            )
        return DatasetType(
            get_mock_name(original_type.name),
            original_type.dimensions,
            mock_storage_class,
            isCalibration=original_type.isCalibration(),
            parentStorageClass=mock_parent_storage_class,
        )

    @staticmethod
    def mock_dataset_refs(original_refs: Iterable[DatasetRef]) -> list[DatasetRef]:
        """Replace dataset references with versions that use a mock storage
        class and dataset type name.

        Parameters
        ----------
        original_refs : `~collections.abc.Iterable` [ \
                `lsst.daf.butler.DatasetRef` ]
            Original dataset references to be mocked.

        Returns
        -------
        mock_refs : `list` [ `lsst.daf.butler.DatasetRef` ]
            Mocked versions of the dataset references, with dataset type name
            and storage class changed and everything else unchanged.
        """
        original_refs = list(original_refs)
        if not original_refs:
            return original_refs
        dataset_type = MockStorageClass.mock_dataset_type(original_refs[0].datasetType)
        return [
            DatasetRef(dataset_type, original_ref.dataId, run=original_ref.run, id=original_ref.id)
            for original_ref in original_refs
        ]

    @staticmethod
    def unmock_dataset_type(mock_type: DatasetType) -> DatasetType:
        """Replace a mock dataset type with the original one it was created
        from.

        Parameters
        ----------
        mock_type : `lsst.daf.butler.DatasetType`
            A dataset type with a mocked name and storage class.

        Returns
        -------
        original_type : `lsst.daf.butler.DatasetType`
            The original dataset type.
        """
        mock_storage_class = cast(MockStorageClass, mock_type.storageClass)
        original_parent_storage_class = None
        if mock_type.parentStorageClass is not None:
            original_parent_storage_class = cast(MockStorageClass, mock_type.parentStorageClass).original
        return DatasetType(
            get_original_name(mock_type.name),
            mock_type.dimensions,
            mock_storage_class.original,
            isCalibration=mock_type.isCalibration(),
            parentStorageClass=original_parent_storage_class,
        )
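
    # mock_dataset_type and unmock_dataset_type are intended to round-trip
    # (a sketch):
    #
    #     mocked = MockStorageClass.mock_dataset_type(original_type)
    #     assert MockStorageClass.unmock_dataset_type(mocked) == original_type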

    @staticmethod
    def unmock_dataset_refs(mock_refs: Iterable[DatasetRef]) -> list[DatasetRef]:
        """Replace dataset references with versions that do not use a mock
        storage class and dataset type name.

        Parameters
        ----------
        mock_refs : `~collections.abc.Iterable` [ \
                `lsst.daf.butler.DatasetRef` ]
            Dataset references that use a mocked dataset type name and storage
            class.

        Returns
        -------
        original_refs : `list` [ `lsst.daf.butler.DatasetRef` ]
            The original dataset references.
        """
        mock_refs = list(mock_refs)
        if not mock_refs:
            return mock_refs
        dataset_type = MockStorageClass.unmock_dataset_type(mock_refs[0].datasetType)
        return [
            DatasetRef(dataset_type, mock_ref.dataId, run=mock_ref.run, id=mock_ref.id)
            for mock_ref in mock_refs
        ]


def _monkeypatch_daf_butler() -> None:
    """Replace methods in daf_butler's StorageClassFactory and
    FormatterFactory classes to automatically recognize mock storage classes.

    This monkey-patching is executed when the `lsst.pipe.base.tests.mocks`
    package is imported, and it affects all butler instances created before
    or after that import.
    """
    original_get_storage_class = StorageClassFactory.getStorageClass

    def new_get_storage_class(self: StorageClassFactory, storageClassName: str) -> StorageClass:
        try:
            return original_get_storage_class(self, storageClassName)
        except KeyError:
            if is_mock_name(storageClassName):
                return MockStorageClass.get_or_register_mock(get_original_name(storageClassName))
            raise

    StorageClassFactory.getStorageClass = new_get_storage_class  # type: ignore

    del new_get_storage_class

    original_get_formatter_class_with_match = FormatterFactory.getFormatterClassWithMatch

    def new_get_formatter_class_with_match(
        self: FormatterFactory, entity: Any
    ) -> tuple[LookupKey, type[Formatter], dict[str, Any]]:
        try:
            return original_get_formatter_class_with_match(self, entity)
        except KeyError:
            lookup_keys = (LookupKey(name=entity),) if isinstance(entity, str) else entity._lookupNames()
            for key in lookup_keys:
                # This matches mock dataset type names before mock storage
                # classes, and it would even match some regular dataset types
                # that are automatic connections (logs, configs, metadata) of
                # mocked tasks.  The latter would be a problem, except that
                # those should have already matched in the try block above.
                if is_mock_name(key.name):
                    return (key, JsonFormatter, {})
            raise

    FormatterFactory.getFormatterClassWithMatch = new_get_formatter_class_with_match  # type: ignore

    del new_get_formatter_class_with_match

    original_get_formatter_with_match = FormatterFactory.getFormatterWithMatch

    def new_get_formatter_with_match(
        self: FormatterFactory, entity: Any, *args: Any, **kwargs: Any
    ) -> tuple[LookupKey, Formatter]:
        try:
            return original_get_formatter_with_match(self, entity, *args, **kwargs)
        except KeyError:
            lookup_keys = (LookupKey(name=entity),) if isinstance(entity, str) else entity._lookupNames()
            for key in lookup_keys:
                if is_mock_name(key.name):
                    return (key, JsonFormatter(*args, **kwargs))
            raise

    FormatterFactory.getFormatterWithMatch = new_get_formatter_with_match  # type: ignore

    del new_get_formatter_with_match


_monkeypatch_daf_butler()
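
# Once this module has been imported, mock storage class names resolve on
# demand anywhere daf_butler looks them up by name, e.g. (illustrative):
#
#     from lsst.daf.butler import StorageClassFactory
#
#     factory = StorageClassFactory()
#     sc = factory.getStorageClass("_mock_StructuredDataDict")
#     assert isinstance(sc, MockStorageClass)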