Coverage for python/lsst/pipe/base/tests/mocks/_storage_class.py: 41%
144 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-12 11:14 -0700
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-12 11:14 -0700
1# This file is part of pipe_base.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
# Public API of this module; mock names are resolved via the get_*_name helpers.
__all__ = (
    "MockDataset",
    "MockStorageClass",
    "MockDatasetQuantum",
    "MockStorageClassDelegate",
    "get_mock_name",
    "get_original_name",
    "is_mock_name",
)
34from collections.abc import Callable, Iterable, Mapping
35from typing import Any, cast
37try:
38 import pydantic.v1 as pydantic
39except ModuleNotFoundError:
40 import pydantic # type: ignore
41from lsst.daf.butler import (
42 DatasetComponent,
43 Formatter,
44 FormatterFactory,
45 LookupKey,
46 SerializedDataCoordinate,
47 SerializedDatasetRef,
48 SerializedDatasetType,
49 StorageClass,
50 StorageClassDelegate,
51 StorageClassFactory,
52)
53from lsst.daf.butler.formatters.json import JsonFormatter
54from lsst.utils.introspection import get_full_type_name
_NAME_PREFIX: str = "_mock_"


def get_mock_name(original: str) -> str:
    """Return the mock storage class, dataset type, or task label name
    corresponding to the given original name.
    """
    return f"{_NAME_PREFIX}{original}"


def get_original_name(mock: str) -> str:
    """Return the original storage class, dataset type, or task label name
    corresponding to the given mock name.
    """
    assert mock.startswith(_NAME_PREFIX)
    return mock[len(_NAME_PREFIX) :]


def is_mock_name(name: str) -> bool:
    """Return whether the given name belongs to a mock storage class, dataset
    type, or task label.
    """
    return name.startswith(_NAME_PREFIX)
81# Tests for this module are in the ci_middleware package, where we have easy
82# access to complex real storage classes (and their pytypes) to test against.
class MockDataset(pydantic.BaseModel):
    """The in-memory dataset type used by `MockStorageClass`."""

    ref: SerializedDatasetRef
    """Reference used to read and write this dataset.

    This is a `~lsst.daf.butler.SerializedDatasetRef` rather than a "real"
    one, for two reasons:

    - a mock dataset may need to be read from disk in a context where no
      `~lsst.daf.butler.DimensionUniverse` is available;
    - a separate ``SerializedMockDataset`` would add unwanted complexity.

    The cost of this choice is that a few fairly trivial
    DatasetType/DatasetRef operations (overriding storage classes and
    extracting components) are effectively reimplemented here, in
    `MockStorageClass`, and in `MockStorageClassDelegate`.
    """

    quantum: MockDatasetQuantum | None = None
    """Description of the quantum that produced this dataset."""

    output_connection_name: str | None = None
    """Name of the PipelineTask output connection that produced this
    dataset."""

    converted_from: MockDataset | None = None
    """The `MockDataset` that underwent a storage class conversion to produce
    this one, if any."""

    parent: MockDataset | None = None
    """The `MockDataset` from which a component was extracted to form this
    one, if any."""

    parameters: dict[str, str] | None = None
    """`repr` of all parameters applied when reading this dataset."""

    @property
    def dataset_type(self) -> SerializedDatasetType:
        # cast: refs used here are assumed to carry a full serialized
        # dataset type, not just a name.
        return cast(SerializedDatasetType, self.ref.datasetType)

    @property
    def storage_class(self) -> str:
        # cast: the serialized dataset type is assumed to always name its
        # storage class here.
        return cast(str, self.dataset_type.storageClass)

    def make_derived(self, **kwargs: Any) -> MockDataset:
        """Return a new `MockDataset` that represents applying some storage
        class operation to this one.

        Parameters
        ----------
        **kwargs
            Fields of `MockDataset` or
            `~lsst.daf.butler.SerializedDatasetType` to override in the
            result.

        Returns
        -------
        derived : `MockDataset`
            The derived dataset.
        """
        # Split out the overrides that belong to the serialized dataset type
        # rather than to MockDataset itself.
        type_fields = SerializedDatasetType.__fields__
        dataset_type_updates = {k: kwargs.pop(k) for k in tuple(kwargs) if k in type_fields}
        new_dataset_type = self.dataset_type.copy(update=dataset_type_updates)
        new_ref = self.ref.copy(update={"datasetType": new_dataset_type})
        # Provenance-of-this-copy fields must not propagate to the derived
        # dataset: they describe how *this* object was obtained, not the
        # intrinsic on-disk thing.
        for transient in ("converted_from", "parent", "parameters"):
            kwargs.setdefault(transient, None)
        # setdefault on the ref too, in case the caller wants to override it
        # directly; that's rare enough that we don't bother skipping the
        # derived-ref construction above in that case.
        kwargs.setdefault("ref", new_ref)
        return self.copy(update=kwargs)
class MockDatasetQuantum(pydantic.BaseModel):
    """Description of the quantum that produced a mock dataset."""

    task_label: str
    """Pipeline label of the PipelineTask that ran this quantum."""

    data_id: SerializedDataCoordinate
    """The quantum's data ID."""

    inputs: dict[str, list[MockDataset]]
    """Mock datasets consumed by the quantum as inputs."""
# Resolve the self-referential annotations in MockDataset (``quantum``,
# ``converted_from``, ``parent``) now that MockDatasetQuantum is defined
# (pydantic v1 forward-reference mechanism).
MockDataset.update_forward_refs()
class MockStorageClassDelegate(StorageClassDelegate):
    """StorageClassDelegate implementation for mock datasets.

    Notes
    -----
    Assembly and disassembly are deliberately left unimplemented because they
    are not currently needed; support could be added later with extra
    tracking attributes on `MockDataset`.
    """

    def assemble(self, components: dict[str, Any], pytype: type | None = None) -> MockDataset:
        # Docstring inherited.
        raise NotImplementedError("Mock storage classes do not implement assembly.")

    def getComponent(self, composite: Any, componentName: str) -> Any:
        # Docstring inherited.
        assert isinstance(
            composite, MockDataset
        ), f"MockStorageClassDelegate given a non-mock dataset {composite!r}."
        # Derive a new mock dataset whose dataset type reflects the
        # component's name and (mock) storage class.
        component_storage_class = self.storageClass.allComponents()[componentName]
        return composite.make_derived(
            name=f"{composite.dataset_type.name}.{componentName}",
            storageClass=component_storage_class.name,
            parentStorageClass=self.storageClass.name,
            parent=composite,
        )

    def disassemble(
        self, composite: Any, subset: Iterable | None = None, override: Any | None = None
    ) -> dict[str, DatasetComponent]:
        # Docstring inherited.
        raise NotImplementedError("Mock storage classes do not implement disassembly.")

    def handleParameters(self, inMemoryDataset: Any, parameters: Mapping[str, Any] | None = None) -> Any:
        # Docstring inherited.
        assert isinstance(
            inMemoryDataset, MockDataset
        ), f"MockStorageClassDelegate given a non-mock dataset {inMemoryDataset!r}."
        if parameters:
            # Record only reprs of the parameter values; the values themselves
            # need not be serializable.
            return inMemoryDataset.make_derived(parameters={k: repr(v) for k, v in parameters.items()})
        return inMemoryDataset
class MockStorageClass(StorageClass):
    """A reimplementation of `lsst.daf.butler.StorageClass` for mock datasets.

    Parameters
    ----------
    original : `~lsst.daf.butler.StorageClass`
        The real storage class being mocked.
    factory : `~lsst.daf.butler.StorageClassFactory`, optional
        Storage class factory singleton instance.

    Notes
    -----
    Every `MockStorageClass` instance wraps a real "original" storage class;
    its components and conversions are mocks of the original's, and its
    ``pytype`` is always `MockDataset`.
    """

    def __init__(self, original: StorageClass, factory: StorageClassFactory | None = None):
        if factory is None:
            factory = StorageClassFactory()
        super().__init__(
            name=get_mock_name(original.name),
            pytype=MockDataset,
            # Components (regular and derived) are themselves mocked,
            # recursively registering as needed.
            components={
                comp_name: self.get_or_register_mock(comp.name, factory)
                for comp_name, comp in original.components.items()
            },
            derivedComponents={
                comp_name: self.get_or_register_mock(comp.name, factory)
                for comp_name, comp in original.derivedComponents.items()
            },
            parameters=frozenset(original.parameters),
            delegate=get_full_type_name(MockStorageClassDelegate),
            # Conversions work differently for mock storage classes: all of
            # them share the same pytype, so we consult the original storage
            # class to decide convertibility and then just build a new
            # MockDataset pointing back at the source.  No converter map.
            converters={},
        )
        self.original = original
        # Poison the converter mapping so any accidental use fails loudly.
        self._converters = None  # type: ignore

    def _get_converters_by_type(self) -> dict[type, Callable[[Any], Any]]:
        # Docstring inherited.
        raise NotImplementedError("MockStorageClass does not use converters.")

    @classmethod
    def get_or_register_mock(
        cls, original: str, factory: StorageClassFactory | None = None
    ) -> MockStorageClass:
        """Return the mock storage class for the given original storage
        class, creating and registering it first if necessary.

        Parameters
        ----------
        original : `str`
            Name of the original storage class to be mocked.
        factory : `~lsst.daf.butler.StorageClassFactory`, optional
            Storage class factory singleton instance.

        Returns
        -------
        mock : `MockStorageClass`
            Storage class that mocks ``original``.
        """
        if factory is None:
            factory = StorageClassFactory()
        mock_name = get_mock_name(original)
        if mock_name in factory:
            return cast(MockStorageClass, factory.getStorageClass(mock_name))
        mock = cls(factory.getStorageClass(original), factory)
        factory.registerStorageClass(mock)
        return mock

    def allComponents(self) -> Mapping[str, MockStorageClass]:
        # Docstring inherited.  cast: components were mocked in __init__.
        return cast(Mapping[str, MockStorageClass], super().allComponents())

    @property
    def components(self) -> Mapping[str, MockStorageClass]:
        # Docstring inherited.  cast: components were mocked in __init__.
        return cast(Mapping[str, MockStorageClass], super().components)

    @property
    def derivedComponents(self) -> Mapping[str, MockStorageClass]:
        # Docstring inherited.  cast: components were mocked in __init__.
        return cast(Mapping[str, MockStorageClass], super().derivedComponents)

    def can_convert(self, other: StorageClass) -> bool:
        # Docstring inherited.  Delegate to the originals; non-mock storage
        # classes are never convertible.
        return isinstance(other, MockStorageClass) and self.original.can_convert(other.original)

    def coerce_type(self, incorrect: Any) -> Any:
        # Docstring inherited.
        if not isinstance(incorrect, MockDataset):
            raise TypeError(
                f"Mock storage class {self.name!r} can only convert in-memory datasets "
                f"corresponding to other mock storage classes, not {incorrect!r}."
            )
        other_storage_class = StorageClassFactory().getStorageClass(incorrect.storage_class)
        assert isinstance(other_storage_class, MockStorageClass), "Should not get a MockDataset otherwise."
        if other_storage_class.name == self.name:
            # Already the requested storage class; nothing to do.
            return incorrect
        if not self.can_convert(other_storage_class):
            raise TypeError(
                f"Mocked storage class {self.original.name!r} cannot convert from "
                f"{other_storage_class.original.name!r}."
            )
        return incorrect.make_derived(storageClass=self.name, converted_from=incorrect)
def _monkeypatch_daf_butler() -> None:
    """Install replacement methods on daf_butler's `StorageClassFactory` and
    `FormatterFactory` so mock storage classes are recognized automatically.

    The monkey-patching happens when the `lsst.pipe.base.tests.mocks` package
    is imported, and it affects all butler instances created before or after
    that import.
    """

    def _lookup_keys(entity: Any) -> tuple[LookupKey, ...]:
        # Normalize a formatter-lookup entity into its candidate lookup keys.
        if isinstance(entity, str):
            return (LookupKey(name=entity),)
        return tuple(entity._lookupNames())

    original_get_storage_class = StorageClassFactory.getStorageClass

    def new_get_storage_class(self: StorageClassFactory, storageClassName: str) -> StorageClass:
        try:
            return original_get_storage_class(self, storageClassName)
        except KeyError:
            # Unknown name: if it follows the mock naming convention, create
            # and register the mock storage class on the fly.
            if is_mock_name(storageClassName):
                return MockStorageClass.get_or_register_mock(get_original_name(storageClassName))
            raise

    StorageClassFactory.getStorageClass = new_get_storage_class  # type: ignore

    del new_get_storage_class

    original_get_formatter_class_with_match = FormatterFactory.getFormatterClassWithMatch

    def new_get_formatter_class_with_match(
        self: FormatterFactory, entity: Any
    ) -> tuple[LookupKey, type[Formatter], dict[str, Any]]:
        try:
            return original_get_formatter_class_with_match(self, entity)
        except KeyError:
            for key in _lookup_keys(entity):
                # This matches mock dataset type names before mock storage
                # classes, and it would even match some regular dataset types
                # that are automatic connections (logs, configs, metadata) of
                # mocked tasks.  The latter would be a problem, except that
                # those should have already matched in the try block above.
                if is_mock_name(key.name):
                    return (key, JsonFormatter, {})
            raise

    FormatterFactory.getFormatterClassWithMatch = new_get_formatter_class_with_match  # type: ignore

    del new_get_formatter_class_with_match

    original_get_formatter_with_match = FormatterFactory.getFormatterWithMatch

    def new_get_formatter_with_match(
        self: FormatterFactory, entity: Any, *args: Any, **kwargs: Any
    ) -> tuple[LookupKey, Formatter]:
        try:
            return original_get_formatter_with_match(self, entity, *args, **kwargs)
        except KeyError:
            for key in _lookup_keys(entity):
                # Same matching logic as getFormatterClassWithMatch, but
                # returning a formatter instance instead of its class.
                if is_mock_name(key.name):
                    return (key, JsonFormatter(*args, **kwargs))
            raise

    FormatterFactory.getFormatterWithMatch = new_get_formatter_with_match  # type: ignore

    del new_get_formatter_with_match
# Apply the daf_butler patches as a side effect of importing this module.
_monkeypatch_daf_butler()