Coverage for python/lsst/pipe/base/tests/mocks/_storage_class.py: 40% (194 statements)
# This file is part of pipe_base.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = (
    "MockDataset",
    "MockStorageClass",
    "MockDatasetQuantum",
    "MockStorageClassDelegate",
    "get_mock_name",
    "get_original_name",
    "is_mock_name",
)

import sys
import uuid
from collections.abc import Callable, Iterable, Mapping
from typing import Any, cast

import pydantic
from lsst.daf.butler import (
    DataIdValue,
    DatasetComponent,
    DatasetRef,
    DatasetType,
    Formatter,
    FormatterFactory,
    LookupKey,
    SerializedDatasetType,
    StorageClass,
    StorageClassDelegate,
    StorageClassFactory,
)
from lsst.daf.butler.formatters.json import JsonFormatter
from lsst.utils.introspection import get_full_type_name

_NAME_PREFIX: str = "_mock_"


def get_mock_name(original: str) -> str:
    """Return the name of the mock storage class, dataset type, or task label
    for the given original name.

    Parameters
    ----------
    original : `str`
        Original name.

    Returns
    -------
    name : `str`
        The name of the mocked version.
    """
    return _NAME_PREFIX + original
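
# Illustrative only: the mock name is just the original name with
# ``_NAME_PREFIX`` prepended (doctest-style sketch, not executed;
# "StructuredDataDict" is an arbitrary example name):
#
#     >>> get_mock_name("StructuredDataDict")
#     '_mock_StructuredDataDict'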


def get_original_name(mock: str) -> str:
    """Return the name of the original storage class, dataset type, or task
    label that corresponds to the given mock name.

    Parameters
    ----------
    mock : `str`
        The mocked name.

    Returns
    -------
    original : `str`
        The original name.
    """
    assert mock.startswith(_NAME_PREFIX)
    return mock.removeprefix(_NAME_PREFIX)


def is_mock_name(name: str) -> bool:
    """Return whether the given name is that of a mock storage class, dataset
    type, or task label.

    Parameters
    ----------
    name : `str`
        The given name to check.

    Returns
    -------
    is_mock : `bool`
        Whether the name is for a mock or not.
    """
    return name.startswith(_NAME_PREFIX)
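
# The three helpers above are mutually consistent; a round-trip sketch
# (illustrative, with an arbitrary example name):
#
#     >>> is_mock_name(get_mock_name("ExposureF"))
#     True
#     >>> get_original_name(get_mock_name("ExposureF"))
#     'ExposureF'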


# Tests for this module are in the ci_middleware package, where we have easy
# access to complex real storage classes (and their pytypes) to test against.


class MockDataset(pydantic.BaseModel):
    """The in-memory dataset type used by `MockStorageClass`."""

    dataset_id: uuid.UUID | None
    """Universal unique identifier for this dataset."""

    dataset_type: SerializedDatasetType
    """Butler dataset type of this dataset.

    See the documentation for ``data_id`` for why this is a
    `~lsst.daf.butler.SerializedDatasetType` instead of a "real" one.
    """

    data_id: dict[str, DataIdValue]
    """Butler data ID for this dataset.

    This is a simple dictionary instead of a "real"
    `~lsst.daf.butler.DataCoordinate` for two reasons:

    - the mock dataset may need to be read from disk in a context in which a
      `~lsst.daf.butler.DimensionUniverse` is unavailable;
    - we don't want the complexity of having a separate
      ``SerializedMockDataCoordinate``.
    """

    run: str | None
    """`~lsst.daf.butler.CollectionType.RUN` collection this dataset belongs
    to.
    """

    quantum: MockDatasetQuantum | None = None
    """Description of the quantum that produced this dataset."""

    output_connection_name: str | None = None
    """The name of the PipelineTask output connection that produced this
    dataset.
    """

    converted_from: MockDataset | None = None
    """Another `MockDataset` that underwent a storage class conversion to
    produce this one.
    """

    parent: MockDataset | None = None
    """Another `MockDataset` from which a component was extracted to form this
    one.
    """

    parameters: dict[str, str] | None = None
    """`repr` of all parameters applied when reading this dataset."""

    @property
    def storage_class(self) -> str:
        return cast(str, self.dataset_type.storageClass)

    def make_derived(self, **kwargs: Any) -> MockDataset:
        """Return a new MockDataset that represents applying some storage class
        operation to this one.

        Parameters
        ----------
        **kwargs : `~typing.Any`
            Keyword arguments are fields of `MockDataset` or
            `~lsst.daf.butler.SerializedDatasetType` to override in the result.

        Returns
        -------
        derived : `MockDataset`
            The newly-mocked dataset.
        """
        dataset_type_updates = {
            k: kwargs.pop(k) for k in list(kwargs) if k in SerializedDatasetType.model_fields
        }
        kwargs.setdefault("dataset_type", self.dataset_type.model_copy(update=dataset_type_updates))
        # Fields below are those that should not be propagated to the derived
        # dataset, because they're not about the intrinsic on-disk thing.
        kwargs.setdefault("converted_from", None)
        kwargs.setdefault("parent", None)
        kwargs.setdefault("parameters", None)
        # Also use setdefault on the ref in case caller wants to override that
        # directly, but this is expected to be rare enough that it's not worth
        # it to try to optimize out the work above to make derived_ref.
        return self.model_copy(update=kwargs)
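
    # Sketch of how the storage class machinery calls make_derived (compare
    # MockStorageClass.coerce_type below); ``storageClass`` is routed into the
    # dataset type copy because it is a SerializedDatasetType field.
    # ``mock_dataset`` here is a hypothetical instance:
    #
    #     >>> derived = mock_dataset.make_derived(
    #     ...     storageClass="_mock_StructuredDataDict", converted_from=mock_dataset
    #     ... )
    #     >>> derived.storage_class
    #     '_mock_StructuredDataDict'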

    # Work around the fact that Sphinx chokes on Pydantic docstring formatting
    # when we inherit those docstrings in our public classes.
    if "sphinx" in sys.modules:

        def copy(self, *args: Any, **kwargs: Any) -> Any:
            """See `pydantic.BaseModel.copy`."""
            return super().copy(*args, **kwargs)

        def model_dump(self, *args: Any, **kwargs: Any) -> Any:
            """See `pydantic.BaseModel.model_dump`."""
            return super().model_dump(*args, **kwargs)

        def model_copy(self, *args: Any, **kwargs: Any) -> Any:
            """See `pydantic.BaseModel.model_copy`."""
            return super().model_copy(*args, **kwargs)

        @classmethod
        def model_json_schema(cls, *args: Any, **kwargs: Any) -> Any:
            """See `pydantic.BaseModel.model_json_schema`."""
            return super().model_json_schema(*args, **kwargs)


class MockDatasetQuantum(pydantic.BaseModel):
    """Description of the quantum that produced a mock dataset.

    This is also used to represent task-init operations for init-output mock
    datasets.
    """

    task_label: str
    """Label of the producing PipelineTask in its pipeline."""

    data_id: dict[str, DataIdValue]
    """Data ID for the quantum."""

    inputs: dict[str, list[MockDataset]]
    """Mock datasets provided as input to the quantum.

    Keys are task-internal connection names, not dataset type names.
    """

    # Work around the fact that Sphinx chokes on Pydantic docstring formatting
    # when we inherit those docstrings in our public classes.
    if "sphinx" in sys.modules:

        def copy(self, *args: Any, **kwargs: Any) -> Any:
            """See `pydantic.BaseModel.copy`."""
            return super().copy(*args, **kwargs)

        def model_dump(self, *args: Any, **kwargs: Any) -> Any:
            """See `pydantic.BaseModel.model_dump`."""
            return super().model_dump(*args, **kwargs)

        def model_copy(self, *args: Any, **kwargs: Any) -> Any:
            """See `pydantic.BaseModel.model_copy`."""
            return super().model_copy(*args, **kwargs)

        @classmethod
        def model_json_schema(cls, *args: Any, **kwargs: Any) -> Any:
            """See `pydantic.BaseModel.model_json_schema`."""
            return super().model_json_schema(*args, **kwargs)


MockDataset.model_rebuild()
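
# With the self-referential fields now rebuilt, a MockDataset can be
# constructed directly. A minimal sketch, assuming SerializedDatasetType's
# remaining fields are optional; all names here are invented for illustration:
#
#     >>> MockDataset(
#     ...     dataset_id=uuid.uuid4(),
#     ...     dataset_type=SerializedDatasetType(
#     ...         name="_mock_example", storageClass="_mock_StructuredDataDict"
#     ...     ),
#     ...     data_id={"instrument": "HSC"},
#     ...     run="mock/run",
#     ... )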


class MockStorageClassDelegate(StorageClassDelegate):
    """Implementation of the StorageClassDelegate interface for mock datasets.

    This class does not implement assembly and disassembly just because it's
    not needed right now. That could be added in the future with some
    additional tracking attributes in `MockDataset`.
    """

    def assemble(self, components: dict[str, Any], pytype: type | None = None) -> MockDataset:
        # Docstring inherited.
        raise NotImplementedError("Mock storage classes do not implement assembly.")

    def getComponent(self, composite: Any, componentName: str) -> Any:
        # Docstring inherited.
        assert isinstance(
            composite, MockDataset
        ), f"MockStorageClassDelegate given a non-mock dataset {composite!r}."
        return composite.make_derived(
            name=f"{composite.dataset_type.name}.{componentName}",
            storageClass=self.storageClass.allComponents()[componentName].name,
            parentStorageClass=self.storageClass.name,
            parent=composite,
        )

    def disassemble(
        self, composite: Any, subset: Iterable | None = None, override: Any | None = None
    ) -> dict[str, DatasetComponent]:
        # Docstring inherited.
        raise NotImplementedError("Mock storage classes do not implement disassembly.")

    def handleParameters(self, inMemoryDataset: Any, parameters: Mapping[str, Any] | None = None) -> Any:
        # Docstring inherited.
        assert isinstance(
            inMemoryDataset, MockDataset
        ), f"MockStorageClassDelegate given a non-mock dataset {inMemoryDataset!r}."
        if not parameters:
            return inMemoryDataset
        return inMemoryDataset.make_derived(parameters={k: repr(v) for k, v in parameters.items()})
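
    # Behavior sketch for handleParameters (illustrative; ``delegate`` and
    # ``mock_dataset`` are hypothetical instances): read parameters are not
    # actually applied, only recorded as reprs on a derived dataset.
    #
    #     >>> derived = delegate.handleParameters(mock_dataset, {"subset": slice(0, 10)})
    #     >>> derived.parameters
    #     {'subset': 'slice(0, 10, None)'}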


class MockStorageClass(StorageClass):
    """A reimplementation of `lsst.daf.butler.StorageClass` for mock datasets.

    Parameters
    ----------
    original : `~lsst.daf.butler.StorageClass`
        The original storage class.
    factory : `~lsst.daf.butler.StorageClassFactory` or `None`, optional
        Storage class factory to use. If `None` the default factory is used.

    Notes
    -----
    Each `MockStorageClass` instance corresponds to a real "original" storage
    class, with components and conversions that are mocks of the original's
    components and conversions. The ``pytype`` for all `MockStorageClass`
    instances is `MockDataset`.
    """

    def __init__(self, original: StorageClass, factory: StorageClassFactory | None = None):
        name = get_mock_name(original.name)
        if factory is None:
            factory = StorageClassFactory()
        super().__init__(
            name=name,
            pytype=MockDataset,
            components={
                k: self.get_or_register_mock(v.name, factory) for k, v in original.components.items()
            },
            derivedComponents={
                k: self.get_or_register_mock(v.name, factory) for k, v in original.derivedComponents.items()
            },
            parameters=frozenset(original.parameters),
            delegate=get_full_type_name(MockStorageClassDelegate),
            # Conversions work differently for mock storage classes, since they
            # all have the same pytype: we use the original storage class being
            # mocked to see if we can convert, then just make a new MockDataset
            # that points back to the original.
            converters={},
        )
        self.original = original
        # Make certain no one tries to use the converters.
        self._converters = None  # type: ignore

    def _get_converters_by_type(self) -> dict[type, Callable[[Any], Any]]:
        # Docstring inherited.
        raise NotImplementedError("MockStorageClass does not use converters.")

    @classmethod
    def get_or_register_mock(
        cls, original: str, factory: StorageClassFactory | None = None
    ) -> MockStorageClass:
        """Return a mock storage class for the given original storage class,
        creating and registering it if necessary.

        Parameters
        ----------
        original : `str`
            Name of the original storage class to be mocked.
        factory : `~lsst.daf.butler.StorageClassFactory`, optional
            Storage class factory singleton instance.

        Returns
        -------
        mock : `MockStorageClass`
            New storage class that mocks ``original``.
        """
        name = get_mock_name(original)
        if factory is None:
            factory = StorageClassFactory()
        if name in factory:
            return cast(MockStorageClass, factory.getStorageClass(name))
        else:
            result = cls(factory.getStorageClass(original), factory)
            factory.registerStorageClass(result)
            return result
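
    # Typical usage sketch ("StructuredDataDict" stands in for any storage
    # class name already known to the default factory):
    #
    #     >>> mock = MockStorageClass.get_or_register_mock("StructuredDataDict")
    #     >>> mock.name
    #     '_mock_StructuredDataDict'
    #     >>> mock.pytype is MockDataset
    #     True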

    def allComponents(self) -> Mapping[str, MockStorageClass]:
        # Docstring inherited.
        return cast(Mapping[str, MockStorageClass], super().allComponents())

    @property
    def components(self) -> Mapping[str, MockStorageClass]:
        # Docstring inherited.
        return cast(Mapping[str, MockStorageClass], super().components)

    @property
    def derivedComponents(self) -> Mapping[str, MockStorageClass]:
        # Docstring inherited.
        return cast(Mapping[str, MockStorageClass], super().derivedComponents)

    def can_convert(self, other: StorageClass) -> bool:
        # Docstring inherited.
        if not isinstance(other, MockStorageClass):
            return False
        return self.original.can_convert(other.original)

    def coerce_type(self, incorrect: Any) -> Any:
        # Docstring inherited.
        if not isinstance(incorrect, MockDataset):
            raise TypeError(
                f"Mock storage class {self.name!r} can only convert in-memory datasets "
                f"corresponding to other mock storage classes, not {incorrect!r}."
            )
        factory = StorageClassFactory()
        other_storage_class = factory.getStorageClass(incorrect.storage_class)
        assert isinstance(other_storage_class, MockStorageClass), "Should not get a MockDataset otherwise."
        if other_storage_class.name == self.name:
            return incorrect
        if not self.can_convert(other_storage_class):
            raise TypeError(
                f"Mocked storage class {self.original.name!r} cannot convert from "
                f"{other_storage_class.original.name!r}."
            )
        return incorrect.make_derived(storageClass=self.name, converted_from=incorrect)
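
    # Conversion sketch (illustrative; ``target_mock`` is a hypothetical
    # MockStorageClass whose original can convert from the dataset's
    # original): the result is a derived dataset that records its source.
    #
    #     >>> converted = target_mock.coerce_type(mock_dataset)
    #     >>> converted.converted_from is mock_dataset
    #     True
    #     >>> converted.storage_class == target_mock.name
    #     True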

    @staticmethod
    def mock_dataset_type(original_type: DatasetType) -> DatasetType:
        """Replace a dataset type with a version that uses a mock storage class
        and name.

        Parameters
        ----------
        original_type : `lsst.daf.butler.DatasetType`
            Original dataset type to be mocked.

        Returns
        -------
        mock_type : `lsst.daf.butler.DatasetType`
            A mock version of the dataset type, with name and storage class
            changed and everything else unchanged.
        """
        mock_storage_class = MockStorageClass.get_or_register_mock(original_type.storageClass_name)
        mock_parent_storage_class = None
        if original_type.parentStorageClass is not None:
            mock_parent_storage_class = MockStorageClass.get_or_register_mock(
                original_type.parentStorageClass.name
            )
        return DatasetType(
            get_mock_name(original_type.name),
            original_type.dimensions,
            mock_storage_class,
            isCalibration=original_type.isCalibration(),
            parentStorageClass=mock_parent_storage_class,
        )

    @staticmethod
    def mock_dataset_refs(original_refs: Iterable[DatasetRef]) -> list[DatasetRef]:
        """Replace dataset references with versions that use a mock storage
        class and dataset type name.

        Parameters
        ----------
        original_refs : `~collections.abc.Iterable` [ \
                `lsst.daf.butler.DatasetRef` ]
            Original dataset references to be mocked.

        Returns
        -------
        mock_refs : `list` [ `lsst.daf.butler.DatasetRef` ]
            Mocked version of the dataset references, with dataset type name
            and storage class changed and everything else unchanged.
        """
        original_refs = list(original_refs)
        if not original_refs:
            return original_refs
        dataset_type = MockStorageClass.mock_dataset_type(original_refs[0].datasetType)
        return [
            DatasetRef(dataset_type, original_ref.dataId, run=original_ref.run, id=original_ref.id)
            for original_ref in original_refs
        ]

    @staticmethod
    def unmock_dataset_type(mock_type: DatasetType) -> DatasetType:
        """Replace a mock dataset type with the original one it was created
        from.

        Parameters
        ----------
        mock_type : `lsst.daf.butler.DatasetType`
            A dataset type with a mocked name and storage class.

        Returns
        -------
        original_type : `lsst.daf.butler.DatasetType`
            The original dataset type.
        """
        storage_class = mock_type.storageClass
        parent_storage_class = mock_type.parentStorageClass
        if isinstance(storage_class, MockStorageClass):
            storage_class = storage_class.original
        if parent_storage_class is not None and isinstance(parent_storage_class, MockStorageClass):
            parent_storage_class = parent_storage_class.original
        return DatasetType(
            get_original_name(mock_type.name),
            mock_type.dimensions,
            storage_class,
            isCalibration=mock_type.isCalibration(),
            parentStorageClass=parent_storage_class,
        )
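
    # Round-trip sketch (``original_type`` is a hypothetical dataset type
    # whose storage class is registered with the default factory):
    #
    #     >>> mocked = MockStorageClass.mock_dataset_type(original_type)
    #     >>> is_mock_name(mocked.name)
    #     True
    #     >>> MockStorageClass.unmock_dataset_type(mocked) == original_type
    #     True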

    @staticmethod
    def unmock_dataset_refs(mock_refs: Iterable[DatasetRef]) -> list[DatasetRef]:
        """Replace dataset references with versions that do not use a mock
        storage class and dataset type name.

        Parameters
        ----------
        mock_refs : `~collections.abc.Iterable` [ \
                `lsst.daf.butler.DatasetRef` ]
            Dataset references that use a mocked dataset type name and storage
            class.

        Returns
        -------
        original_refs : `list` [ `lsst.daf.butler.DatasetRef` ]
            The original dataset references.
        """
        mock_refs = list(mock_refs)
        if not mock_refs:
            return mock_refs
        dataset_type = MockStorageClass.unmock_dataset_type(mock_refs[0].datasetType)
        return [
            DatasetRef(dataset_type, mock_ref.dataId, run=mock_ref.run, id=mock_ref.id)
            for mock_ref in mock_refs
        ]


def _monkeypatch_daf_butler() -> None:
    """Replace methods in daf_butler's StorageClassFactory and FormatterFactory
    classes to automatically recognize mock storage classes.

    This monkey-patching is executed when the `lsst.pipe.base.tests.mocks`
    package is imported, and it affects all butler instances created before or
    after that import.
    """
    original_get_storage_class = StorageClassFactory.getStorageClass

    def new_get_storage_class(self: StorageClassFactory, storageClassName: str) -> StorageClass:
        try:
            return original_get_storage_class(self, storageClassName)
        except KeyError:
            if is_mock_name(storageClassName):
                return MockStorageClass.get_or_register_mock(get_original_name(storageClassName))
            raise

    StorageClassFactory.getStorageClass = new_get_storage_class  # type: ignore

    del new_get_storage_class

    original_get_formatter_class_with_match = FormatterFactory.getFormatterClassWithMatch

    def new_get_formatter_class_with_match(
        self: FormatterFactory, entity: Any
    ) -> tuple[LookupKey, type[Formatter], dict[str, Any]]:
        try:
            return original_get_formatter_class_with_match(self, entity)
        except KeyError:
            lookup_keys = (LookupKey(name=entity),) if isinstance(entity, str) else entity._lookupNames()
            for key in lookup_keys:
                # This matches mock dataset type names before mock storage
                # classes, and it would even match some regular dataset types
                # that are automatic connections (logs, configs, metadata) of
                # mocked tasks. The latter would be a problem, except that
                # those should have already matched in the try block above.
                if is_mock_name(key.name):
                    return (key, JsonFormatter, {})
            raise

    FormatterFactory.getFormatterClassWithMatch = new_get_formatter_class_with_match  # type: ignore

    del new_get_formatter_class_with_match

    original_get_formatter_with_match = FormatterFactory.getFormatterWithMatch

    def new_get_formatter_with_match(
        self: FormatterFactory, entity: Any, *args: Any, **kwargs: Any
    ) -> tuple[LookupKey, Formatter]:
        try:
            return original_get_formatter_with_match(self, entity, *args, **kwargs)
        except KeyError:
            lookup_keys = (LookupKey(name=entity),) if isinstance(entity, str) else entity._lookupNames()
            for key in lookup_keys:
                if is_mock_name(key.name):
                    return (key, JsonFormatter(*args, **kwargs))
            raise

    FormatterFactory.getFormatterWithMatch = new_get_formatter_with_match  # type: ignore

    del new_get_formatter_with_match


_monkeypatch_daf_butler()
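
# Effect of the patch, as an illustrative sketch: once this module is
# imported, asking the factory for a mock name registers and returns the mock
# storage class on the fly ("StructuredDataDict" is an arbitrary example):
#
#     >>> sc = StorageClassFactory().getStorageClass("_mock_StructuredDataDict")
#     >>> isinstance(sc, MockStorageClass)
#     True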