Coverage for python/lsst/pipe/base/tests/mocks/_storage_class.py: 39%
198 statements
coverage.py v7.5.1, created at 2024-05-11 03:31 -0700

# This file is part of pipe_base.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = (
    "MockDataset",
    "MockStorageClass",
    "MockDatasetQuantum",
    "MockStorageClassDelegate",
    "get_mock_name",
    "get_original_name",
    "is_mock_name",
)

import sys
import uuid
from collections.abc import Callable, Iterable, Mapping
from typing import Any, cast

import pydantic
from lsst.daf.butler import (
    DataIdValue,
    DatasetComponent,
    DatasetRef,
    DatasetType,
    Formatter,
    FormatterFactory,
    LookupKey,
    SerializedDatasetType,
    StorageClass,
    StorageClassDelegate,
    StorageClassFactory,
)
from lsst.daf.butler.formatters.json import JsonFormatter
from lsst.utils.introspection import get_full_type_name

_NAME_PREFIX: str = "_mock_"


def get_mock_name(original: str) -> str:
    """Return the name of the mock storage class, dataset type, or task label
    for the given original name.

    Parameters
    ----------
    original : `str`
        Original name.

    Returns
    -------
    name : `str`
        The name of the mocked version.
    """
    return _NAME_PREFIX + original


def get_original_name(mock: str) -> str:
    """Return the name of the original storage class, dataset type, or task
    label that corresponds to the given mock name.

    Parameters
    ----------
    mock : `str`
        The mocked name.

    Returns
    -------
    original : `str`
        The original name.
    """
    assert mock.startswith(_NAME_PREFIX)
    return mock.removeprefix(_NAME_PREFIX)


def is_mock_name(name: str) -> bool:
    """Return whether the given name is that of a mock storage class, dataset
    type, or task label.

    Parameters
    ----------
    name : `str`
        The given name to check.

    Returns
    -------
    is_mock : `bool`
        Whether the name is for a mock or not.
    """
    return name.startswith(_NAME_PREFIX)
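
# The three naming helpers above are pure string manipulation on the
# ``_NAME_PREFIX`` constant, so their behavior can be illustrated directly
# (a minimal sketch; "calexp" is just an arbitrary example name, not anything
# special to this module):
#
#     assert get_mock_name("calexp") == "_mock_calexp"
#     assert is_mock_name("_mock_calexp")
#     assert get_original_name("_mock_calexp") == "calexp"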

# Tests for this module are in the ci_middleware package, where we have easy
# access to complex real storage classes (and their pytypes) to test against.

class MockDataset(pydantic.BaseModel):
    """The in-memory dataset type used by `MockStorageClass`."""

    dataset_id: uuid.UUID | None
    """Universal unique identifier for this dataset."""

    dataset_type: SerializedDatasetType
    """Butler dataset type of this dataset.

    See the documentation for ``data_id`` for why this is a
    `~lsst.daf.butler.SerializedDatasetType` instead of a "real" one.
    """

    data_id: dict[str, DataIdValue]
    """Butler data ID for this dataset.

    This is a simple `dict` instead of a "real"
    `~lsst.daf.butler.DataCoordinate` for two reasons:

    - the mock dataset may need to be read from disk in a context in which a
      `~lsst.daf.butler.DimensionUniverse` is unavailable;
    - we don't want the complexity of having a separate
      ``SerializedMockDataCoordinate``.
    """

    run: str | None
    """`~lsst.daf.butler.CollectionType.RUN` collection this dataset belongs
    to.
    """

    quantum: MockDatasetQuantum | None = None
    """Description of the quantum that produced this dataset."""

    output_connection_name: str | None = None
    """The name of the PipelineTask output connection that produced this
    dataset.
    """

    converted_from: MockDataset | None = None
    """Another `MockDataset` that underwent a storage class conversion to
    produce this one.
    """

    parent: MockDataset | None = None
    """Another `MockDataset` from which a component was extracted to form
    this one.
    """

    parameters: dict[str, str] | None = None
    """`repr` of all parameters applied when reading this dataset."""

    @property
    def storage_class(self) -> str:
        return cast(str, self.dataset_type.storageClass)

    def make_derived(self, **kwargs: Any) -> MockDataset:
        """Return a new MockDataset that represents applying some storage
        class operation to this one.

        Parameters
        ----------
        **kwargs : `~typing.Any`
            Keyword arguments are fields of `MockDataset` or
            `~lsst.daf.butler.SerializedDatasetType` to override in the
            result.

        Returns
        -------
        derived : `MockDataset`
            The newly-mocked dataset.
        """
        dataset_type_updates = {
            k: kwargs.pop(k) for k in list(kwargs) if k in SerializedDatasetType.model_fields
        }
        kwargs.setdefault("dataset_type", self.dataset_type.model_copy(update=dataset_type_updates))
        # Fields below are those that should not be propagated to the derived
        # dataset, because they're not about the intrinsic on-disk thing.
        kwargs.setdefault("converted_from", None)
        kwargs.setdefault("parent", None)
        kwargs.setdefault("parameters", None)
        # Also use setdefault on the ref in case the caller wants to override
        # that directly, but this is expected to be rare enough that it's not
        # worth trying to optimize out the work above to make derived_ref.
        return self.model_copy(update=kwargs)
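
    # A minimal sketch of how ``make_derived`` records a storage class
    # operation.  The dataset type and run names here are hypothetical, and
    # the ``SerializedDatasetType`` field values are illustrative only:
    #
    #     original = MockDataset(
    #         dataset_id=uuid.uuid4(),
    #         dataset_type=SerializedDatasetType(
    #             name="_mock_example", storageClass="_mock_StructuredDataDict"
    #         ),
    #         data_id={},
    #         run="example_run",
    #     )
    #     derived = original.make_derived(parameters={"rows": repr(slice(0, 2))})
    #     assert derived.parameters == {"rows": repr(slice(0, 2))}
    #     # "parameters" is not a SerializedDatasetType field, so the dataset
    #     # type is carried over unchanged.
    #     assert derived.dataset_type == original.dataset_type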

    # Work around the fact that Sphinx chokes on Pydantic docstring
    # formatting, when we inherit those docstrings in our public classes.
    if "sphinx" in sys.modules:

        def copy(self, *args: Any, **kwargs: Any) -> Any:
            """See `pydantic.BaseModel.copy`."""
            return super().copy(*args, **kwargs)

        def model_dump(self, *args: Any, **kwargs: Any) -> Any:
            """See `pydantic.BaseModel.model_dump`."""
            return super().model_dump(*args, **kwargs)

        def model_dump_json(self, *args: Any, **kwargs: Any) -> Any:
            """See `pydantic.BaseModel.model_dump_json`."""
            return super().model_dump_json(*args, **kwargs)

        def model_copy(self, *args: Any, **kwargs: Any) -> Any:
            """See `pydantic.BaseModel.model_copy`."""
            return super().model_copy(*args, **kwargs)

        @classmethod
        def model_json_schema(cls, *args: Any, **kwargs: Any) -> Any:
            """See `pydantic.BaseModel.model_json_schema`."""
            return super().model_json_schema(*args, **kwargs)

class MockDatasetQuantum(pydantic.BaseModel):
    """Description of the quantum that produced a mock dataset.

    This is also used to represent task-init operations for init-output mock
    datasets.
    """

    task_label: str
    """Label of the producing PipelineTask in its pipeline."""

    data_id: dict[str, DataIdValue]
    """Data ID for the quantum."""

    inputs: dict[str, list[MockDataset]]
    """Mock datasets provided as input to the quantum.

    Keys are task-internal connection names, not dataset type names.
    """

    # Work around the fact that Sphinx chokes on Pydantic docstring
    # formatting, when we inherit those docstrings in our public classes.
    if "sphinx" in sys.modules:

        def copy(self, *args: Any, **kwargs: Any) -> Any:
            """See `pydantic.BaseModel.copy`."""
            return super().copy(*args, **kwargs)

        def model_dump(self, *args: Any, **kwargs: Any) -> Any:
            """See `pydantic.BaseModel.model_dump`."""
            return super().model_dump(*args, **kwargs)

        def model_dump_json(self, *args: Any, **kwargs: Any) -> Any:
            """See `pydantic.BaseModel.model_dump_json`."""
            return super().model_dump_json(*args, **kwargs)

        def model_copy(self, *args: Any, **kwargs: Any) -> Any:
            """See `pydantic.BaseModel.model_copy`."""
            return super().model_copy(*args, **kwargs)

        @classmethod
        def model_json_schema(cls, *args: Any, **kwargs: Any) -> Any:
            """See `pydantic.BaseModel.model_json_schema`."""
            return super().model_json_schema(*args, **kwargs)


MockDataset.model_rebuild()

class MockStorageClassDelegate(StorageClassDelegate):
    """Implementation of the StorageClassDelegate interface for mock datasets.

    This class does not implement assembly and disassembly just because it's
    not needed right now. That could be added in the future with some
    additional tracking attributes in `MockDataset`.
    """

    def assemble(self, components: dict[str, Any], pytype: type | None = None) -> MockDataset:
        # Docstring inherited.
        raise NotImplementedError("Mock storage classes do not implement assembly.")

    def getComponent(self, composite: Any, componentName: str) -> Any:
        # Docstring inherited.
        assert isinstance(
            composite, MockDataset
        ), f"MockStorageClassDelegate given a non-mock dataset {composite!r}."
        return composite.make_derived(
            name=f"{composite.dataset_type.name}.{componentName}",
            storageClass=self.storageClass.allComponents()[componentName].name,
            parentStorageClass=self.storageClass.name,
            parent=composite,
        )

    def disassemble(
        self, composite: Any, subset: Iterable | None = None, override: Any | None = None
    ) -> dict[str, DatasetComponent]:
        # Docstring inherited.
        raise NotImplementedError("Mock storage classes do not implement disassembly.")

    def handleParameters(self, inMemoryDataset: Any, parameters: Mapping[str, Any] | None = None) -> Any:
        # Docstring inherited.
        assert isinstance(
            inMemoryDataset, MockDataset
        ), f"MockStorageClassDelegate given a non-mock dataset {inMemoryDataset!r}."
        if not parameters:
            return inMemoryDataset
        return inMemoryDataset.make_derived(parameters={k: repr(v) for k, v in parameters.items()})
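
    # A hedged sketch of the parameter handling above: given any MockDataset
    # (``mock_dataset`` is assumed to exist, e.g. built as in the
    # ``make_derived`` sketch earlier) and the delegate's mock storage class
    # ``storage_class``, a read with parameters yields a derived dataset that
    # records the reprs of those parameters:
    #
    #     delegate = MockStorageClassDelegate(storage_class)
    #     result = delegate.handleParameters(mock_dataset, {"rows": slice(0, 2)})
    #     assert result.parameters == {"rows": repr(slice(0, 2))}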

class MockStorageClass(StorageClass):
    """A reimplementation of `lsst.daf.butler.StorageClass` for mock datasets.

    Parameters
    ----------
    original : `~lsst.daf.butler.StorageClass`
        The original storage class.
    factory : `~lsst.daf.butler.StorageClassFactory` or `None`, optional
        Storage class factory to use. If `None` the default factory is used.

    Notes
    -----
    Each `MockStorageClass` instance corresponds to a real "original" storage
    class, with components and conversions that are mocks of the original's
    components and conversions. The ``pytype`` for all `MockStorageClass`
    instances is `MockDataset`.
    """

    def __init__(self, original: StorageClass, factory: StorageClassFactory | None = None):
        name = get_mock_name(original.name)
        if factory is None:
            factory = StorageClassFactory()
        super().__init__(
            name=name,
            pytype=MockDataset,
            components={
                k: self.get_or_register_mock(v.name, factory) for k, v in original.components.items()
            },
            derivedComponents={
                k: self.get_or_register_mock(v.name, factory)
                for k, v in original.derivedComponents.items()
            },
            parameters=frozenset(original.parameters),
            delegate=get_full_type_name(MockStorageClassDelegate),
            # Conversions work differently for mock storage classes, since
            # they all have the same pytype: we use the original storage class
            # being mocked to see if we can convert, then just make a new
            # MockDataset that points back to the original.
            converters={},
        )
        self.original = original
        # Make certain no one tries to use the converters.
        self._converters = None  # type: ignore

    def _get_converters_by_type(self) -> dict[type, Callable[[Any], Any]]:
        # Docstring inherited.
        raise NotImplementedError("MockStorageClass does not use converters.")

    @classmethod
    def get_or_register_mock(
        cls, original: str, factory: StorageClassFactory | None = None
    ) -> MockStorageClass:
        """Return a mock storage class for the given original storage class,
        creating and registering it if necessary.

        Parameters
        ----------
        original : `str`
            Name of the original storage class to be mocked.
        factory : `~lsst.daf.butler.StorageClassFactory`, optional
            Storage class factory singleton instance.

        Returns
        -------
        mock : `MockStorageClass`
            New storage class that mocks ``original``.
        """
        name = get_mock_name(original)
        if factory is None:
            factory = StorageClassFactory()
        if name in factory:
            return cast(MockStorageClass, factory.getStorageClass(name))
        else:
            result = cls(factory.getStorageClass(original), factory)
            factory.registerStorageClass(result)
            return result
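
    # For example (a minimal sketch; this assumes "StructuredDataDict" is
    # registered with the default factory, as it is in standard daf_butler
    # configurations):
    #
    #     mock_sc = MockStorageClass.get_or_register_mock("StructuredDataDict")
    #     assert mock_sc.name == "_mock_StructuredDataDict"
    #     assert mock_sc.pytype is MockDataset
    #     # A second call should return the already-registered instance.
    #     assert MockStorageClass.get_or_register_mock("StructuredDataDict") is mock_sc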

    def allComponents(self) -> Mapping[str, MockStorageClass]:
        # Docstring inherited.
        return cast(Mapping[str, MockStorageClass], super().allComponents())

    @property
    def components(self) -> Mapping[str, MockStorageClass]:
        # Docstring inherited.
        return cast(Mapping[str, MockStorageClass], super().components)

    @property
    def derivedComponents(self) -> Mapping[str, MockStorageClass]:
        # Docstring inherited.
        return cast(Mapping[str, MockStorageClass], super().derivedComponents)

    def can_convert(self, other: StorageClass) -> bool:
        # Docstring inherited.
        if not isinstance(other, MockStorageClass):
            return False
        return self.original.can_convert(other.original)

    def coerce_type(self, incorrect: Any) -> Any:
        # Docstring inherited.
        if not isinstance(incorrect, MockDataset):
            raise TypeError(
                f"Mock storage class {self.name!r} can only convert in-memory datasets "
                f"corresponding to other mock storage classes, not {incorrect!r}."
            )
        factory = StorageClassFactory()
        other_storage_class = factory.getStorageClass(incorrect.storage_class)
        assert isinstance(other_storage_class, MockStorageClass), "Should not get a MockDataset otherwise."
        if other_storage_class.name == self.name:
            return incorrect
        if not self.can_convert(other_storage_class):
            raise TypeError(
                f"Mocked storage class {self.original.name!r} cannot convert from "
                f"{other_storage_class.original.name!r}."
            )
        return incorrect.make_derived(storageClass=self.name, converted_from=incorrect)

    @staticmethod
    def mock_dataset_type(original_type: DatasetType) -> DatasetType:
        """Replace a dataset type with a version that uses a mock storage
        class and name.

        Parameters
        ----------
        original_type : `lsst.daf.butler.DatasetType`
            Original dataset type to be mocked.

        Returns
        -------
        mock_type : `lsst.daf.butler.DatasetType`
            A mock version of the dataset type, with name and storage class
            changed and everything else unchanged.
        """
        mock_storage_class = MockStorageClass.get_or_register_mock(original_type.storageClass_name)
        mock_parent_storage_class = None
        if original_type.parentStorageClass is not None:
            mock_parent_storage_class = MockStorageClass.get_or_register_mock(
                original_type.parentStorageClass.name
            )
        return DatasetType(
            get_mock_name(original_type.name),
            original_type.dimensions,
            mock_storage_class,
            isCalibration=original_type.isCalibration(),
            parentStorageClass=mock_parent_storage_class,
        )
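
    # A hedged roundtrip sketch (``universe`` is assumed to be an existing
    # `~lsst.daf.butler.DimensionUniverse`; "calexp", its dimensions, and the
    # "ExposureF" storage class are illustrative only):
    #
    #     original = DatasetType(
    #         "calexp", ["visit", "detector"], "ExposureF", universe=universe
    #     )
    #     mocked = MockStorageClass.mock_dataset_type(original)
    #     assert mocked.name == "_mock_calexp"
    #     assert is_mock_name(mocked.storageClass_name)
    #     assert MockStorageClass.unmock_dataset_type(mocked) == original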

    @staticmethod
    def mock_dataset_refs(original_refs: Iterable[DatasetRef]) -> list[DatasetRef]:
        """Replace dataset references with versions that use a mock storage
        class and dataset type name.

        Parameters
        ----------
        original_refs : `~collections.abc.Iterable` [ \
                `lsst.daf.butler.DatasetRef` ]
            Original dataset references to be mocked.

        Returns
        -------
        mock_refs : `list` [ `lsst.daf.butler.DatasetRef` ]
            Mocked versions of the dataset references, with dataset type name
            and storage class changed and everything else unchanged.
        """
        original_refs = list(original_refs)
        if not original_refs:
            return original_refs
        dataset_type = MockStorageClass.mock_dataset_type(original_refs[0].datasetType)
        return [
            DatasetRef(dataset_type, original_ref.dataId, run=original_ref.run, id=original_ref.id)
            for original_ref in original_refs
        ]

    @staticmethod
    def unmock_dataset_type(mock_type: DatasetType) -> DatasetType:
        """Replace a mock dataset type with the original one it was created
        from.

        Parameters
        ----------
        mock_type : `lsst.daf.butler.DatasetType`
            A dataset type with a mocked name and storage class.

        Returns
        -------
        original_type : `lsst.daf.butler.DatasetType`
            The original dataset type.
        """
        storage_class = mock_type.storageClass
        parent_storage_class = mock_type.parentStorageClass
        if isinstance(storage_class, MockStorageClass):
            storage_class = storage_class.original
        if parent_storage_class is not None and isinstance(parent_storage_class, MockStorageClass):
            parent_storage_class = parent_storage_class.original
        return DatasetType(
            get_original_name(mock_type.name),
            mock_type.dimensions,
            storage_class,
            isCalibration=mock_type.isCalibration(),
            parentStorageClass=parent_storage_class,
        )

    @staticmethod
    def unmock_dataset_refs(mock_refs: Iterable[DatasetRef]) -> list[DatasetRef]:
        """Replace dataset references with versions that do not use a mock
        storage class and dataset type name.

        Parameters
        ----------
        mock_refs : `~collections.abc.Iterable` [ \
                `lsst.daf.butler.DatasetRef` ]
            Dataset references that use a mocked dataset type name and storage
            class.

        Returns
        -------
        original_refs : `list` [ `lsst.daf.butler.DatasetRef` ]
            The original dataset references.
        """
        mock_refs = list(mock_refs)
        if not mock_refs:
            return mock_refs
        dataset_type = MockStorageClass.unmock_dataset_type(mock_refs[0].datasetType)
        return [
            DatasetRef(dataset_type, mock_ref.dataId, run=mock_ref.run, id=mock_ref.id)
            for mock_ref in mock_refs
        ]
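
    # Note that both ref helpers assume all the given references share a
    # single dataset type (only the first is inspected). A hedged roundtrip
    # sketch, with ``refs`` an existing homogeneous list of DatasetRefs:
    #
    #     mocked = MockStorageClass.mock_dataset_refs(refs)
    #     assert [r.id for r in mocked] == [r.id for r in refs]
    #     # Unmocking should recover references equal to the originals.
    #     assert MockStorageClass.unmock_dataset_refs(mocked) == refs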

def _monkeypatch_daf_butler() -> None:
    """Replace methods in daf_butler's StorageClassFactory and
    FormatterFactory classes to automatically recognize mock storage classes.

    This monkey-patching is executed when the `lsst.pipe.base.tests.mocks`
    package is imported, and it affects all butler instances created before
    or after that import.
    """
    original_get_storage_class = StorageClassFactory.getStorageClass

    def new_get_storage_class(self: StorageClassFactory, storageClassName: str) -> StorageClass:
        try:
            return original_get_storage_class(self, storageClassName)
        except KeyError:
            if is_mock_name(storageClassName):
                return MockStorageClass.get_or_register_mock(get_original_name(storageClassName))
            raise

    StorageClassFactory.getStorageClass = new_get_storage_class  # type: ignore

    del new_get_storage_class

    original_get_formatter_class_with_match = FormatterFactory.getFormatterClassWithMatch

    def new_get_formatter_class_with_match(
        self: FormatterFactory, entity: Any
    ) -> tuple[LookupKey, type[Formatter], dict[str, Any]]:
        try:
            return original_get_formatter_class_with_match(self, entity)
        except KeyError:
            lookup_keys = (LookupKey(name=entity),) if isinstance(entity, str) else entity._lookupNames()
            for key in lookup_keys:
                # This matches mock dataset type names before mock storage
                # classes, and it would even match some regular dataset types
                # that are automatic connections (logs, configs, metadata) of
                # mocked tasks. The latter would be a problem, except that
                # those should have already matched in the try block above.
                if is_mock_name(key.name):
                    return (key, JsonFormatter, {})
            raise

    FormatterFactory.getFormatterClassWithMatch = new_get_formatter_class_with_match  # type: ignore

    del new_get_formatter_class_with_match

    original_get_formatter_with_match = FormatterFactory.getFormatterWithMatch

    def new_get_formatter_with_match(
        self: FormatterFactory, entity: Any, *args: Any, **kwargs: Any
    ) -> tuple[LookupKey, Formatter]:
        try:
            return original_get_formatter_with_match(self, entity, *args, **kwargs)
        except KeyError:
            lookup_keys = (LookupKey(name=entity),) if isinstance(entity, str) else entity._lookupNames()
            for key in lookup_keys:
                if is_mock_name(key.name):
                    return (key, JsonFormatter(*args, **kwargs))
            raise

    FormatterFactory.getFormatterWithMatch = new_get_formatter_with_match  # type: ignore

    del new_get_formatter_with_match


_monkeypatch_daf_butler()
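
# After this module has been imported, the patched factories resolve mock
# names on demand. A minimal sketch of the observable effect (assuming
# "StructuredDataDict" is registered with the default factory):
#
#     factory = StorageClassFactory()
#     sc = factory.getStorageClass("_mock_StructuredDataDict")
#     assert isinstance(sc, MockStorageClass)
#     assert sc.original.name == "StructuredDataDict"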