Coverage for python/lsst/daf/butler/core/datasets/type.py: 20% of 225 statements (coverage.py v6.5.0, created at 2023-04-14 09:22 +0000)
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ["DatasetType", "SerializedDatasetType"]

import re
from copy import deepcopy
from types import MappingProxyType
from typing import (
    TYPE_CHECKING,
    Any,
    Callable,
    ClassVar,
    Dict,
    Iterable,
    List,
    Mapping,
    Optional,
    Tuple,
    Type,
    Union,
)

from pydantic import BaseModel, StrictBool, StrictStr

from ..configSupport import LookupKey
from ..dimensions import DimensionGraph, SerializedDimensionGraph
from ..json import from_json_pydantic, to_json_pydantic
from ..storageClass import StorageClass, StorageClassFactory

if TYPE_CHECKING:
    from ...registry import Registry
    from ..dimensions import Dimension, DimensionUniverse


def _safeMakeMappingProxyType(data: Optional[Mapping]) -> Mapping:
    if data is None:
        data = {}
    return MappingProxyType(data)


class SerializedDatasetType(BaseModel):
    """Simplified model of a `DatasetType` suitable for serialization."""

    name: StrictStr
    storageClass: Optional[StrictStr] = None
    dimensions: Optional[SerializedDimensionGraph] = None
    parentStorageClass: Optional[StrictStr] = None
    isCalibration: StrictBool = False

    @classmethod
    def direct(
        cls,
        *,
        name: str,
        storageClass: Optional[str] = None,
        dimensions: Optional[Dict] = None,
        parentStorageClass: Optional[str] = None,
        isCalibration: bool = False,
    ) -> SerializedDatasetType:
        """Construct a `SerializedDatasetType` directly without validators.

        This differs from Pydantic's ``construct`` method in that the
        arguments are explicitly what the model requires, and it will recurse
        through members, constructing them from their corresponding `direct`
        methods.

        This method should only be called when the inputs are trusted.
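
        Examples
        --------
        A minimal sketch; the field values here are purely illustrative:

        >>> s = SerializedDatasetType.direct(name="calexp", storageClass="ExposureF")
        >>> s.name
        'calexp'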
        """
        node = SerializedDatasetType.__new__(cls)
        setter = object.__setattr__
        setter(node, "name", name)
        setter(node, "storageClass", storageClass)
        setter(
            node,
            "dimensions",
            dimensions if dimensions is None else SerializedDimensionGraph.direct(**dimensions),
        )
        setter(node, "parentStorageClass", parentStorageClass)
        setter(node, "isCalibration", isCalibration)
        setter(
            node,
            "__fields_set__",
            {"name", "storageClass", "dimensions", "parentStorageClass", "isCalibration"},
        )
        return node


class DatasetType:
    r"""A named category of Datasets.

    Defines how they are organized, related, and stored.

    A concrete, final class whose instances represent `DatasetType`\ s.
    `DatasetType` instances may be constructed without a `Registry`,
    but they must be registered
    via `Registry.registerDatasetType()` before corresponding Datasets
    may be added.
    `DatasetType` instances are immutable.

    Parameters
    ----------
    name : `str`
        A string name for the Dataset; must correspond to the same
        `DatasetType` across all Registries. Names must start with an
        upper or lowercase letter, and may contain only letters, numbers,
        and underscores. Component dataset types should contain a single
        period separating the base dataset type name from the component name
        (and may be recursive).
    dimensions : `DimensionGraph` or iterable of `Dimension` or `str`
        Dimensions used to label and relate instances of this `DatasetType`.
        If not a `DimensionGraph`, ``universe`` must be provided as well.
    storageClass : `StorageClass` or `str`
        Instance of a `StorageClass` or name of `StorageClass` that defines
        how this `DatasetType` is persisted.
    parentStorageClass : `StorageClass` or `str`, optional
        Instance of a `StorageClass` or name of `StorageClass` that defines
        how the composite parent is persisted. Must be `None` if this
        is not a component.
    universe : `DimensionUniverse`, optional
        Set of all known dimensions, used to normalize ``dimensions`` if it
        is not already a `DimensionGraph`.
    isCalibration : `bool`, optional
        If `True`, this dataset type may be included in
        `~CollectionType.CALIBRATION` collections.

    See Also
    --------
    :ref:`daf_butler_organizing_datasets`
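
    Examples
    --------
    A hedged sketch only: construction needs a real `DimensionUniverse`
    (e.g. ``registry.dimensions``) and a registered storage class name,
    so the names below are illustrative rather than directly runnable:

    >>> datasetType = DatasetType(
    ...     "calexp",
    ...     dimensions=["instrument", "visit", "detector"],
    ...     storageClass="ExposureF",
    ...     universe=registry.dimensions,
    ... )  # doctest: +SKIP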
    """

    __slots__ = (
        "_name",
        "_dimensions",
        "_storageClass",
        "_storageClassName",
        "_parentStorageClass",
        "_parentStorageClassName",
        "_isCalibration",
    )

    _serializedType = SerializedDatasetType

    VALID_NAME_REGEX = re.compile("^[a-zA-Z_][a-zA-Z0-9_]*(\\.[a-zA-Z_][a-zA-Z0-9_]*)*$")

    @staticmethod
    def nameWithComponent(datasetTypeName: str, componentName: str) -> str:
        """Form a valid DatasetTypeName from a parent and component.

        No validation is performed.

        Parameters
        ----------
        datasetTypeName : `str`
            Base type name.
        componentName : `str`
            Name of component.

        Returns
        -------
        compTypeName : `str`
            Name to use for component DatasetType.
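
        Examples
        --------
        With purely illustrative names:

        >>> DatasetType.nameWithComponent("calexp", "wcs")
        'calexp.wcs'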
        """
        return "{}.{}".format(datasetTypeName, componentName)

    def __init__(
        self,
        name: str,
        dimensions: Union[DimensionGraph, Iterable[Union[Dimension, str]]],
        storageClass: Union[StorageClass, str],
        parentStorageClass: Optional[Union[StorageClass, str]] = None,
        *,
        universe: Optional[DimensionUniverse] = None,
        isCalibration: bool = False,
    ):
        if self.VALID_NAME_REGEX.match(name) is None:
            raise ValueError(f"DatasetType name '{name}' is invalid.")
        self._name = name
        if not isinstance(dimensions, DimensionGraph):
            if universe is None:
                raise ValueError(
                    "If dimensions is not a normalized DimensionGraph, a universe must be provided."
                )
            dimensions = universe.extract(dimensions)
        self._dimensions = dimensions
        if name in self._dimensions.universe.getGovernorDimensions().names:
            raise ValueError(f"Governor dimension name {name} cannot be used as a dataset type name.")
        if not isinstance(storageClass, (StorageClass, str)):
            raise ValueError(f"StorageClass argument must be StorageClass or str. Got {storageClass}")
        self._storageClass: Optional[StorageClass]
        if isinstance(storageClass, StorageClass):
            self._storageClass = storageClass
            self._storageClassName = storageClass.name
        else:
            self._storageClass = None
            self._storageClassName = storageClass

        self._parentStorageClass: Optional[StorageClass] = None
        self._parentStorageClassName: Optional[str] = None
        if parentStorageClass is not None:
            if not isinstance(parentStorageClass, (StorageClass, str)):
                raise ValueError(
                    f"Parent StorageClass argument must be StorageClass or str. Got {parentStorageClass}"
                )

            # Only allowed for a component dataset type
            _, componentName = self.splitDatasetTypeName(self._name)
            if componentName is None:
                raise ValueError(
                    f"Can not specify a parent storage class if this is not a component ({self._name})"
                )
            if isinstance(parentStorageClass, StorageClass):
                self._parentStorageClass = parentStorageClass
                self._parentStorageClassName = parentStorageClass.name
            else:
                self._parentStorageClassName = parentStorageClass

        # Ensure that parent storage class is specified when we have
        # a component and is not specified when we don't
        _, componentName = self.splitDatasetTypeName(self._name)
        if parentStorageClass is None and componentName is not None:
            raise ValueError(
                f"Component dataset type '{self._name}' constructed without parent storage class"
            )
        if parentStorageClass is not None and componentName is None:
            raise ValueError(f"Parent storage class specified by {self._name} is not a composite")
        self._isCalibration = isCalibration

    def __repr__(self) -> str:
        extra = ""
        if self._parentStorageClassName:
            extra = f", parentStorageClass={self._parentStorageClassName}"
        if self._isCalibration:
            extra += ", isCalibration=True"
        return f"DatasetType({self.name!r}, {self.dimensions}, {self._storageClassName}{extra})"

    def _equal_ignoring_storage_class(self, other: Any) -> bool:
        """Check everything is equal except the storage class.

        Parameters
        ----------
        other : Any
            Object to check against this one.

        Returns
        -------
        mostly : `bool`
            Returns `True` if everything except the storage class is equal.
        """
        if not isinstance(other, type(self)):
            return False
        if self._name != other._name:
            return False
        if self._dimensions != other._dimensions:
            return False
        if self._isCalibration != other._isCalibration:
            return False
        if self._parentStorageClass is not None and other._parentStorageClass is not None:
            return self._parentStorageClass == other._parentStorageClass
        else:
            return self._parentStorageClassName == other._parentStorageClassName

    def __eq__(self, other: Any) -> bool:
        mostly_equal = self._equal_ignoring_storage_class(other)
        if not mostly_equal:
            return False

        # Be careful not to force a storage class to import the corresponding
        # python code.
        if self._storageClass is not None and other._storageClass is not None:
            if self._storageClass != other._storageClass:
                return False
        else:
            if self._storageClassName != other._storageClassName:
                return False
        return True

    def is_compatible_with(self, other: DatasetType) -> bool:
        """Determine if the given `DatasetType` is compatible with this one.

        Compatibility requires a matching name and dimensions and a storage
        class for this dataset type that can convert the python type
        associated with the other storage class to this python type.

        Parameters
        ----------
        other : `DatasetType`
            Dataset type to check.

        Returns
        -------
        is_compatible : `bool`
            Returns `True` if the other dataset type is either the same as
            this or the storage class associated with the other can be
            converted to this.
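
        Examples
        --------
        A hedged sketch; ``dt_a`` and ``dt_b`` are hypothetical dataset types
        that share a name and dimensions but carry different, convertible
        storage classes:

        >>> dt_a.is_compatible_with(dt_b)  # doctest: +SKIP
        True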
        """
        mostly_equal = self._equal_ignoring_storage_class(other)
        if not mostly_equal:
            return False

        # If the storage class names match then they are compatible.
        if self._storageClassName == other._storageClassName:
            return True

        # Now required to check the full storage class.
        self_sc = self.storageClass
        other_sc = other.storageClass

        return self_sc.can_convert(other_sc)

    def __hash__(self) -> int:
        """Hash DatasetType instance.

        This only uses the StorageClass name, which is consistent with the
        implementation of the StorageClass hash method.
        """
        return hash((self._name, self._dimensions, self._storageClassName, self._parentStorageClassName))

    def __lt__(self, other: Any) -> bool:
        """Sort using the dataset type name."""
        if not isinstance(other, type(self)):
            return NotImplemented
        return self.name < other.name

    @property
    def name(self) -> str:
        """Return a string name for the Dataset.

        Must correspond to the same `DatasetType` across all Registries.
        """
        return self._name

    @property
    def dimensions(self) -> DimensionGraph:
        r"""Return the `Dimension`\ s for this dataset type.

        The dimensions label and relate instances of this
        `DatasetType` (`DimensionGraph`).
        """
        return self._dimensions

    @property
    def storageClass(self) -> StorageClass:
        """Return the `StorageClass` instance associated with this dataset
        type.

        The `StorageClass` defines how this `DatasetType`
        is persisted. Note that if the DatasetType was constructed with only
        the name of a StorageClass, then a Butler has to be initialized
        before this property can be used.
        """
        if self._storageClass is None:
            self._storageClass = StorageClassFactory().getStorageClass(self._storageClassName)
        return self._storageClass

    @property
    def storageClass_name(self) -> str:
        """Return the storage class name.

        This will never force the storage class to be imported.
        """
        return self._storageClassName

    @property
    def parentStorageClass(self) -> Optional[StorageClass]:
        """Return the storage class of the composite containing this
        component.

        Note that if the DatasetType was constructed with only the name of a
        StorageClass, then a Butler has to be initialized before this
        property can be used. Can be `None` if this is not a component of a
        composite. Must be defined if this is a component.
        """
        if self._parentStorageClass is None and self._parentStorageClassName is None:
            return None
        if self._parentStorageClass is None and self._parentStorageClassName is not None:
            self._parentStorageClass = StorageClassFactory().getStorageClass(self._parentStorageClassName)
        return self._parentStorageClass

    def isCalibration(self) -> bool:
        """Return whether datasets of this type can be in calibration
        collections.

        Returns
        -------
        flag : `bool`
            `True` if datasets of this type may be included in calibration
            collections.
        """
        return self._isCalibration

    @staticmethod
    def splitDatasetTypeName(datasetTypeName: str) -> Tuple[str, Optional[str]]:
        """Return the root name and the component from a composite name.

        Parameters
        ----------
        datasetTypeName : `str`
            The name of the dataset type; can include a component using
            a "."-separator.

        Returns
        -------
        rootName : `str`
            Root name without any components.
        componentName : `str`
            The component if it has been specified, else `None`.

        Notes
        -----
        If the dataset type name is ``a.b.c`` this method will return a
        root name of ``a`` and a component name of ``b.c``.
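
        Examples
        --------
        These mirror the behaviour described in the Notes above:

        >>> DatasetType.splitDatasetTypeName("a.b.c")
        ('a', 'b.c')
        >>> DatasetType.splitDatasetTypeName("calexp")
        ('calexp', None)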
        """
        comp = None
        root = datasetTypeName
        if "." in root:
            # If there is doubt, the component is after the first "."
            root, comp = root.split(".", maxsplit=1)
        return root, comp

    def nameAndComponent(self) -> Tuple[str, Optional[str]]:
        """Return the root name of this dataset type and any component.

        Returns
        -------
        rootName : `str`
            Root name for this `DatasetType` without any components.
        componentName : `str`
            The component if it has been specified, else `None`.
        """
        return self.splitDatasetTypeName(self.name)

    def component(self) -> Optional[str]:
        """Return the component name (if defined).

        Returns
        -------
        comp : `str`
            Name of component part of DatasetType name. `None` if this
            `DatasetType` is not associated with a component.
        """
        _, comp = self.nameAndComponent()
        return comp

    def componentTypeName(self, component: str) -> str:
        """Derive a component dataset type from a composite.

        Parameters
        ----------
        component : `str`
            Name of the component.

        Returns
        -------
        derived : `str`
            Compound name of this `DatasetType` and the component.

        Raises
        ------
        KeyError
            Requested component is not supported by this `DatasetType`.
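
        Examples
        --------
        A hedged sketch; assumes a dataset type ``calexp_type`` whose
        storage class defines a ``wcs`` component:

        >>> calexp_type.componentTypeName("wcs")  # doctest: +SKIP
        'calexp.wcs'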
        """
        if component in self.storageClass.allComponents():
            return self.nameWithComponent(self.name, component)
        raise KeyError(f"Requested component ({component}) not understood by this DatasetType ({self})")

    def makeCompositeDatasetType(self) -> DatasetType:
        """Return a composite dataset type from the component.

        Returns
        -------
        composite : `DatasetType`
            The composite dataset type.

        Raises
        ------
        RuntimeError
            Raised if this dataset type is not a component dataset type.
        """
        if not self.isComponent():
            raise RuntimeError(f"DatasetType {self.name} must be a component to form the composite")
        composite_name, _ = self.nameAndComponent()
        if self.parentStorageClass is None:
            raise ValueError(
                f"Parent storage class is not set. Unable to create composite type from {self.name}"
            )
        return DatasetType(
            composite_name,
            dimensions=self.dimensions,
            storageClass=self.parentStorageClass,
            isCalibration=self.isCalibration(),
        )

    def makeComponentDatasetType(self, component: str) -> DatasetType:
        """Return a component dataset type from a composite.

        Assumes the same dimensions as the parent.

        Parameters
        ----------
        component : `str`
            Name of the component.

        Returns
        -------
        datasetType : `DatasetType`
            A new DatasetType instance.
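
        Examples
        --------
        A hedged sketch; assumes a composite dataset type ``calexp_type``
        with a ``wcs`` component:

        >>> wcs_type = calexp_type.makeComponentDatasetType("wcs")  # doctest: +SKIP
        >>> wcs_type.name  # doctest: +SKIP
        'calexp.wcs'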
        """
        # The component could be a read/write or read component
        return DatasetType(
            self.componentTypeName(component),
            dimensions=self.dimensions,
            storageClass=self.storageClass.allComponents()[component],
            parentStorageClass=self.storageClass,
            isCalibration=self.isCalibration(),
        )

    def makeAllComponentDatasetTypes(self) -> List[DatasetType]:
        """Return all component dataset types for this composite.

        Returns
        -------
        all : `list` of `DatasetType`
            All the component dataset types. If this is not a composite
            then returns an empty list.
        """
        return [
            self.makeComponentDatasetType(componentName)
            for componentName in self.storageClass.allComponents()
        ]

    def overrideStorageClass(self, storageClass: str | StorageClass) -> DatasetType:
        """Create a new `DatasetType` from this one but with an updated
        `StorageClass`.

        Parameters
        ----------
        storageClass : `str` or `StorageClass`
            The new storage class.

        Returns
        -------
        modified : `DatasetType`
            A dataset type that is the same as the current one but with a
            different storage class. Will be ``self`` if the given storage
            class is the current one.

        Notes
        -----
        If this is a component dataset type, the parent storage class will be
        retained.
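
        Examples
        --------
        A hedged sketch; assumes ``datasetType`` exists and that the two
        storage classes involved are convertible:

        >>> new_type = datasetType.overrideStorageClass("ArrowAstropy")  # doctest: +SKIP
        >>> new_type.is_compatible_with(datasetType)  # doctest: +SKIP
        True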
        """
        if storageClass == self._storageClassName or storageClass == self._storageClass:
            return self
        parent = self._parentStorageClass if self._parentStorageClass else self._parentStorageClassName
        new = DatasetType(
            self.name,
            dimensions=self.dimensions,
            storageClass=storageClass,
            parentStorageClass=parent,
            isCalibration=self.isCalibration(),
        )
        # Check validity.
        if new.is_compatible_with(self) or self.is_compatible_with(new):
            return new
        raise ValueError(
            f"The new storage class ({new.storageClass}) is not compatible with the "
            f"existing storage class ({self.storageClass})."
        )

    def isComponent(self) -> bool:
        """Return whether this `DatasetType` refers to a component.

        Returns
        -------
        isComponent : `bool`
            `True` if this `DatasetType` is a component, `False` otherwise.
        """
        if self.component():
            return True
        return False

    def isComposite(self) -> bool:
        """Return whether this `DatasetType` is a composite.

        Returns
        -------
        isComposite : `bool`
            `True` if this `DatasetType` is a composite type, `False`
            otherwise.
        """
        return self.storageClass.isComposite()

    def _lookupNames(self) -> Tuple[LookupKey, ...]:
        """Return name keys to use for lookups in configurations.

        The names are returned in order of priority.

        Returns
        -------
        names : `tuple` of `LookupKey`
            Tuple of the `DatasetType` name and the `StorageClass` name.
            If the name includes a component the name with the component
            is first, then the name without the component and finally
            the storage class name and the storage class name of the
            composite.
        """
        rootName, componentName = self.nameAndComponent()
        lookups: Tuple[LookupKey, ...] = (LookupKey(name=self.name),)
        if componentName is not None:
            lookups = lookups + (LookupKey(name=rootName),)

        if self.dimensions:
            # Dimensions are a lower priority than dataset type name
            lookups = lookups + (LookupKey(dimensions=self.dimensions),)

        storageClasses = self.storageClass._lookupNames()
        if componentName is not None and self.parentStorageClass is not None:
            storageClasses += self.parentStorageClass._lookupNames()

        return lookups + storageClasses

    def to_simple(self, minimal: bool = False) -> SerializedDatasetType:
        """Convert this class to a simple python type.

        This makes it suitable for serialization.

        Parameters
        ----------
        minimal : `bool`, optional
            Use minimal serialization. Requires Registry to convert
            back to a full type.

        Returns
        -------
        simple : `SerializedDatasetType`
            The object converted to a class suitable for serialization.
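
        Examples
        --------
        A hedged sketch of a serialization round trip; assumes
        ``datasetType`` and a matching ``universe`` exist:

        >>> simple = datasetType.to_simple()  # doctest: +SKIP
        >>> DatasetType.from_simple(simple, universe=universe) == datasetType  # doctest: +SKIP
        True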
        """
        as_dict: Dict[str, Any]
        if minimal:
            # Only needs the name.
            as_dict = {"name": self.name}
        else:
            # Convert to a dict form
            as_dict = {
                "name": self.name,
                "storageClass": self._storageClassName,
                "isCalibration": self._isCalibration,
                "dimensions": self.dimensions.to_simple(),
            }

            if self._parentStorageClassName is not None:
                as_dict["parentStorageClass"] = self._parentStorageClassName
        return SerializedDatasetType(**as_dict)

    @classmethod
    def from_simple(
        cls,
        simple: SerializedDatasetType,
        universe: Optional[DimensionUniverse] = None,
        registry: Optional[Registry] = None,
    ) -> DatasetType:
        """Construct a new object from the simplified form.

        This is usually data returned from the `to_simple` method.

        Parameters
        ----------
        simple : `SerializedDatasetType`
            The value returned by `to_simple()`.
        universe : `DimensionUniverse`
            The special graph of all known dimensions, of which the
            dimensions of this dataset type will be a subset. Can be `None`
            if a registry is provided.
        registry : `lsst.daf.butler.Registry`, optional
            Registry to use to convert the simple name of a DatasetType to
            a full `DatasetType`. Can be `None` if a full description of
            the type is provided along with a universe.

        Returns
        -------
        datasetType : `DatasetType`
            Newly-constructed object.
        """
        if simple.storageClass is None:
            # Treat this as minimalist representation
            if registry is None:
                raise ValueError(
                    f"Unable to convert a DatasetType name '{simple}' to DatasetType without a Registry"
                )
            return registry.getDatasetType(simple.name)

        if universe is None and registry is None:
            raise ValueError("One of universe or registry must be provided.")

        if universe is None and registry is not None:
            # registry should not be none by now but test helps mypy
            universe = registry.dimensions

        if universe is None:
            # this is for mypy
            raise ValueError("Unable to determine a usable universe")

        if simple.dimensions is None:
            # mypy hint
            raise ValueError(f"Dimensions must be specified in {simple}")

        return cls(
            name=simple.name,
            dimensions=DimensionGraph.from_simple(simple.dimensions, universe=universe),
            storageClass=simple.storageClass,
            isCalibration=simple.isCalibration,
            parentStorageClass=simple.parentStorageClass,
            universe=universe,
        )

    to_json = to_json_pydantic
    from_json: ClassVar = classmethod(from_json_pydantic)

    def __reduce__(
        self,
    ) -> Tuple[
        Callable, Tuple[Type[DatasetType], Tuple[str, DimensionGraph, str, Optional[str]], Dict[str, bool]]
    ]:
        """Support pickling.

        StorageClass instances can not normally be pickled, so we pickle
        the StorageClass name instead of the instance.
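
        Examples
        --------
        A hedged sketch of a pickle round trip; assumes ``datasetType``
        exists:

        >>> import pickle
        >>> pickle.loads(pickle.dumps(datasetType)) == datasetType  # doctest: +SKIP
        True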
        """
        return _unpickle_via_factory, (
            self.__class__,
            (self.name, self.dimensions, self._storageClassName, self._parentStorageClassName),
            {"isCalibration": self._isCalibration},
        )

    def __deepcopy__(self, memo: Any) -> DatasetType:
        """Support for deep copy method.

        Normally ``deepcopy`` will use the pickle mechanism to make copies.
        We want to avoid that in order to support the (possibly degenerate)
        use case in which a DatasetType is constructed with a StorageClass
        instance that is not registered with the StorageClassFactory (this
        happens in unit tests). Instead we re-implement the ``__deepcopy__``
        method.
        """
        return DatasetType(
            name=deepcopy(self.name, memo),
            dimensions=deepcopy(self.dimensions, memo),
            storageClass=deepcopy(self._storageClass or self._storageClassName, memo),
            parentStorageClass=deepcopy(self._parentStorageClass or self._parentStorageClassName, memo),
            isCalibration=deepcopy(self._isCalibration, memo),
        )


def _unpickle_via_factory(factory: Callable, args: Any, kwargs: Any) -> DatasetType:
    """Unpickle something by calling a factory.

    Allows subclasses to unpickle using `__reduce__` with keyword
    arguments as well as positional arguments.
    """
    return factory(*args, **kwargs)