Coverage for python/lsst/daf/butler/core/datasets/type.py: 22%

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ["DatasetType", "SerializedDatasetType"]

import re
from copy import deepcopy
from types import MappingProxyType
from typing import TYPE_CHECKING, Any, Callable, Dict, Iterable, List, Mapping, Optional, Tuple, Type, Union

from pydantic import BaseModel, StrictBool, StrictStr

from ..configSupport import LookupKey
from ..dimensions import DimensionGraph, SerializedDimensionGraph
from ..json import from_json_pydantic, to_json_pydantic
from ..storageClass import StorageClass, StorageClassFactory

if TYPE_CHECKING:
    from ...registry import Registry
    from ..dimensions import Dimension, DimensionUniverse


def _safeMakeMappingProxyType(data: Optional[Mapping]) -> Mapping:
    """Return a read-only view of ``data``, treating `None` as empty."""
    if data is None:
        data = {}
    return MappingProxyType(data)


class SerializedDatasetType(BaseModel):
    """Simplified model of a `DatasetType` suitable for serialization."""

    name: StrictStr
    storageClass: Optional[StrictStr] = None
    dimensions: Optional[SerializedDimensionGraph] = None
    parentStorageClass: Optional[StrictStr] = None
    isCalibration: StrictBool = False

    @classmethod
    def direct(
        cls,
        *,
        name: str,
        storageClass: Optional[str] = None,
        dimensions: Optional[Dict] = None,
        parentStorageClass: Optional[str] = None,
        isCalibration: bool = False,
    ) -> SerializedDatasetType:
68 """Construct a `SerializedDatasetType` directly without validators.
70 This differs from PyDantics construct method in that the arguments are
71 explicitly what the model requires, and it will recurse through
72 members, constructing them from their corresponding `direct` methods.
74 This method should only be called when the inputs are trusted.
75 """
        node = SerializedDatasetType.__new__(cls)
        setter = object.__setattr__
        setter(node, "name", name)
        setter(node, "storageClass", storageClass)
        setter(
            node,
            "dimensions",
            dimensions if dimensions is None else SerializedDimensionGraph.direct(**dimensions),
        )
        setter(node, "parentStorageClass", parentStorageClass)
        setter(node, "isCalibration", isCalibration)
        setter(
            node,
            "__fields_set__",
            {"name", "storageClass", "dimensions", "parentStorageClass", "isCalibration"},
        )
        return node
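
# Illustrative sketch, not part of the module: `direct` skips Pydantic
# validation, so it is only for trusted, pre-validated input. The field
# values (and the `names` key expected by `SerializedDimensionGraph.direct`,
# inferred from its use above) are assumptions for the example.
#
#   serialized = SerializedDatasetType.direct(
#       name="calexp",
#       storageClass="ExposureF",
#       dimensions={"names": ["instrument", "visit", "detector"]},
#       isCalibration=False,
#   )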


class DatasetType:
    r"""A named category of Datasets.

    Defines how they are organized, related, and stored.

    A concrete, final class whose instances represent `DatasetType`\ s.
    `DatasetType` instances may be constructed without a `Registry`,
    but they must be registered
    via `Registry.registerDatasetType()` before corresponding Datasets
    may be added.
    `DatasetType` instances are immutable.

    Parameters
    ----------
    name : `str`
        A string name for the Dataset; must correspond to the same
        `DatasetType` across all Registries. Names must start with an
        upper or lowercase letter or an underscore, and may contain only
        letters, numbers, and underscores. Component dataset types should
        contain a single period separating the base dataset type name from
        the component name (and may be recursive).
    dimensions : `DimensionGraph` or iterable of `Dimension` or `str`
        Dimensions used to label and relate instances of this `DatasetType`.
        If not a `DimensionGraph`, ``universe`` must be provided as well.
    storageClass : `StorageClass` or `str`
        Instance of a `StorageClass` or name of `StorageClass` that defines
        how this `DatasetType` is persisted.
    parentStorageClass : `StorageClass` or `str`, optional
        Instance of a `StorageClass` or name of `StorageClass` that defines
        how the composite parent is persisted. Must be `None` if this
        is not a component.
    universe : `DimensionUniverse`, optional
        Set of all known dimensions, used to normalize ``dimensions`` if it
        is not already a `DimensionGraph`.
    isCalibration : `bool`, optional
        If `True`, this dataset type may be included in
        `~CollectionType.CALIBRATION` collections.

    See Also
    --------
    :ref:`daf_butler_organizing_datasets`
    """

    __slots__ = (
        "_name",
        "_dimensions",
        "_storageClass",
        "_storageClassName",
        "_parentStorageClass",
        "_parentStorageClassName",
        "_isCalibration",
    )

    _serializedType = SerializedDatasetType

    VALID_NAME_REGEX = re.compile("^[a-zA-Z_][a-zA-Z0-9_]*(\\.[a-zA-Z_][a-zA-Z0-9_]*)*$")

    @staticmethod
    def nameWithComponent(datasetTypeName: str, componentName: str) -> str:
        """Form a valid DatasetTypeName from a parent and component.

        No validation is performed.

        Parameters
        ----------
        datasetTypeName : `str`
            Base type name.
        componentName : `str`
            Name of component.

        Returns
        -------
        compTypeName : `str`
            Name to use for component DatasetType.
        """
        return "{}.{}".format(datasetTypeName, componentName)
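
    # Usage sketch (illustrative; the names are made up). As a staticmethod
    # this can be called without an instance:
    #
    #   DatasetType.nameWithComponent("calexp", "wcs")  # -> "calexp.wcs"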

    def __init__(
        self,
        name: str,
        dimensions: Union[DimensionGraph, Iterable[Union[Dimension, str]]],
        storageClass: Union[StorageClass, str],
        parentStorageClass: Optional[Union[StorageClass, str]] = None,
        *,
        universe: Optional[DimensionUniverse] = None,
        isCalibration: bool = False,
    ):
        if self.VALID_NAME_REGEX.match(name) is None:
            raise ValueError(f"DatasetType name '{name}' is invalid.")
        self._name = name
        if not isinstance(dimensions, DimensionGraph):
            if universe is None:
                raise ValueError(
                    "If dimensions is not a normalized DimensionGraph, a universe must be provided."
                )
            dimensions = universe.extract(dimensions)
        self._dimensions = dimensions
        if name in self._dimensions.universe.getGovernorDimensions().names:
            raise ValueError(f"Governor dimension name {name} cannot be used as a dataset type name.")
        if not isinstance(storageClass, (StorageClass, str)):
            raise ValueError(f"StorageClass argument must be StorageClass or str. Got {storageClass}")
        self._storageClass: Optional[StorageClass]
        if isinstance(storageClass, StorageClass):
            self._storageClass = storageClass
            self._storageClassName = storageClass.name
        else:
            self._storageClass = None
            self._storageClassName = storageClass

        self._parentStorageClass: Optional[StorageClass] = None
        self._parentStorageClassName: Optional[str] = None
        if parentStorageClass is not None:
            if not isinstance(parentStorageClass, (StorageClass, str)):
                raise ValueError(
                    f"Parent StorageClass argument must be StorageClass or str. Got {parentStorageClass}"
                )

            # Only allowed for a component dataset type
            _, componentName = self.splitDatasetTypeName(self._name)
            if componentName is None:
                raise ValueError(
                    f"Can not specify a parent storage class if this is not a component ({self._name})"
                )
            if isinstance(parentStorageClass, StorageClass):
                self._parentStorageClass = parentStorageClass
                self._parentStorageClassName = parentStorageClass.name
            else:
                self._parentStorageClassName = parentStorageClass

        # Ensure that a parent storage class is specified when we have
        # a component and is not specified when we don't.
        _, componentName = self.splitDatasetTypeName(self._name)
        if parentStorageClass is None and componentName is not None:
            raise ValueError(
                f"Component dataset type '{self._name}' constructed without parent storage class"
            )
        if parentStorageClass is not None and componentName is None:
            raise ValueError(f"Parent storage class specified but {self._name} is not a component")
        self._isCalibration = isCalibration
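
    # Construction sketch (illustrative): the name, dimension names, and
    # storage class here are assumptions; a real `DimensionUniverse` would
    # typically come from `Registry.dimensions`.
    #
    #   dt = DatasetType(
    #       "calexp",
    #       dimensions=["instrument", "visit", "detector"],
    #       storageClass="ExposureF",
    #       universe=registry.dimensions,
    #   )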

    def __repr__(self) -> str:
        extra = ""
        if self._parentStorageClassName:
            extra = f", parentStorageClass={self._parentStorageClassName}"
        if self._isCalibration:
            extra += ", isCalibration=True"
        return f"DatasetType({self.name!r}, {self.dimensions}, {self._storageClassName}{extra})"

    def _equal_ignoring_storage_class(self, other: Any) -> bool:
        """Check everything is equal except the storage class.

        Parameters
        ----------
        other : Any
            Object to check against this one.

        Returns
        -------
        mostly : `bool`
            Returns `True` if everything except the storage class is equal.
        """
        if not isinstance(other, type(self)):
            return False
        if self._name != other._name:
            return False
        if self._dimensions != other._dimensions:
            return False
        if self._isCalibration != other._isCalibration:
            return False
        if self._parentStorageClass is not None and other._parentStorageClass is not None:
            return self._parentStorageClass == other._parentStorageClass
        else:
            return self._parentStorageClassName == other._parentStorageClassName

    def __eq__(self, other: Any) -> bool:
        mostly_equal = self._equal_ignoring_storage_class(other)
        if not mostly_equal:
            return False

        # Be careful not to force a storage class to import the corresponding
        # python code.
        if self._storageClass is not None and other._storageClass is not None:
            if self._storageClass != other._storageClass:
                return False
        else:
            if self._storageClassName != other._storageClassName:
                return False
        return True

    def is_compatible_with(self, other: DatasetType) -> bool:
        """Determine if the given `DatasetType` is compatible with this one.

        Compatibility requires a matching name and dimensions and a storage
        class for this dataset type that can convert the python type
        associated with the other storage class to this python type.

        Parameters
        ----------
        other : `DatasetType`
            Dataset type to check.

        Returns
        -------
        is_compatible : `bool`
            Returns `True` if the other dataset type is either the same as
            this or the storage class associated with the other can be
            converted to this.
        """
        mostly_equal = self._equal_ignoring_storage_class(other)
        if not mostly_equal:
            return False

        # If the storage class names match then they are compatible.
        if self._storageClassName == other._storageClassName:
            return True

        # Now required to check the full storage class.
        self_sc = self.storageClass
        other_sc = other.storageClass

        return self_sc.can_convert(other_sc)
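
    # Compatibility sketch (illustrative; `dims` and the storage class names
    # are assumptions): two otherwise-identical dataset types are compatible
    # when this type's storage class can convert the other's.
    #
    #   a = DatasetType("cat", dims, storageClass="DataFrame")
    #   b = DatasetType("cat", dims, storageClass="AstropyTable")
    #   a.is_compatible_with(b)  # True only if "DataFrame" can convert "AstropyTable"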

    def __hash__(self) -> int:
        """Hash DatasetType instance.

        This only uses the StorageClass name, which is consistent with the
        implementation of the StorageClass hash method.
        """
        return hash((self._name, self._dimensions, self._storageClassName, self._parentStorageClassName))

    def __lt__(self, other: Any) -> bool:
        """Sort using the dataset type name."""
        if not isinstance(other, type(self)):
            return NotImplemented
        return self.name < other.name

    @property
    def name(self) -> str:
        """Return a string name for the Dataset.

        Must correspond to the same `DatasetType` across all Registries.
        """
        return self._name

    @property
    def dimensions(self) -> DimensionGraph:
        r"""Return the `Dimension`\ s for this dataset type.

        The dimensions label and relate instances of this
        `DatasetType` (`DimensionGraph`).
        """
        return self._dimensions

    @property
    def storageClass(self) -> StorageClass:
        """Return `StorageClass` instance associated with this dataset type.

        The `StorageClass` defines how this `DatasetType` is persisted. Note
        that if the `DatasetType` was constructed with only the name of a
        `StorageClass`, the Butler has to be initialized before this property
        can be used.
        """
        if self._storageClass is None:
            self._storageClass = StorageClassFactory().getStorageClass(self._storageClassName)
        return self._storageClass

    @property
    def storageClass_name(self) -> str:
        """Return the storage class name.

        This will never force the storage class to be imported.
        """
        return self._storageClassName

    @property
    def parentStorageClass(self) -> Optional[StorageClass]:
        """Return the storage class of the composite containing this component.

        Note that if the `DatasetType` was constructed with only the name of
        a `StorageClass`, the Butler has to be initialized before this
        property can be used. Can be `None` if this is not a component of a
        composite. Must be defined if this is a component.
        """
        if self._parentStorageClass is None and self._parentStorageClassName is None:
            return None
        if self._parentStorageClass is None and self._parentStorageClassName is not None:
            self._parentStorageClass = StorageClassFactory().getStorageClass(self._parentStorageClassName)
        return self._parentStorageClass

    def isCalibration(self) -> bool:
        """Return if datasets of this type can be in calibration collections.

        Returns
        -------
        flag : `bool`
            `True` if datasets of this type may be included in calibration
            collections.
        """
        return self._isCalibration

    @staticmethod
    def splitDatasetTypeName(datasetTypeName: str) -> Tuple[str, Optional[str]]:
        """Return the root name and the component from a composite name.

        Parameters
        ----------
        datasetTypeName : `str`
            The name of the dataset type; it can include a component using
            a "."-separator.

        Returns
        -------
        rootName : `str`
            Root name without any components.
        componentName : `str`
            The component if it has been specified, else `None`.

        Notes
        -----
        If the dataset type name is ``a.b.c`` this method will return a
        root name of ``a`` and a component name of ``b.c``.
        """
        comp = None
        root = datasetTypeName
        if "." in root:
            # If there is doubt, the component is after the first "."
            root, comp = root.split(".", maxsplit=1)
        return root, comp
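
    # Splitting sketch (illustrative names), following the Notes above:
    #
    #   DatasetType.splitDatasetTypeName("calexp")      # -> ("calexp", None)
    #   DatasetType.splitDatasetTypeName("calexp.wcs")  # -> ("calexp", "wcs")
    #   DatasetType.splitDatasetTypeName("a.b.c")       # -> ("a", "b.c")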

    def nameAndComponent(self) -> Tuple[str, Optional[str]]:
        """Return the root name of this dataset type and any component.

        Returns
        -------
        rootName : `str`
            Root name for this `DatasetType` without any components.
        componentName : `str`
            The component if it has been specified, else `None`.
        """
        return self.splitDatasetTypeName(self.name)

    def component(self) -> Optional[str]:
        """Return the component name (if defined).

        Returns
        -------
        comp : `str`
            Name of component part of DatasetType name. `None` if this
            `DatasetType` is not associated with a component.
        """
        _, comp = self.nameAndComponent()
        return comp

    def componentTypeName(self, component: str) -> str:
        """Derive a component dataset type from a composite.

        Parameters
        ----------
        component : `str`
            Name of the component.

        Returns
        -------
        derived : `str`
            Compound name of this `DatasetType` and the component.

        Raises
        ------
        KeyError
            Raised if the requested component is not supported by this
            `DatasetType`.
        """
        if component in self.storageClass.allComponents():
            return self.nameWithComponent(self.name, component)
        raise KeyError(f"Requested component ({component}) not understood by this DatasetType ({self})")
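
    # Derivation sketch (illustrative): assuming a composite named "calexp"
    # whose storage class defines a "wcs" component,
    #
    #   dt.componentTypeName("wcs")   # -> "calexp.wcs"
    #   dt.componentTypeName("nope")  # raises KeyError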

    def makeCompositeDatasetType(self) -> DatasetType:
        """Return a composite dataset type from the component.

        Returns
        -------
        composite : `DatasetType`
            The composite dataset type.

        Raises
        ------
        RuntimeError
            Raised if this dataset type is not a component dataset type.
        ValueError
            Raised if the parent storage class is not set.
        """
        if not self.isComponent():
            raise RuntimeError(f"DatasetType {self.name} must be a component to form the composite")
        composite_name, _ = self.nameAndComponent()
        if self.parentStorageClass is None:
            raise ValueError(
                f"Parent storage class is not set. Unable to create composite type from {self.name}"
            )
        return DatasetType(
            composite_name,
            dimensions=self.dimensions,
            storageClass=self.parentStorageClass,
            isCalibration=self.isCalibration(),
        )

    def makeComponentDatasetType(self, component: str) -> DatasetType:
        """Return a component dataset type from a composite.

        Assumes the same dimensions as the parent.

        Parameters
        ----------
        component : `str`
            Name of the component.

        Returns
        -------
        datasetType : `DatasetType`
            A new DatasetType instance.
        """
        # The component could be a read/write or read component
        return DatasetType(
            self.componentTypeName(component),
            dimensions=self.dimensions,
            storageClass=self.storageClass.allComponents()[component],
            parentStorageClass=self.storageClass,
            isCalibration=self.isCalibration(),
        )
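
    # Round-trip sketch (illustrative; assumes a composite `dt` with a "wcs"
    # component): a component type carries its parent storage class, so the
    # composite can be recovered from it.
    #
    #   wcs_type = dt.makeComponentDatasetType("wcs")
    #   wcs_type.isComponent()               # True
    #   wcs_type.makeCompositeDatasetType()  # equal to dt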

    def makeAllComponentDatasetTypes(self) -> List[DatasetType]:
        """Return all component dataset types for this composite.

        Returns
        -------
        all : `list` of `DatasetType`
            All the component dataset types. If this is not a composite
            then returns an empty list.
        """
        return [
            self.makeComponentDatasetType(componentName)
            for componentName in self.storageClass.allComponents()
        ]

    def isComponent(self) -> bool:
        """Return whether this `DatasetType` refers to a component.

        Returns
        -------
        isComponent : `bool`
            `True` if this `DatasetType` is a component, `False` otherwise.
        """
        if self.component():
            return True
        return False

    def isComposite(self) -> bool:
        """Return whether this `DatasetType` is a composite.

        Returns
        -------
        isComposite : `bool`
            `True` if this `DatasetType` is a composite type, `False`
            otherwise.
        """
        return self.storageClass.isComposite()

    def _lookupNames(self) -> Tuple[LookupKey, ...]:
        """Return name keys to use for lookups in configurations.

        The names are returned in order of priority.

        Returns
        -------
        names : `tuple` of `LookupKey`
            Tuple of the `DatasetType` name and the `StorageClass` name.
            If the name includes a component the name with the component
            is first, then the name without the component and finally
            the storage class name and the storage class name of the
            composite.
        """
        rootName, componentName = self.nameAndComponent()
        lookups: Tuple[LookupKey, ...] = (LookupKey(name=self.name),)
        if componentName is not None:
            lookups = lookups + (LookupKey(name=rootName),)

        if self.dimensions:
            # Dimensions are a lower priority than dataset type name
            lookups = lookups + (LookupKey(dimensions=self.dimensions),)

        storageClasses = self.storageClass._lookupNames()
        if componentName is not None and self.parentStorageClass is not None:
            storageClasses += self.parentStorageClass._lookupNames()

        return lookups + storageClasses

    def to_simple(self, minimal: bool = False) -> SerializedDatasetType:
        """Convert this class to a simple python type.

        This makes it suitable for serialization.

        Parameters
        ----------
        minimal : `bool`, optional
            Use minimal serialization. Requires Registry to convert
            back to a full type.

        Returns
        -------
        simple : `SerializedDatasetType`
            The object converted to a class suitable for serialization.
        """
        as_dict: Dict[str, Any]
        if minimal:
            # Only needs the name.
            as_dict = {"name": self.name}
        else:
            # Convert to a dict form
            as_dict = {
                "name": self.name,
                "storageClass": self._storageClassName,
                "isCalibration": self._isCalibration,
                "dimensions": self.dimensions.to_simple(),
            }

            if self._parentStorageClassName is not None:
                as_dict["parentStorageClass"] = self._parentStorageClassName
        return SerializedDatasetType(**as_dict)
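
    # Serialization sketch (illustrative): `to_json` below is just
    # `to_json_pydantic`, so both forms are available.
    #
    #   simple = dt.to_simple()               # SerializedDatasetType instance
    #   text = dt.to_json()                   # JSON string
    #   minimal = dt.to_simple(minimal=True)  # name only; needs a Registry later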

    @classmethod
    def from_simple(
        cls,
        simple: SerializedDatasetType,
        universe: Optional[DimensionUniverse] = None,
        registry: Optional[Registry] = None,
    ) -> DatasetType:
        """Construct a new object from the simplified form.

        This is usually data returned from the `to_simple` method.

        Parameters
        ----------
        simple : `SerializedDatasetType`
            The value returned by `to_simple()`.
        universe : `DimensionUniverse`
            The special graph of all known dimensions of which this graph
            will be a subset. Can be `None` if a registry is provided.
        registry : `lsst.daf.butler.Registry`, optional
            Registry to use to convert simple name of a DatasetType to
            a full `DatasetType`. Can be `None` if a full description of
            the type is provided along with a universe.

        Returns
        -------
        datasetType : `DatasetType`
            Newly-constructed object.
        """
        if simple.storageClass is None:
            # Treat this as minimalist representation
            if registry is None:
                raise ValueError(
                    f"Unable to convert a DatasetType name '{simple}' to DatasetType without a Registry"
                )
            return registry.getDatasetType(simple.name)

        if universe is None and registry is None:
            raise ValueError("One of universe or registry must be provided.")

        if universe is None and registry is not None:
            # registry should not be none by now but test helps mypy
            universe = registry.dimensions

        if universe is None:
            # this is for mypy
            raise ValueError("Unable to determine a usable universe")

        if simple.dimensions is None:
            # mypy hint
            raise ValueError(f"Dimensions must be specified in {simple}")

        return cls(
            name=simple.name,
            dimensions=DimensionGraph.from_simple(simple.dimensions, universe=universe),
            storageClass=simple.storageClass,
            isCalibration=simple.isCalibration,
            parentStorageClass=simple.parentStorageClass,
            universe=universe,
        )
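
    # Deserialization sketch (illustrative; `registry` is assumed): a minimal
    # serialization (name only) needs a `Registry` to look the type up, while
    # a full serialization needs only a `DimensionUniverse`.
    #
    #   DatasetType.from_simple(minimal, registry=registry)
    #   DatasetType.from_simple(simple, universe=registry.dimensions)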

    to_json = to_json_pydantic
    from_json = classmethod(from_json_pydantic)

    def __reduce__(
        self,
    ) -> Tuple[
        Callable, Tuple[Type[DatasetType], Tuple[str, DimensionGraph, str, Optional[str]], Dict[str, bool]]
    ]:
        """Support pickling.

        StorageClass instances can not normally be pickled, so we pickle
        the StorageClass name instead of the instance.
        """
        return _unpickle_via_factory, (
            self.__class__,
            (self.name, self.dimensions, self._storageClassName, self._parentStorageClassName),
            {"isCalibration": self._isCalibration},
        )

    def __deepcopy__(self, memo: Any) -> DatasetType:
        """Support for deep copy method.

        Normally ``deepcopy`` will use the pickle mechanism to make copies.
        We want to avoid that to support the (possibly degenerate) use case
        where a DatasetType is constructed with a StorageClass instance that
        is not registered with the StorageClassFactory (this happens in unit
        tests). Instead we re-implement the ``__deepcopy__`` method.
        """
        return DatasetType(
            name=deepcopy(self.name, memo),
            dimensions=deepcopy(self.dimensions, memo),
            storageClass=deepcopy(self._storageClass or self._storageClassName, memo),
            parentStorageClass=deepcopy(self._parentStorageClass or self._parentStorageClassName, memo),
            isCalibration=deepcopy(self._isCalibration, memo),
        )


def _unpickle_via_factory(factory: Callable, args: Any, kwargs: Any) -> DatasetType:
    """Unpickle something by calling a factory.

    Allows subclasses to unpickle using `__reduce__` with keyword
    arguments as well as positional arguments.
    """
    return factory(*args, **kwargs)
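
# Pickling sketch (illustrative): `__reduce__` hands `_unpickle_via_factory`
# the class, the positional arguments, and the keyword arguments, so a round
# trip preserves the storage class *name* without importing the storage class
# python type.
#
#   import pickle
#   dt2 = pickle.loads(pickle.dumps(dt))
#   assert dt2 == dt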