Coverage for python/lsst/daf/butler/core/datasets/type.py: 21%

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ["DatasetType", "SerializedDatasetType"]

from copy import deepcopy
import re
from types import MappingProxyType
from typing import (
    TYPE_CHECKING,
    Any,
    Callable,
    Dict,
    Iterable,
    List,
    Mapping,
    Optional,
    Tuple,
    Type,
    Union,
)

from pydantic import BaseModel, StrictStr, StrictBool

from ..storageClass import StorageClass, StorageClassFactory
from ..dimensions import DimensionGraph, SerializedDimensionGraph
from ..configSupport import LookupKey
from ..json import from_json_pydantic, to_json_pydantic

if TYPE_CHECKING:
    from ..dimensions import Dimension, DimensionUniverse
    from ...registry import Registry


def _safeMakeMappingProxyType(data: Optional[Mapping]) -> Mapping:
    """Return a read-only view of ``data``, substituting an empty mapping
    for `None`.
    """
    if data is None:
        data = {}
    return MappingProxyType(data)


class SerializedDatasetType(BaseModel):
    """Simplified model of a `DatasetType` suitable for serialization."""

    name: StrictStr
    storageClass: Optional[StrictStr] = None
    dimensions: Optional[SerializedDimensionGraph] = None
    parentStorageClass: Optional[StrictStr] = None
    isCalibration: StrictBool = False
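
# Example (illustrative, not part of the module API): the serialized form
# can be built directly from plain values; only ``name`` is required and the
# names used here are assumptions.
#
#     SerializedDatasetType(name="calexp", storageClass="ExposureF")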


class DatasetType:
    r"""A named category of Datasets.

    Defines how they are organized, related, and stored.

    A concrete, final class whose instances represent `DatasetType`\ s.
    `DatasetType` instances may be constructed without a `Registry`,
    but they must be registered
    via `Registry.registerDatasetType()` before corresponding Datasets
    may be added.
    `DatasetType` instances are immutable.

    Parameters
    ----------
    name : `str`
        A string name for the Dataset; must correspond to the same
        `DatasetType` across all Registries. Names must start with an
        upper or lowercase letter, and may contain only letters, numbers,
        and underscores. Component dataset types should contain a single
        period separating the base dataset type name from the component name
        (and may be recursive).
    dimensions : `DimensionGraph` or iterable of `Dimension`
        Dimensions used to label and relate instances of this `DatasetType`.
        If not a `DimensionGraph`, ``universe`` must be provided as well.
    storageClass : `StorageClass` or `str`
        Instance of a `StorageClass` or name of `StorageClass` that defines
        how this `DatasetType` is persisted.
    parentStorageClass : `StorageClass` or `str`, optional
        Instance of a `StorageClass` or name of `StorageClass` that defines
        how the composite parent is persisted. Must be `None` if this
        is not a component. Mandatory if it is a component but can be the
        special temporary placeholder
        (`DatasetType.PlaceholderParentStorageClass`) to allow
        construction with an intent to finalize later.
    universe : `DimensionUniverse`, optional
        Set of all known dimensions, used to normalize ``dimensions`` if it
        is not already a `DimensionGraph`.
    isCalibration : `bool`, optional
        If `True`, this dataset type may be included in
        `~CollectionType.CALIBRATION` collections.
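
    Examples
    --------
    A construction sketch; the dataset type name, dimension names, storage
    class name, and ``registry`` here are illustrative assumptions::

        datasetType = DatasetType("calexp",
                                  dimensions=["instrument", "visit", "detector"],
                                  storageClass="ExposureF",
                                  universe=registry.dimensions)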
113 """

    __slots__ = ("_name", "_dimensions", "_storageClass", "_storageClassName",
                 "_parentStorageClass", "_parentStorageClassName",
                 "_isCalibration")

    _serializedType = SerializedDatasetType

    VALID_NAME_REGEX = re.compile("^[a-zA-Z][a-zA-Z0-9_]*(\\.[a-zA-Z][a-zA-Z0-9_]*)*$")

    PlaceholderParentStorageClass = StorageClass("PlaceHolder")
    """Placeholder StorageClass that can be used temporarily for a
    component.

    This can be useful in pipeline construction where we are creating
    dataset types without a registry.
    """

    @staticmethod
    def nameWithComponent(datasetTypeName: str, componentName: str) -> str:
        """Form a valid DatasetTypeName from a parent and component.

        No validation is performed.

        Parameters
        ----------
        datasetTypeName : `str`
            Base type name.
        componentName : `str`
            Name of component.

        Returns
        -------
        compTypeName : `str`
            Name to use for component DatasetType.
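
        Examples
        --------
        >>> DatasetType.nameWithComponent("calexp", "wcs")
        'calexp.wcs'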
148 """
149 return "{}.{}".format(datasetTypeName, componentName)

    def __init__(self, name: str, dimensions: Union[DimensionGraph, Iterable[Dimension]],
                 storageClass: Union[StorageClass, str],
                 parentStorageClass: Optional[Union[StorageClass, str]] = None, *,
                 universe: Optional[DimensionUniverse] = None,
                 isCalibration: bool = False):
        if self.VALID_NAME_REGEX.match(name) is None:
            raise ValueError(f"DatasetType name '{name}' is invalid.")
        self._name = name
        if not isinstance(dimensions, DimensionGraph):
            if universe is None:
                raise ValueError("If dimensions is not a normalized DimensionGraph, "
                                 "a universe must be provided.")
            dimensions = universe.extract(dimensions)
        self._dimensions = dimensions
        if name in self._dimensions.universe.getGovernorDimensions().names:
            raise ValueError(f"Governor dimension name {name} cannot be used as a dataset type name.")
        if not isinstance(storageClass, (StorageClass, str)):
            raise ValueError("StorageClass argument must be StorageClass or str. "
                             f"Got {storageClass}")
        self._storageClass: Optional[StorageClass]
        if isinstance(storageClass, StorageClass):
            self._storageClass = storageClass
            self._storageClassName = storageClass.name
        else:
            self._storageClass = None
            self._storageClassName = storageClass

        self._parentStorageClass: Optional[StorageClass] = None
        self._parentStorageClassName: Optional[str] = None
        if parentStorageClass is not None:
            # Validate the parent argument itself, not the main storage class.
            if not isinstance(parentStorageClass, (StorageClass, str)):
                raise ValueError("Parent StorageClass argument must be StorageClass or str. "
                                 f"Got {parentStorageClass}")

            # Only allowed for a component dataset type
            _, componentName = self.splitDatasetTypeName(self._name)
            if componentName is None:
                raise ValueError("Can not specify a parent storage class if this is not a component"
                                 f" ({self._name})")
            if isinstance(parentStorageClass, StorageClass):
                self._parentStorageClass = parentStorageClass
                self._parentStorageClassName = parentStorageClass.name
            else:
                self._parentStorageClassName = parentStorageClass

        # Ensure that parent storage class is specified when we have
        # a component and is not specified when we don't
        _, componentName = self.splitDatasetTypeName(self._name)
        if parentStorageClass is None and componentName is not None:
            raise ValueError(f"Component dataset type '{self._name}' constructed without parent"
                             " storage class")
        if parentStorageClass is not None and componentName is None:
            raise ValueError(f"Parent storage class specified by {self._name} is not a composite")
        self._isCalibration = isCalibration

    def __repr__(self) -> str:
        extra = ""
        if self._parentStorageClassName:
            extra = f", parentStorageClass={self._parentStorageClassName}"
        if self._isCalibration:
            extra += ", isCalibration=True"
        return f"DatasetType({self.name!r}, {self.dimensions}, {self._storageClassName}{extra})"

    def __eq__(self, other: Any) -> bool:
        if not isinstance(other, type(self)):
            return False
        if self._name != other._name:
            return False
        if self._dimensions != other._dimensions:
            return False
        if self._storageClass is not None and other._storageClass is not None:
            if self._storageClass != other._storageClass:
                return False
        else:
            if self._storageClassName != other._storageClassName:
                return False
        if self._isCalibration != other._isCalibration:
            return False
        if self._parentStorageClass is not None and other._parentStorageClass is not None:
            return self._parentStorageClass == other._parentStorageClass
        else:
            return self._parentStorageClassName == other._parentStorageClassName

    def __hash__(self) -> int:
        """Hash DatasetType instance.

        This only uses the StorageClass name, which is consistent with the
        implementation of the StorageClass hash method.
        """
        return hash((self._name, self._dimensions, self._storageClassName,
                     self._parentStorageClassName))

    def __lt__(self, other: Any) -> bool:
        """Sort using the dataset type name."""
        if not isinstance(other, type(self)):
            return NotImplemented
        return self.name < other.name

    @property
    def name(self) -> str:
        """Return a string name for the Dataset.

        Must correspond to the same `DatasetType` across all Registries.
        """
        return self._name

    @property
    def dimensions(self) -> DimensionGraph:
        r"""Return the `Dimension`\ s for this dataset type.

        The dimensions label and relate instances of this
        `DatasetType` (`DimensionGraph`).
        """
        return self._dimensions

    @property
    def storageClass(self) -> StorageClass:
        """Return `StorageClass` instance associated with this dataset type.

        The `StorageClass` defines how this `DatasetType`
        is persisted. Note that if DatasetType was constructed with a name
        of a StorageClass then Butler has to be initialized before using
        this property.
        """
        if self._storageClass is None:
            self._storageClass = StorageClassFactory().getStorageClass(self._storageClassName)
        return self._storageClass

    @property
    def parentStorageClass(self) -> Optional[StorageClass]:
        """Return the storage class of the composite containing this
        component.

        Note that if DatasetType was constructed with a name of a
        StorageClass then Butler has to be initialized before using this
        property. Can be `None` if this is not a component of a composite.
        Must be defined if this is a component.
        """
        if self._parentStorageClass is None and self._parentStorageClassName is None:
            return None
        if self._parentStorageClass is None and self._parentStorageClassName is not None:
            self._parentStorageClass = StorageClassFactory().getStorageClass(self._parentStorageClassName)
        return self._parentStorageClass

    def isCalibration(self) -> bool:
        """Return if datasets of this type can be in calibration collections.

        Returns
        -------
        flag : `bool`
            `True` if datasets of this type may be included in calibration
            collections.
        """
        return self._isCalibration

    def finalizeParentStorageClass(self, newParent: StorageClass) -> None:
        """Finalize the parent storage class definition.

        Replaces the current placeholder parent storage class with
        the real parent.

        Parameters
        ----------
        newParent : `StorageClass`
            The new parent to be associated with this composite dataset
            type. This replaces the temporary placeholder parent that
            was specified during construction.

        Raises
        ------
        ValueError
            Raised if this dataset type is not a component of a composite.
            Raised if a StorageClass is not given.
            Raised if the parent currently associated with the dataset
            type is not a placeholder.
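
        Examples
        --------
        A sketch of the placeholder flow; ``dims`` and the storage class
        names are assumptions for illustration::

            dt = DatasetType("composite.comp", dims, "StorageA",
                             parentStorageClass=DatasetType.PlaceholderParentStorageClass)
            dt.finalizeParentStorageClass(StorageClass("StorageB"))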
325 """
326 if not self.isComponent():
327 raise ValueError("Can not set a parent storage class if this is not a component"
328 f" ({self.name})")
329 if self._parentStorageClass != self.PlaceholderParentStorageClass:
330 raise ValueError(f"This DatasetType has a parent of {self._parentStorageClassName} and"
331 " is not a placeholder.")
332 if not isinstance(newParent, StorageClass):
333 raise ValueError(f"Supplied parent must be a StorageClass. Got {newParent!r}")
334 self._parentStorageClass = newParent
335 self._parentStorageClassName = newParent.name

    @staticmethod
    def splitDatasetTypeName(datasetTypeName: str) -> Tuple[str, Optional[str]]:
        """Return the root name and the component from a composite name.

        Parameters
        ----------
        datasetTypeName : `str`
            The name of the dataset type, can include a component using
            a "."-separator.

        Returns
        -------
        rootName : `str`
            Root name without any components.
        componentName : `str`
            The component if it has been specified, else `None`.

        Notes
        -----
        If the dataset type name is ``a.b.c`` this method will return a
        root name of ``a`` and a component name of ``b.c``.
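
        Examples
        --------
        >>> DatasetType.splitDatasetTypeName("calexp")
        ('calexp', None)
        >>> DatasetType.splitDatasetTypeName("a.b.c")
        ('a', 'b.c')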
358 """
359 comp = None
360 root = datasetTypeName
361 if "." in root:
362 # If there is doubt, the component is after the first "."
363 root, comp = root.split(".", maxsplit=1)
364 return root, comp

    def nameAndComponent(self) -> Tuple[str, Optional[str]]:
        """Return the root name of this dataset type and any component.

        Returns
        -------
        rootName : `str`
            Root name for this `DatasetType` without any components.
        componentName : `str`
            The component if it has been specified, else `None`.
        """
        return self.splitDatasetTypeName(self.name)

    def component(self) -> Optional[str]:
        """Return the component name (if defined).

        Returns
        -------
        comp : `str`
            Name of component part of DatasetType name. `None` if this
            `DatasetType` is not associated with a component.
        """
        _, comp = self.nameAndComponent()
        return comp

    def componentTypeName(self, component: str) -> str:
        """Derive a component dataset type from a composite.

        Parameters
        ----------
        component : `str`
            Name of component.

        Returns
        -------
        derived : `str`
            Compound name of this `DatasetType` and the component.

        Raises
        ------
        KeyError
            Requested component is not supported by this `DatasetType`.
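
        Examples
        --------
        A sketch assuming the composite's storage class defines a ``wcs``
        component (names illustrative)::

            compName = datasetType.componentTypeName("wcs")
            # e.g. "calexp.wcs"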
407 """
408 if component in self.storageClass.allComponents():
409 return self.nameWithComponent(self.name, component)
410 raise KeyError("Requested component ({}) not understood by this DatasetType".format(component))

    def makeCompositeDatasetType(self) -> DatasetType:
        """Return a composite dataset type from the component.

        Returns
        -------
        composite : `DatasetType`
            The composite dataset type.

        Raises
        ------
        RuntimeError
            Raised if this dataset type is not a component dataset type.
        """
        if not self.isComponent():
            raise RuntimeError(f"DatasetType {self.name} must be a component to form the composite")
        composite_name, _ = self.nameAndComponent()
        if self.parentStorageClass is None:
            raise ValueError("Parent storage class is not set. "
                             f"Unable to create composite type from {self.name}")
        return DatasetType(composite_name, dimensions=self.dimensions,
                           storageClass=self.parentStorageClass)

    def makeComponentDatasetType(self, component: str) -> DatasetType:
        """Return a component dataset type from a composite.

        Assumes the same dimensions as the parent.

        Parameters
        ----------
        component : `str`
            Name of component.

        Returns
        -------
        datasetType : `DatasetType`
            A new DatasetType instance.
        """
        # The component could be a read/write or read component
        return DatasetType(self.componentTypeName(component), dimensions=self.dimensions,
                           storageClass=self.storageClass.allComponents()[component],
                           parentStorageClass=self.storageClass)

    def makeAllComponentDatasetTypes(self) -> List[DatasetType]:
        """Return all component dataset types for this composite.

        Returns
        -------
        all : `list` of `DatasetType`
            All the component dataset types. If this is not a composite
            then returns an empty list.
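
        Examples
        --------
        A sketch of expanding a composite into its component dataset types
        (the composite and component names are assumptions)::

            for componentType in datasetType.makeAllComponentDatasetTypes():
                print(componentType.name)   # e.g. "calexp.wcs", "calexp.psf"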
462 """
463 return [self.makeComponentDatasetType(componentName)
464 for componentName in self.storageClass.allComponents()]

    def isComponent(self) -> bool:
        """Return whether this `DatasetType` refers to a component.

        Returns
        -------
        isComponent : `bool`
            `True` if this `DatasetType` is a component, `False` otherwise.
        """
        if self.component():
            return True
        return False

    def isComposite(self) -> bool:
        """Return whether this `DatasetType` is a composite.

        Returns
        -------
        isComposite : `bool`
            `True` if this `DatasetType` is a composite type, `False`
            otherwise.
        """
        return self.storageClass.isComposite()

    def _lookupNames(self) -> Tuple[LookupKey, ...]:
        """Return name keys to use for lookups in configurations.

        The names are returned in order of priority.

        Returns
        -------
        names : `tuple` of `LookupKey`
            Tuple of the `DatasetType` name and the `StorageClass` name.
            If the name includes a component the name with the component
            is first, then the name without the component and finally
            the storage class name.
        """
        rootName, componentName = self.nameAndComponent()
        lookups: Tuple[LookupKey, ...] = (LookupKey(name=self.name),)
        if componentName is not None:
            lookups = lookups + (LookupKey(name=rootName),)

        if self.dimensions:
            # Dimensions are a lower priority than dataset type name
            lookups = lookups + (LookupKey(dimensions=self.dimensions),)

        return lookups + self.storageClass._lookupNames()

    def to_simple(self, minimal: bool = False) -> SerializedDatasetType:
        """Convert this class to a simple python type.

        This makes it suitable for serialization.

        Parameters
        ----------
        minimal : `bool`, optional
            Use minimal serialization. Requires Registry to convert
            back to a full type.

        Returns
        -------
        simple : `SerializedDatasetType`
            The object converted to a class suitable for serialization.
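
        Examples
        --------
        A round-trip sketch through the serialized form; ``universe`` is an
        assumed `DimensionUniverse`, e.g. ``registry.dimensions``::

            simple = datasetType.to_simple()
            restored = DatasetType.from_simple(simple, universe=universe)
            assert restored == datasetType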
528 """
529 as_dict: Dict[str, Any]
530 if minimal:
531 # Only needs the name.
532 as_dict = {"name": self.name}
533 else:
534 # Convert to a dict form
535 as_dict = {"name": self.name,
536 "storageClass": self._storageClassName,
537 "isCalibration": self._isCalibration,
538 "dimensions": self.dimensions.to_simple(),
539 }
541 if self._parentStorageClassName is not None:
542 as_dict["parentStorageClass"] = self._parentStorageClassName
543 return SerializedDatasetType(**as_dict)

    @classmethod
    def from_simple(cls, simple: SerializedDatasetType,
                    universe: Optional[DimensionUniverse] = None,
                    registry: Optional[Registry] = None) -> DatasetType:
        """Construct a new object from the simplified form.

        This is usually data returned from the `to_simple` method.

        Parameters
        ----------
        simple : `SerializedDatasetType`
            The value returned by `to_simple()`.
        universe : `DimensionUniverse`
            The special graph of all known dimensions, of which the
            dimensions of this dataset type will be a subset. Can be `None`
            if a registry is provided.
        registry : `lsst.daf.butler.Registry`, optional
            Registry to use to convert simple name of a DatasetType to
            a full `DatasetType`. Can be `None` if a full description of
            the type is provided along with a universe.

        Returns
        -------
        datasetType : `DatasetType`
            Newly-constructed object.
        """
        if simple.storageClass is None:
            # Treat this as minimalist representation
            if registry is None:
                raise ValueError(f"Unable to convert a DatasetType name '{simple}' to DatasetType"
                                 " without a Registry")
            return registry.getDatasetType(simple.name)

        if universe is None and registry is None:
            raise ValueError("One of universe or registry must be provided.")

        if universe is None and registry is not None:
            # registry should not be none by now but test helps mypy
            universe = registry.dimensions

        if universe is None:
            # this is for mypy
            raise ValueError("Unable to determine a usable universe")

        if simple.dimensions is None:
            # mypy hint
            raise ValueError(f"Dimensions must be specified in {simple}")

        return cls(name=simple.name,
                   dimensions=DimensionGraph.from_simple(simple.dimensions, universe=universe),
                   storageClass=simple.storageClass,
                   isCalibration=simple.isCalibration,
                   parentStorageClass=simple.parentStorageClass,
                   universe=universe)

    to_json = to_json_pydantic
    from_json = classmethod(from_json_pydantic)

    def __reduce__(self) -> Tuple[Callable, Tuple[Type[DatasetType],
                                                  Tuple[str, DimensionGraph, str, Optional[str]],
                                                  Dict[str, bool]]]:
        """Support pickling.

        StorageClass instances can not normally be pickled, so we pickle
        StorageClass name instead of instance.
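
        Examples
        --------
        A pickling round trip (sketch; ``datasetType`` is an existing
        instance)::

            import pickle
            restored = pickle.loads(pickle.dumps(datasetType))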
609 """
610 return _unpickle_via_factory, (self.__class__, (self.name, self.dimensions, self._storageClassName,
611 self._parentStorageClassName),
612 {"isCalibration": self._isCalibration})

    def __deepcopy__(self, memo: Any) -> DatasetType:
        """Support for deep copy method.

        Normally ``deepcopy`` will use the pickle mechanism to make copies.
        We want to avoid that to support a (possibly degenerate) use case
        where a DatasetType is constructed with a StorageClass instance that
        is not registered with the StorageClassFactory (this happens in unit
        tests). Instead we re-implement the ``__deepcopy__`` method.
        """
        return DatasetType(name=deepcopy(self.name, memo),
                           dimensions=deepcopy(self.dimensions, memo),
                           storageClass=deepcopy(self._storageClass or self._storageClassName, memo),
                           parentStorageClass=deepcopy(self._parentStorageClass
                                                       or self._parentStorageClassName, memo),
                           isCalibration=deepcopy(self._isCalibration, memo))


def _unpickle_via_factory(factory: Callable, args: Any, kwargs: Any) -> DatasetType:
    """Unpickle something by calling a factory.

    Allows subclasses to unpickle using `__reduce__` with keyword
    arguments as well as positional arguments.
    """
    return factory(*args, **kwargs)