# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ["DatasetType", "SerializedDatasetType"]

from copy import deepcopy
import re

from types import MappingProxyType

from typing import (
    TYPE_CHECKING,
    Any,
    Callable,
    Dict,
    Iterable,
    List,
    Mapping,
    Optional,
    Tuple,
    Type,
    Union,
)

from pydantic import BaseModel, StrictStr, StrictBool

from ..storageClass import StorageClass, StorageClassFactory
from ..dimensions import DimensionGraph, SerializedDimensionGraph
from ..configSupport import LookupKey
from ..json import from_json_pydantic, to_json_pydantic

if TYPE_CHECKING:
    from ..dimensions import Dimension, DimensionUniverse
    from ...registry import Registry


def _safeMakeMappingProxyType(data: Optional[Mapping]) -> Mapping:
    if data is None:
        data = {}
    return MappingProxyType(data)


class SerializedDatasetType(BaseModel):
    """Simplified model of a `DatasetType` suitable for serialization."""

    name: StrictStr
    storageClass: Optional[StrictStr] = None
    dimensions: Optional[SerializedDimensionGraph] = None
    parentStorageClass: Optional[StrictStr] = None
    isCalibration: StrictBool = False
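

# A minimal usage sketch: the minimal serialized form carries only the
# dataset type name, while the full form also sets storageClass, dimensions,
# and isCalibration.  "calexp" is a hypothetical name used purely for
# illustration.
#
#     minimal = SerializedDatasetType(name="calexp")
#     assert minimal.storageClass is None
#     assert minimal.isCalibration is False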


class DatasetType:
    r"""A named category of Datasets.

    Defines how they are organized, related, and stored.

    A concrete, final class whose instances represent `DatasetType`\ s.
    `DatasetType` instances may be constructed without a `Registry`,
    but they must be registered
    via `Registry.registerDatasetType()` before corresponding Datasets
    may be added.
    `DatasetType` instances are immutable.

    Parameters
    ----------
    name : `str`
        A string name for the Dataset; must correspond to the same
        `DatasetType` across all Registries.  Names must start with an
        upper or lowercase letter, and may contain only letters, numbers,
        and underscores.  Component dataset types should contain a single
        period separating the base dataset type name from the component
        name (and may be recursive).
    dimensions : `DimensionGraph` or iterable of `Dimension`
        Dimensions used to label and relate instances of this `DatasetType`.
        If not a `DimensionGraph`, ``universe`` must be provided as well.
    storageClass : `StorageClass` or `str`
        Instance of a `StorageClass` or name of `StorageClass` that defines
        how this `DatasetType` is persisted.
    parentStorageClass : `StorageClass` or `str`, optional
        Instance of a `StorageClass` or name of `StorageClass` that defines
        how the composite parent is persisted.  Must be `None` if this
        is not a component.
    universe : `DimensionUniverse`, optional
        Set of all known dimensions, used to normalize ``dimensions`` if it
        is not already a `DimensionGraph`.
    isCalibration : `bool`, optional
        If `True`, this dataset type may be included in
        `~CollectionType.CALIBRATION` collections.
    """

    __slots__ = ("_name", "_dimensions", "_storageClass", "_storageClassName",
                 "_parentStorageClass", "_parentStorageClassName",
                 "_isCalibration")

    _serializedType = SerializedDatasetType

    VALID_NAME_REGEX = re.compile("^[a-zA-Z][a-zA-Z0-9_]*(\\.[a-zA-Z][a-zA-Z0-9_]*)*$")

    @staticmethod
    def nameWithComponent(datasetTypeName: str, componentName: str) -> str:
        """Form a valid DatasetTypeName from a parent and component.

        No validation is performed.

        Parameters
        ----------
        datasetTypeName : `str`
            Base type name.
        componentName : `str`
            Name of component.

        Returns
        -------
        compTypeName : `str`
            Name to use for component DatasetType.
        """
        return "{}.{}".format(datasetTypeName, componentName)
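
    # A minimal usage sketch, assuming hypothetical names "calexp" and "wcs":
    #
    #     DatasetType.nameWithComponent("calexp", "wcs")  # -> "calexp.wcs"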

    def __init__(self, name: str, dimensions: Union[DimensionGraph, Iterable[Dimension]],
                 storageClass: Union[StorageClass, str],
                 parentStorageClass: Optional[Union[StorageClass, str]] = None, *,
                 universe: Optional[DimensionUniverse] = None,
                 isCalibration: bool = False):
        if self.VALID_NAME_REGEX.match(name) is None:
            raise ValueError(f"DatasetType name '{name}' is invalid.")
        self._name = name
        if not isinstance(dimensions, DimensionGraph):
            if universe is None:
                raise ValueError("If dimensions is not a normalized DimensionGraph, "
                                 "a universe must be provided.")
            dimensions = universe.extract(dimensions)
        self._dimensions = dimensions
        if name in self._dimensions.universe.getGovernorDimensions().names:
            raise ValueError(f"Governor dimension name {name} cannot be used as a dataset type name.")
        if not isinstance(storageClass, (StorageClass, str)):
            raise ValueError("StorageClass argument must be StorageClass or str. "
                             f"Got {storageClass}")
        self._storageClass: Optional[StorageClass]
        if isinstance(storageClass, StorageClass):
            self._storageClass = storageClass
            self._storageClassName = storageClass.name
        else:
            self._storageClass = None
            self._storageClassName = storageClass

        self._parentStorageClass: Optional[StorageClass] = None
        self._parentStorageClassName: Optional[str] = None
        if parentStorageClass is not None:
            # Validate the parent argument itself, not the already-checked
            # storageClass.
            if not isinstance(parentStorageClass, (StorageClass, str)):
                raise ValueError("Parent StorageClass argument must be StorageClass or str. "
                                 f"Got {parentStorageClass}")

            # Only allowed for a component dataset type
            _, componentName = self.splitDatasetTypeName(self._name)
            if componentName is None:
                raise ValueError("Can not specify a parent storage class if this is not a component"
                                 f" ({self._name})")
            if isinstance(parentStorageClass, StorageClass):
                self._parentStorageClass = parentStorageClass
                self._parentStorageClassName = parentStorageClass.name
            else:
                self._parentStorageClassName = parentStorageClass

        # Ensure that parent storage class is specified when we have
        # a component and is not specified when we don't
        _, componentName = self.splitDatasetTypeName(self._name)
        if parentStorageClass is None and componentName is not None:
            raise ValueError(f"Component dataset type '{self._name}' constructed without parent"
                             " storage class")
        if parentStorageClass is not None and componentName is None:
            raise ValueError(f"Parent storage class specified by {self._name} is not a composite")
        self._isCalibration = isCalibration

    def __repr__(self) -> str:
        extra = ""
        if self._parentStorageClassName:
            extra = f", parentStorageClass={self._parentStorageClassName}"
        if self._isCalibration:
            extra += ", isCalibration=True"
        return f"DatasetType({self.name!r}, {self.dimensions}, {self._storageClassName}{extra})"

    def __eq__(self, other: Any) -> bool:
        if not isinstance(other, type(self)):
            return False
        if self._name != other._name:
            return False
        if self._dimensions != other._dimensions:
            return False
        if self._storageClass is not None and other._storageClass is not None:
            if self._storageClass != other._storageClass:
                return False
        else:
            if self._storageClassName != other._storageClassName:
                return False
        if self._isCalibration != other._isCalibration:
            return False
        if self._parentStorageClass is not None and other._parentStorageClass is not None:
            return self._parentStorageClass == other._parentStorageClass
        else:
            return self._parentStorageClassName == other._parentStorageClassName

    def __hash__(self) -> int:
        """Hash DatasetType instance.

        This only uses the StorageClass name, which is consistent with the
        implementation of the StorageClass hash method.
        """
        return hash((self._name, self._dimensions, self._storageClassName,
                     self._parentStorageClassName))

    def __lt__(self, other: Any) -> bool:
        """Sort using the dataset type name."""
        if not isinstance(other, type(self)):
            return NotImplemented
        return self.name < other.name

    @property
    def name(self) -> str:
        """Return a string name for the Dataset.

        Must correspond to the same `DatasetType` across all Registries.
        """
        return self._name

    @property
    def dimensions(self) -> DimensionGraph:
        r"""Return the `Dimension`\ s for this dataset type.

        The dimensions label and relate instances of this
        `DatasetType` (`DimensionGraph`).
        """
        return self._dimensions

    @property
    def storageClass(self) -> StorageClass:
        """Return `StorageClass` instance associated with this dataset type.

        The `StorageClass` defines how this `DatasetType`
        is persisted.  Note that if the DatasetType was constructed with the
        name of a StorageClass, then a Butler has to be initialized before
        using this property.
        """
        if self._storageClass is None:
            self._storageClass = StorageClassFactory().getStorageClass(self._storageClassName)
        return self._storageClass

    @property
    def parentStorageClass(self) -> Optional[StorageClass]:
        """Return the storage class of the composite containing this component.

        Note that if the DatasetType was constructed with the name of a
        StorageClass, then a Butler has to be initialized before using this
        property.  Can be `None` if this is not a component of a composite.
        Must be defined if this is a component.
        """
        if self._parentStorageClass is None and self._parentStorageClassName is None:
            return None
        if self._parentStorageClass is None and self._parentStorageClassName is not None:
            self._parentStorageClass = StorageClassFactory().getStorageClass(self._parentStorageClassName)
        return self._parentStorageClass

    def isCalibration(self) -> bool:
        """Return whether datasets of this type can be in calibration collections.

        Returns
        -------
        flag : `bool`
            `True` if datasets of this type may be included in calibration
            collections.
        """
        return self._isCalibration

    @staticmethod
    def splitDatasetTypeName(datasetTypeName: str) -> Tuple[str, Optional[str]]:
        """Return the root name and the component from a composite name.

        Parameters
        ----------
        datasetTypeName : `str`
            The name of the dataset type; it can include a component using
            a "."-separator.

        Returns
        -------
        rootName : `str`
            Root name without any components.
        componentName : `str`
            The component if it has been specified, else `None`.

        Notes
        -----
        If the dataset type name is ``a.b.c`` this method will return a
        root name of ``a`` and a component name of ``b.c``.
        """
        comp = None
        root = datasetTypeName
        if "." in root:
            # If there is doubt, the component is after the first "."
            root, comp = root.split(".", maxsplit=1)
        return root, comp
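
    # A usage sketch matching the Notes above:
    #
    #     DatasetType.splitDatasetTypeName("a.b.c")  # -> ("a", "b.c")
    #     DatasetType.splitDatasetTypeName("a")      # -> ("a", None)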

    def nameAndComponent(self) -> Tuple[str, Optional[str]]:
        """Return the root name of this dataset type and any component.

        Returns
        -------
        rootName : `str`
            Root name for this `DatasetType` without any components.
        componentName : `str`
            The component if it has been specified, else `None`.
        """
        return self.splitDatasetTypeName(self.name)

    def component(self) -> Optional[str]:
        """Return the component name (if defined).

        Returns
        -------
        comp : `str`
            Name of component part of DatasetType name.  `None` if this
            `DatasetType` is not associated with a component.
        """
        _, comp = self.nameAndComponent()
        return comp

    def componentTypeName(self, component: str) -> str:
        """Derive a component dataset type from a composite.

        Parameters
        ----------
        component : `str`
            Name of component

        Returns
        -------
        derived : `str`
            Compound name of this `DatasetType` and the component.

        Raises
        ------
        KeyError
            Requested component is not supported by this `DatasetType`.
        """
        if component in self.storageClass.allComponents():
            return self.nameWithComponent(self.name, component)
        raise KeyError("Requested component ({}) not understood by this DatasetType".format(component))

    def makeCompositeDatasetType(self) -> DatasetType:
        """Return a composite dataset type from the component.

        Returns
        -------
        composite : `DatasetType`
            The composite dataset type.

        Raises
        ------
        RuntimeError
            Raised if this dataset type is not a component dataset type.
        """
        if not self.isComponent():
            raise RuntimeError(f"DatasetType {self.name} must be a component to form the composite")
        composite_name, _ = self.nameAndComponent()
        if self.parentStorageClass is None:
            raise ValueError("Parent storage class is not set. "
                             f"Unable to create composite type from {self.name}")
        return DatasetType(composite_name, dimensions=self.dimensions,
                           storageClass=self.parentStorageClass)

    def makeComponentDatasetType(self, component: str) -> DatasetType:
        """Return a component dataset type from a composite.

        Assumes the same dimensions as the parent.

        Parameters
        ----------
        component : `str`
            Name of component

        Returns
        -------
        datasetType : `DatasetType`
            A new DatasetType instance.
        """
        # The component could be a read/write or read component
        return DatasetType(self.componentTypeName(component), dimensions=self.dimensions,
                           storageClass=self.storageClass.allComponents()[component],
                           parentStorageClass=self.storageClass)

    def makeAllComponentDatasetTypes(self) -> List[DatasetType]:
        """Return all component dataset types for this composite.

        Returns
        -------
        all : `list` of `DatasetType`
            All the component dataset types.  If this is not a composite
            then returns an empty list.
        """
        return [self.makeComponentDatasetType(componentName)
                for componentName in self.storageClass.allComponents()]

    def isComponent(self) -> bool:
        """Return whether this `DatasetType` refers to a component.

        Returns
        -------
        isComponent : `bool`
            `True` if this `DatasetType` is a component, `False` otherwise.
        """
        if self.component():
            return True
        return False

    def isComposite(self) -> bool:
        """Return whether this `DatasetType` is a composite.

        Returns
        -------
        isComposite : `bool`
            `True` if this `DatasetType` is a composite type, `False`
            otherwise.
        """
        return self.storageClass.isComposite()

    def _lookupNames(self) -> Tuple[LookupKey, ...]:
        """Return name keys to use for lookups in configurations.

        The names are returned in order of priority.

        Returns
        -------
        names : `tuple` of `LookupKey`
            Tuple of the `DatasetType` name and the `StorageClass` name.
            If the name includes a component the name with the component
            is first, then the name without the component and finally
            the storage class name and the storage class name of the
            composite.
        """
        rootName, componentName = self.nameAndComponent()
        lookups: Tuple[LookupKey, ...] = (LookupKey(name=self.name),)
        if componentName is not None:
            lookups = lookups + (LookupKey(name=rootName),)

        if self.dimensions:
            # Dimensions are a lower priority than dataset type name
            lookups = lookups + (LookupKey(dimensions=self.dimensions),)

        storageClasses = self.storageClass._lookupNames()
        if componentName is not None and self.parentStorageClass is not None:
            storageClasses += self.parentStorageClass._lookupNames()

        return lookups + storageClasses
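
    # A priority sketch, assuming a hypothetical component dataset type named
    # "calexp.wcs": the keys come back highest priority first, i.e.
    # LookupKey(name="calexp.wcs"), then LookupKey(name="calexp"), then a
    # dimensions-based key (if any), and finally the storage class lookup
    # names of the component and of the composite.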

    def to_simple(self, minimal: bool = False) -> SerializedDatasetType:
        """Convert this class to a simple python type.

        This makes it suitable for serialization.

        Parameters
        ----------
        minimal : `bool`, optional
            Use minimal serialization.  Requires Registry to convert
            back to a full type.

        Returns
        -------
        simple : `SerializedDatasetType`
            The object converted to a class suitable for serialization.
        """
        as_dict: Dict[str, Any]
        if minimal:
            # Only needs the name.
            as_dict = {"name": self.name}
        else:
            # Convert to a dict form
            as_dict = {"name": self.name,
                       "storageClass": self._storageClassName,
                       "isCalibration": self._isCalibration,
                       "dimensions": self.dimensions.to_simple(),
                       }

            if self._parentStorageClassName is not None:
                as_dict["parentStorageClass"] = self._parentStorageClassName
        return SerializedDatasetType(**as_dict)

    @classmethod
    def from_simple(cls, simple: SerializedDatasetType,
                    universe: Optional[DimensionUniverse] = None,
                    registry: Optional[Registry] = None) -> DatasetType:
        """Construct a new object from the simplified form.

        This is usually data returned from the `to_simple` method.

        Parameters
        ----------
        simple : `SerializedDatasetType`
            The value returned by `to_simple()`.
        universe : `DimensionUniverse`
            The special graph of all known dimensions of which this graph
            will be a subset.  Can be `None` if a registry is provided.
        registry : `lsst.daf.butler.Registry`, optional
            Registry to use to convert the simple name of a DatasetType to
            a full `DatasetType`.  Can be `None` if a full description of
            the type is provided along with a universe.

        Returns
        -------
        datasetType : `DatasetType`
            Newly-constructed object.
        """
        if simple.storageClass is None:
            # Treat this as minimalist representation
            if registry is None:
                raise ValueError(f"Unable to convert a DatasetType name '{simple}' to DatasetType"
                                 " without a Registry")
            return registry.getDatasetType(simple.name)

        if universe is None and registry is None:
            raise ValueError("One of universe or registry must be provided.")

        if universe is None and registry is not None:
            # registry should not be none by now but test helps mypy
            universe = registry.dimensions

        if universe is None:
            # this is for mypy
            raise ValueError("Unable to determine a usable universe")

        if simple.dimensions is None:
            # mypy hint
            raise ValueError(f"Dimensions must be specified in {simple}")

        return cls(name=simple.name,
                   dimensions=DimensionGraph.from_simple(simple.dimensions, universe=universe),
                   storageClass=simple.storageClass,
                   isCalibration=simple.isCalibration,
                   parentStorageClass=simple.parentStorageClass,
                   universe=universe)
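
    # A round-trip sketch, assuming pre-existing hypothetical ``universe``
    # and ``dataset_type`` objects:
    #
    #     simple = dataset_type.to_simple()
    #     restored = DatasetType.from_simple(simple, universe=universe)
    #     assert restored == dataset_type
    #
    # With ``to_simple(minimal=True)`` only the name survives, so
    # ``from_simple`` then needs a ``registry`` to look the full type up.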

    to_json = to_json_pydantic
    from_json = classmethod(from_json_pydantic)

    def __reduce__(self) -> Tuple[Callable, Tuple[Type[DatasetType],
                                                  Tuple[str, DimensionGraph, str, Optional[str]],
                                                  Dict[str, bool]]]:
        """Support pickling.

        StorageClass instances cannot normally be pickled, so we pickle the
        StorageClass name instead of the instance.
        """
        return _unpickle_via_factory, (self.__class__, (self.name, self.dimensions, self._storageClassName,
                                                        self._parentStorageClassName),
                                       {"isCalibration": self._isCalibration})

    def __deepcopy__(self, memo: Any) -> DatasetType:
        """Support for deep copy method.

        Normally ``deepcopy`` will use the pickle mechanism to make copies.
        We want to avoid that to support the (possibly degenerate) use case
        where a DatasetType is constructed with a StorageClass instance that
        is not registered with the StorageClassFactory (this happens in unit
        tests).  Instead we re-implement the ``__deepcopy__`` method.
        """
        return DatasetType(name=deepcopy(self.name, memo),
                           dimensions=deepcopy(self.dimensions, memo),
                           storageClass=deepcopy(self._storageClass or self._storageClassName, memo),
                           parentStorageClass=deepcopy(self._parentStorageClass
                                                       or self._parentStorageClassName, memo),
                           isCalibration=deepcopy(self._isCalibration, memo))
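

# A copy sketch, assuming a hypothetical ``dataset_type`` instance:
# ``copy.deepcopy`` routes through ``__deepcopy__`` above rather than the
# pickle machinery, so an unregistered StorageClass instance survives.
#
#     from copy import deepcopy
#     clone = deepcopy(dataset_type)
#     assert clone == dataset_type and clone is not dataset_type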


def _unpickle_via_factory(factory: Callable, args: Any, kwargs: Any) -> DatasetType:
    """Unpickle something by calling a factory.

    Allows subclasses to unpickle using `__reduce__` with keyword
    arguments as well as positional arguments.
    """
    return factory(*args, **kwargs)