# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ["DatasetType", "SerializedDatasetType"]

from copy import deepcopy
import re

from types import MappingProxyType

from typing import (
    TYPE_CHECKING,
    Any,
    Callable,
    Dict,
    Iterable,
    List,
    Mapping,
    Optional,
    Tuple,
    Type,
    Union,
)

from pydantic import BaseModel, StrictStr, StrictBool

from ..storageClass import StorageClass, StorageClassFactory
from ..dimensions import DimensionGraph, SerializedDimensionGraph
from ..configSupport import LookupKey
from ..json import from_json_pydantic, to_json_pydantic

if TYPE_CHECKING:
    from ..dimensions import Dimension, DimensionUniverse
    from ...registry import Registry


def _safeMakeMappingProxyType(data: Optional[Mapping]) -> Mapping:
    """Return a read-only view of ``data``, substituting an empty mapping
    for `None`."""
    if data is None:
        data = {}
    return MappingProxyType(data)


class SerializedDatasetType(BaseModel):
    """Simplified model of a `DatasetType` suitable for serialization."""

    name: StrictStr
    storageClass: Optional[StrictStr] = None
    dimensions: Optional[SerializedDimensionGraph] = None
    parentStorageClass: Optional[StrictStr] = None
    isCalibration: StrictBool = False


class DatasetType:
    r"""A named category of Datasets.

    Defines how they are organized, related, and stored.

    A concrete, final class whose instances represent `DatasetType`\ s.
    `DatasetType` instances may be constructed without a `Registry`,
    but they must be registered via `Registry.registerDatasetType()`
    before corresponding Datasets may be added. `DatasetType` instances
    are immutable.

    Parameters
    ----------
    name : `str`
        A string name for the Dataset; must correspond to the same
        `DatasetType` across all Registries. Names must start with an
        upper or lowercase letter, and may contain only letters, numbers,
        and underscores. Component dataset types should contain a single
        period separating the base dataset type name from the component name
        (and may be recursive).
    dimensions : `DimensionGraph` or iterable of `Dimension`
        Dimensions used to label and relate instances of this `DatasetType`.
        If not a `DimensionGraph`, ``universe`` must be provided as well.
    storageClass : `StorageClass` or `str`
        Instance of a `StorageClass` or name of `StorageClass` that defines
        how this `DatasetType` is persisted.
    parentStorageClass : `StorageClass` or `str`, optional
        Instance of a `StorageClass` or name of `StorageClass` that defines
        how the composite parent is persisted. Must be `None` if this
        is not a component.
    universe : `DimensionUniverse`, optional
        Set of all known dimensions, used to normalize ``dimensions`` if it
        is not already a `DimensionGraph`.
    isCalibration : `bool`, optional
        If `True`, this dataset type may be included in
        `~CollectionType.CALIBRATION` collections.

    See Also
    --------
    :ref:`daf_butler_organizing_datasets`
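
    Examples
    --------
    An illustrative sketch only: the dimension names and the ``ExposureF``
    storage class are assumptions, and ``universe`` would normally come
    from a `Registry` (e.g. ``registry.dimensions``)::

        datasetType = DatasetType("calexp",
                                  dimensions=["instrument", "visit", "detector"],
                                  storageClass="ExposureF",
                                  universe=universe)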
    """

    __slots__ = ("_name", "_dimensions", "_storageClass", "_storageClassName",
                 "_parentStorageClass", "_parentStorageClassName",
                 "_isCalibration")

    _serializedType = SerializedDatasetType
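
    # Matches names built from letter-led alphanumeric/underscore segments
    # separated by single periods, e.g. "calexp" or "calexp.psf".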
    VALID_NAME_REGEX = re.compile("^[a-zA-Z][a-zA-Z0-9_]*(\\.[a-zA-Z][a-zA-Z0-9_]*)*$")

    @staticmethod
    def nameWithComponent(datasetTypeName: str, componentName: str) -> str:
        """Form a valid DatasetTypeName from a parent and component.

        No validation is performed.

        Parameters
        ----------
        datasetTypeName : `str`
            Base type name.
        componentName : `str`
            Name of component.

        Returns
        -------
        compTypeName : `str`
            Name to use for component DatasetType.
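
        Examples
        --------
        Since no validation is performed, any pair of strings can be
        combined:

        >>> DatasetType.nameWithComponent("calexp", "wcs")
        'calexp.wcs'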
        """
        return "{}.{}".format(datasetTypeName, componentName)

    def __init__(self, name: str, dimensions: Union[DimensionGraph, Iterable[Dimension]],
                 storageClass: Union[StorageClass, str],
                 parentStorageClass: Optional[Union[StorageClass, str]] = None, *,
                 universe: Optional[DimensionUniverse] = None,
                 isCalibration: bool = False):
        if self.VALID_NAME_REGEX.match(name) is None:
            raise ValueError(f"DatasetType name '{name}' is invalid.")
        self._name = name
        if not isinstance(dimensions, DimensionGraph):
            if universe is None:
                raise ValueError("If dimensions is not a normalized DimensionGraph, "
                                 "a universe must be provided.")
            dimensions = universe.extract(dimensions)
        self._dimensions = dimensions
        if name in self._dimensions.universe.getGovernorDimensions().names:
            raise ValueError(f"Governor dimension name {name} cannot be used as a dataset type name.")
        if not isinstance(storageClass, (StorageClass, str)):
            raise ValueError("StorageClass argument must be StorageClass or str. "
                             f"Got {storageClass}")
        self._storageClass: Optional[StorageClass]
        if isinstance(storageClass, StorageClass):
            self._storageClass = storageClass
            self._storageClassName = storageClass.name
        else:
            self._storageClass = None
            self._storageClassName = storageClass

        self._parentStorageClass: Optional[StorageClass] = None
        self._parentStorageClassName: Optional[str] = None
        if parentStorageClass is not None:
            if not isinstance(parentStorageClass, (StorageClass, str)):
                raise ValueError("Parent StorageClass argument must be StorageClass or str. "
                                 f"Got {parentStorageClass}")

            # Only allowed for a component dataset type
            _, componentName = self.splitDatasetTypeName(self._name)
            if componentName is None:
                raise ValueError("Can not specify a parent storage class if this is not a component"
                                 f" ({self._name})")
            if isinstance(parentStorageClass, StorageClass):
                self._parentStorageClass = parentStorageClass
                self._parentStorageClassName = parentStorageClass.name
            else:
                self._parentStorageClassName = parentStorageClass

        # Ensure that a parent storage class is specified when we have
        # a component and is not specified when we don't.
        _, componentName = self.splitDatasetTypeName(self._name)
        if parentStorageClass is None and componentName is not None:
            raise ValueError(f"Component dataset type '{self._name}' constructed without parent"
                             " storage class")
        if parentStorageClass is not None and componentName is None:
            raise ValueError(f"Parent storage class specified but dataset type {self._name}"
                             " is not a component")
        self._isCalibration = isCalibration

    def __repr__(self) -> str:
        extra = ""
        if self._parentStorageClassName:
            extra = f", parentStorageClass={self._parentStorageClassName}"
        if self._isCalibration:
            extra += ", isCalibration=True"
        return f"DatasetType({self.name!r}, {self.dimensions}, {self._storageClassName}{extra})"

    def __eq__(self, other: Any) -> bool:
        if not isinstance(other, type(self)):
            return False
        if self._name != other._name:
            return False
        if self._dimensions != other._dimensions:
            return False
        if self._storageClass is not None and other._storageClass is not None:
            if self._storageClass != other._storageClass:
                return False
        else:
            if self._storageClassName != other._storageClassName:
                return False
        if self._isCalibration != other._isCalibration:
            return False
        if self._parentStorageClass is not None and other._parentStorageClass is not None:
            return self._parentStorageClass == other._parentStorageClass
        else:
            return self._parentStorageClassName == other._parentStorageClassName

    def __hash__(self) -> int:
        """Hash DatasetType instance.

        This only uses the StorageClass name, which is consistent with the
        implementation of the StorageClass hash method.
        """
        return hash((self._name, self._dimensions, self._storageClassName,
                     self._parentStorageClassName))

    def __lt__(self, other: Any) -> bool:
        """Sort using the dataset type name."""
        if not isinstance(other, type(self)):
            return NotImplemented
        return self.name < other.name

    @property
    def name(self) -> str:
        """Return a string name for the Dataset.

        Must correspond to the same `DatasetType` across all Registries.
        """
        return self._name

    @property
    def dimensions(self) -> DimensionGraph:
        r"""Return the `Dimension`\ s for this dataset type.

        The dimensions label and relate instances of this
        `DatasetType` (`DimensionGraph`).
        """
        return self._dimensions

    @property
    def storageClass(self) -> StorageClass:
        """Return `StorageClass` instance associated with this dataset type.

        The `StorageClass` defines how this `DatasetType`
        is persisted. Note that if DatasetType was constructed with a name
        of a StorageClass then Butler has to be initialized before using
        this property.
        """
        if self._storageClass is None:
            self._storageClass = StorageClassFactory().getStorageClass(self._storageClassName)
        return self._storageClass

    @property
    def parentStorageClass(self) -> Optional[StorageClass]:
        """Return the storage class of the composite containing this component.

        Note that if DatasetType was constructed with a name of a
        StorageClass then Butler has to be initialized before using this
        property. Can be `None` if this is not a component of a composite.
        Must be defined if this is a component.
        """
        if self._parentStorageClass is None and self._parentStorageClassName is None:
            return None
        if self._parentStorageClass is None and self._parentStorageClassName is not None:
            self._parentStorageClass = StorageClassFactory().getStorageClass(self._parentStorageClassName)
        return self._parentStorageClass

    def isCalibration(self) -> bool:
        """Return if datasets of this type can be in calibration collections.

        Returns
        -------
        flag : `bool`
            `True` if datasets of this type may be included in calibration
            collections.
        """
        return self._isCalibration

    @staticmethod
    def splitDatasetTypeName(datasetTypeName: str) -> Tuple[str, Optional[str]]:
        """Return the root name and the component from a composite name.

        Parameters
        ----------
        datasetTypeName : `str`
            The name of the dataset type, can include a component using
            a "."-separator.

        Returns
        -------
        rootName : `str`
            Root name without any components.
        componentName : `str`
            The component if it has been specified, else `None`.

        Notes
        -----
        If the dataset type name is ``a.b.c`` this method will return a
        root name of ``a`` and a component name of ``b.c``.
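
        Examples
        --------
        The name is split at the first period only:

        >>> DatasetType.splitDatasetTypeName("calexp.wcs")
        ('calexp', 'wcs')
        >>> DatasetType.splitDatasetTypeName("a.b.c")
        ('a', 'b.c')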
        """
        comp = None
        root = datasetTypeName
        if "." in root:
            # If there is doubt, the component is after the first "."
            root, comp = root.split(".", maxsplit=1)
        return root, comp

    def nameAndComponent(self) -> Tuple[str, Optional[str]]:
        """Return the root name of this dataset type and any component.

        Returns
        -------
        rootName : `str`
            Root name for this `DatasetType` without any components.
        componentName : `str`
            The component if it has been specified, else `None`.
        """
        return self.splitDatasetTypeName(self.name)

    def component(self) -> Optional[str]:
        """Return the component name (if defined).

        Returns
        -------
        comp : `str`
            Name of component part of DatasetType name. `None` if this
            `DatasetType` is not associated with a component.
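
        Examples
        --------
        An illustrative sketch; assumes ``wcsType`` is a `DatasetType`
        named ``calexp.wcs``::

            wcsType.component()   # -> 'wcs'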
        """
        _, comp = self.nameAndComponent()
        return comp

    def componentTypeName(self, component: str) -> str:
        """Derive a component dataset type from a composite.

        Parameters
        ----------
        component : `str`
            Name of component.

        Returns
        -------
        derived : `str`
            Compound name of this `DatasetType` and the component.

        Raises
        ------
        KeyError
            Requested component is not supported by this `DatasetType`.
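
        Examples
        --------
        An illustrative sketch; assumes a composite named ``calexp`` whose
        storage class defines a ``wcs`` component::

            exposureType.componentTypeName("wcs")   # -> 'calexp.wcs'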
        """
        if component in self.storageClass.allComponents():
            return self.nameWithComponent(self.name, component)
        raise KeyError("Requested component ({}) not understood by this DatasetType".format(component))

    def makeCompositeDatasetType(self) -> DatasetType:
        """Return a composite dataset type from the component.

        Returns
        -------
        composite : `DatasetType`
            The composite dataset type.

        Raises
        ------
        RuntimeError
            Raised if this dataset type is not a component dataset type.
        """
        if not self.isComponent():
            raise RuntimeError(f"DatasetType {self.name} must be a component to form the composite")
        composite_name, _ = self.nameAndComponent()
        if self.parentStorageClass is None:
            raise ValueError("Parent storage class is not set. "
                             f"Unable to create composite type from {self.name}")
        return DatasetType(composite_name, dimensions=self.dimensions,
                           storageClass=self.parentStorageClass)

    def makeComponentDatasetType(self, component: str) -> DatasetType:
        """Return a component dataset type from a composite.

        Assumes the same dimensions as the parent.

        Parameters
        ----------
        component : `str`
            Name of component.

        Returns
        -------
        datasetType : `DatasetType`
            A new DatasetType instance.
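
        Examples
        --------
        An illustrative sketch under the same assumptions as in
        `componentTypeName`::

            wcsType = exposureType.makeComponentDatasetType("wcs")
            wcsType.isComponent()                              # -> True
            wcsType.parentStorageClass is exposureType.storageClass   # -> True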
        """
        # The component could be a read/write or read component
        return DatasetType(self.componentTypeName(component), dimensions=self.dimensions,
                           storageClass=self.storageClass.allComponents()[component],
                           parentStorageClass=self.storageClass)

    def makeAllComponentDatasetTypes(self) -> List[DatasetType]:
        """Return all component dataset types for this composite.

        Returns
        -------
        all : `list` of `DatasetType`
            All the component dataset types. If this is not a composite
            then returns an empty list.
        """
        return [self.makeComponentDatasetType(componentName)
                for componentName in self.storageClass.allComponents()]

    def isComponent(self) -> bool:
        """Return whether this `DatasetType` refers to a component.

        Returns
        -------
        isComponent : `bool`
            `True` if this `DatasetType` is a component, `False` otherwise.
        """
        if self.component():
            return True
        return False

    def isComposite(self) -> bool:
        """Return whether this `DatasetType` is a composite.

        Returns
        -------
        isComposite : `bool`
            `True` if this `DatasetType` is a composite type, `False`
            otherwise.
        """
        return self.storageClass.isComposite()

    def _lookupNames(self) -> Tuple[LookupKey, ...]:
        """Return name keys to use for lookups in configurations.

        The names are returned in order of priority.

        Returns
        -------
        names : `tuple` of `LookupKey`
            Tuple of the `DatasetType` name and the `StorageClass` name.
            If the name includes a component the name with the component
            is first, then the name without the component and finally
            the storage class name and the storage class name of the
            composite.
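
        Notes
        -----
        As a hypothetical illustration, a component dataset type
        ``calexp.wcs`` would produce keys for ``calexp.wcs``, then
        ``calexp``, then its dimensions, then the storage class names of
        the component followed by those of the parent composite.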
        """
        rootName, componentName = self.nameAndComponent()
        lookups: Tuple[LookupKey, ...] = (LookupKey(name=self.name),)
        if componentName is not None:
            lookups = lookups + (LookupKey(name=rootName),)

        if self.dimensions:
            # Dimensions are a lower priority than dataset type name
            lookups = lookups + (LookupKey(dimensions=self.dimensions),)

        storageClasses = self.storageClass._lookupNames()
        if componentName is not None and self.parentStorageClass is not None:
            storageClasses += self.parentStorageClass._lookupNames()

        return lookups + storageClasses

    def to_simple(self, minimal: bool = False) -> SerializedDatasetType:
        """Convert this class to a simple python type.

        This makes it suitable for serialization.

        Parameters
        ----------
        minimal : `bool`, optional
            Use minimal serialization. Requires Registry to convert
            back to a full type.

        Returns
        -------
        simple : `SerializedDatasetType`
            The object converted to a class suitable for serialization.
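
        Examples
        --------
        A sketch of a round trip, assuming ``datasetType`` is a fully
        specified `DatasetType` and ``universe`` its `DimensionUniverse`::

            simple = datasetType.to_simple()
            restored = DatasetType.from_simple(simple, universe=universe)
            restored == datasetType   # -> True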
        """
        as_dict: Dict[str, Any]
        if minimal:
            # Only needs the name.
            as_dict = {"name": self.name}
        else:
            # Convert to a dict form
            as_dict = {"name": self.name,
                       "storageClass": self._storageClassName,
                       "isCalibration": self._isCalibration,
                       "dimensions": self.dimensions.to_simple(),
                       }

            if self._parentStorageClassName is not None:
                as_dict["parentStorageClass"] = self._parentStorageClassName
        return SerializedDatasetType(**as_dict)

    @classmethod
    def from_simple(cls, simple: SerializedDatasetType,
                    universe: Optional[DimensionUniverse] = None,
                    registry: Optional[Registry] = None) -> DatasetType:
        """Construct a new object from the simplified form.

        This is usually data returned from the `to_simple` method.

        Parameters
        ----------
        simple : `SerializedDatasetType`
            The value returned by `to_simple()`.
        universe : `DimensionUniverse`
            The special graph of all known dimensions of which this graph will
            be a subset. Can be `None` if a registry is provided.
        registry : `lsst.daf.butler.Registry`, optional
            Registry to use to convert simple name of a DatasetType to
            a full `DatasetType`. Can be `None` if a full description of
            the type is provided along with a universe.

        Returns
        -------
        datasetType : `DatasetType`
            Newly-constructed object.
        """
        if simple.storageClass is None:
            # Treat this as minimalist representation
            if registry is None:
                raise ValueError(f"Unable to convert a DatasetType name '{simple}' to DatasetType"
                                 " without a Registry")
            return registry.getDatasetType(simple.name)

        if universe is None and registry is None:
            raise ValueError("One of universe or registry must be provided.")

        if universe is None and registry is not None:
            # Registry should not be None by now but this check helps mypy
            universe = registry.dimensions

        if universe is None:
            # this is for mypy
            raise ValueError("Unable to determine a usable universe")

        if simple.dimensions is None:
            # mypy hint
            raise ValueError(f"Dimensions must be specified in {simple}")

        return cls(name=simple.name,
                   dimensions=DimensionGraph.from_simple(simple.dimensions, universe=universe),
                   storageClass=simple.storageClass,
                   isCalibration=simple.isCalibration,
                   parentStorageClass=simple.parentStorageClass,
                   universe=universe)

    to_json = to_json_pydantic
    from_json = classmethod(from_json_pydantic)

    def __reduce__(self) -> Tuple[Callable, Tuple[Type[DatasetType],
                                                  Tuple[str, DimensionGraph, str, Optional[str]],
                                                  Dict[str, bool]]]:
        """Support pickling.

        StorageClass instances can not normally be pickled, so we pickle
        the StorageClass name instead of the instance.
        """
        return _unpickle_via_factory, (self.__class__, (self.name, self.dimensions, self._storageClassName,
                                                        self._parentStorageClassName),
                                       {"isCalibration": self._isCalibration})

    def __deepcopy__(self, memo: Any) -> DatasetType:
        """Support for deep copy method.

        Normally ``deepcopy`` will use the pickle mechanism to make copies.
        We want to avoid that in order to support the (possibly degenerate)
        use case where a DatasetType is constructed with a StorageClass
        instance that is not registered with the StorageClassFactory (this
        happens in unit tests). Instead we re-implement the ``__deepcopy__``
        method.
        """
        return DatasetType(name=deepcopy(self.name, memo),
                           dimensions=deepcopy(self.dimensions, memo),
                           storageClass=deepcopy(self._storageClass or self._storageClassName, memo),
                           parentStorageClass=deepcopy(self._parentStorageClass
                                                       or self._parentStorageClassName, memo),
                           isCalibration=deepcopy(self._isCalibration, memo))


def _unpickle_via_factory(factory: Callable, args: Any, kwargs: Any) -> DatasetType:
    """Unpickle something by calling a factory.

    Allows subclasses to unpickle using `__reduce__` with keyword
    arguments as well as positional arguments.
    """
    return factory(*args, **kwargs)