# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ["DatasetType"]

from copy import deepcopy
import re
from types import MappingProxyType
from typing import (
    TYPE_CHECKING,
    Any,
    Callable,
    Dict,
    Iterable,
    List,
    Mapping,
    Optional,
    Tuple,
    Type,
    Union,
)

from ..storageClass import StorageClass, StorageClassFactory
from ..dimensions import DimensionGraph
from ..configSupport import LookupKey
from ..json import from_json_generic, to_json_generic

if TYPE_CHECKING:
    from ..dimensions import Dimension, DimensionUniverse
    from ...registry import Registry


def _safeMakeMappingProxyType(data: Optional[Mapping]) -> Mapping:
    """Return a read-only view of ``data``, treating `None` as empty."""
    if data is None:
        data = {}
    return MappingProxyType(data)


class DatasetType:
    r"""A named category of Datasets.

    Defines how they are organized, related, and stored.

    A concrete, final class whose instances represent `DatasetType`\ s.
    `DatasetType` instances may be constructed without a `Registry`,
    but they must be registered
    via `Registry.registerDatasetType()` before corresponding Datasets
    may be added.
    `DatasetType` instances are immutable.

    Parameters
    ----------
    name : `str`
        A string name for the Dataset; must correspond to the same
        `DatasetType` across all Registries.  Names must start with an
        upper or lowercase letter, and may contain only letters, numbers,
        and underscores.  Component dataset types should contain a single
        period separating the base dataset type name from the component
        name (and may be recursive).
    dimensions : `DimensionGraph` or iterable of `Dimension`
        Dimensions used to label and relate instances of this `DatasetType`.
        If not a `DimensionGraph`, ``universe`` must be provided as well.
    storageClass : `StorageClass` or `str`
        Instance of a `StorageClass` or name of `StorageClass` that defines
        how this `DatasetType` is persisted.
    parentStorageClass : `StorageClass` or `str`, optional
        Instance of a `StorageClass` or name of `StorageClass` that defines
        how the composite parent is persisted.  Must be `None` if this
        is not a component.  Mandatory if it is a component but can be the
        special temporary placeholder
        (`DatasetType.PlaceholderParentStorageClass`) to allow
        construction with an intent to finalize later.
    universe : `DimensionUniverse`, optional
        Set of all known dimensions, used to normalize ``dimensions`` if it
        is not already a `DimensionGraph`.
    isCalibration : `bool`, optional
        If `True`, this dataset type may be included in
        `~CollectionType.CALIBRATION` collections.
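
    Examples
    --------
    A minimal construction sketch (illustrative, not from the original
    source; ``universe`` is assumed to be an existing `DimensionUniverse`
    and ``"ExposureF"`` a storage class already known to the
    `StorageClassFactory`)::

        calexp = DatasetType("calexp",
                             dimensions=["instrument", "visit", "detector"],
                             storageClass="ExposureF",
                             universe=universe)
        wcs = calexp.makeComponentDatasetType("wcs")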
    """

    __slots__ = ("_name", "_dimensions", "_storageClass", "_storageClassName",
                 "_parentStorageClass", "_parentStorageClassName",
                 "_isCalibration")

    VALID_NAME_REGEX = re.compile("^[a-zA-Z][a-zA-Z0-9_]*(\\.[a-zA-Z][a-zA-Z0-9_]*)*$")
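    # Illustrative matches (examples added here, not from the original
    # source): "calexp" and "calexp.wcs" are accepted by this pattern;
    # "2fast" (leading digit) and "bad name" (embedded space) are not.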

    PlaceholderParentStorageClass = StorageClass("PlaceHolder")
    """Placeholder StorageClass that can be used temporarily for a
    component.

    This can be useful in pipeline construction where we are creating
    dataset types without a registry.
    """

    @staticmethod
    def nameWithComponent(datasetTypeName: str, componentName: str) -> str:
        """Form a valid DatasetTypeName from a parent and component.

        No validation is performed.

        Parameters
        ----------
        datasetTypeName : `str`
            Base type name.
        componentName : `str`
            Name of component.

        Returns
        -------
        compTypeName : `str`
            Name to use for component DatasetType.
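
        Examples
        --------
        A simple illustration with invented names:

        >>> DatasetType.nameWithComponent("calexp", "wcs")
        'calexp.wcs'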
        """
        return "{}.{}".format(datasetTypeName, componentName)

    def __init__(self, name: str, dimensions: Union[DimensionGraph, Iterable[Dimension]],
                 storageClass: Union[StorageClass, str],
                 parentStorageClass: Optional[Union[StorageClass, str]] = None, *,
                 universe: Optional[DimensionUniverse] = None,
                 isCalibration: bool = False):
        if self.VALID_NAME_REGEX.match(name) is None:
            raise ValueError(f"DatasetType name '{name}' is invalid.")
        self._name = name
        if not isinstance(dimensions, DimensionGraph):
            if universe is None:
                raise ValueError("If dimensions is not a normalized DimensionGraph, "
                                 "a universe must be provided.")
            dimensions = universe.extract(dimensions)
        self._dimensions = dimensions
        if name in self._dimensions.universe.getGovernorDimensions().names:
            raise ValueError(f"Governor dimension name {name} cannot be used as a dataset type name.")
        if not isinstance(storageClass, (StorageClass, str)):
            raise ValueError("StorageClass argument must be StorageClass or str. "
                             f"Got {storageClass}")
        self._storageClass: Optional[StorageClass]
        if isinstance(storageClass, StorageClass):
            self._storageClass = storageClass
            self._storageClassName = storageClass.name
        else:
            self._storageClass = None
            self._storageClassName = storageClass

        self._parentStorageClass: Optional[StorageClass] = None
        self._parentStorageClassName: Optional[str] = None
        if parentStorageClass is not None:
            # Validate the parent argument itself (not storageClass, which
            # was already checked above).
            if not isinstance(parentStorageClass, (StorageClass, str)):
                raise ValueError("Parent StorageClass argument must be StorageClass or str. "
                                 f"Got {parentStorageClass}")

            # Only allowed for a component dataset type
            _, componentName = self.splitDatasetTypeName(self._name)
            if componentName is None:
                raise ValueError("Can not specify a parent storage class if this is not a component"
                                 f" ({self._name})")
            if isinstance(parentStorageClass, StorageClass):
                self._parentStorageClass = parentStorageClass
                self._parentStorageClassName = parentStorageClass.name
            else:
                self._parentStorageClassName = parentStorageClass

        # Ensure that parent storage class is specified when we have
        # a component and is not specified when we don't
        _, componentName = self.splitDatasetTypeName(self._name)
        if parentStorageClass is None and componentName is not None:
            raise ValueError(f"Component dataset type '{self._name}' constructed without parent"
                             " storage class")
        if parentStorageClass is not None and componentName is None:
            raise ValueError(f"Parent storage class specified but dataset type"
                             f" {self._name} is not a component")
        self._isCalibration = isCalibration

    def __repr__(self) -> str:
        extra = ""
        if self._parentStorageClassName:
            extra = f", parentStorageClass={self._parentStorageClassName}"
        if self._isCalibration:
            extra += ", isCalibration=True"
        return f"DatasetType({self.name!r}, {self.dimensions}, {self._storageClassName}{extra})"

    def __eq__(self, other: Any) -> bool:
        if not isinstance(other, type(self)):
            return False
        if self._name != other._name:
            return False
        if self._dimensions != other._dimensions:
            return False
        if self._storageClass is not None and other._storageClass is not None:
            if self._storageClass != other._storageClass:
                return False
        else:
            if self._storageClassName != other._storageClassName:
                return False
        if self._isCalibration != other._isCalibration:
            return False
        if self._parentStorageClass is not None and other._parentStorageClass is not None:
            return self._parentStorageClass == other._parentStorageClass
        else:
            return self._parentStorageClassName == other._parentStorageClassName

    def __hash__(self) -> int:
        """Hash DatasetType instance.

        This only uses the StorageClass name, which is consistent with the
        implementation of the StorageClass hash method.
        """
        return hash((self._name, self._dimensions, self._storageClassName,
                     self._parentStorageClassName))

    def __lt__(self, other: Any) -> bool:
        """Sort using the dataset type name."""
        if not isinstance(other, type(self)):
            return NotImplemented
        return self.name < other.name

    @property
    def name(self) -> str:
        """Return a string name for the Dataset.

        Must correspond to the same `DatasetType` across all Registries.
        """
        return self._name

    @property
    def dimensions(self) -> DimensionGraph:
        r"""Return the `Dimension`\ s for this dataset type.

        The dimensions label and relate instances of this
        `DatasetType` (`DimensionGraph`).
        """
        return self._dimensions

    @property
    def storageClass(self) -> StorageClass:
        """Return `StorageClass` instance associated with this dataset type.

        The `StorageClass` defines how this `DatasetType`
        is persisted.  Note that if the DatasetType was constructed with the
        name of a StorageClass, then a Butler has to be initialized before
        this property can be used.
        """
        if self._storageClass is None:
            self._storageClass = StorageClassFactory().getStorageClass(self._storageClassName)
        return self._storageClass

    @property
    def parentStorageClass(self) -> Optional[StorageClass]:
        """Return the storage class of the composite containing this component.

        Note that if the DatasetType was constructed with the name of a
        StorageClass, then a Butler has to be initialized before this
        property can be used.  Can be `None` if this is not a component of
        a composite.  Must be defined if this is a component.
        """
        if self._parentStorageClass is None and self._parentStorageClassName is None:
            return None
        if self._parentStorageClass is None and self._parentStorageClassName is not None:
            self._parentStorageClass = StorageClassFactory().getStorageClass(self._parentStorageClassName)
        return self._parentStorageClass

    def isCalibration(self) -> bool:
        """Return if datasets of this type can be in calibration collections.

        Returns
        -------
        flag : `bool`
            `True` if datasets of this type may be included in calibration
            collections.
        """
        return self._isCalibration

    def finalizeParentStorageClass(self, newParent: StorageClass) -> None:
        """Finalize the parent storage class definition.

        Replaces the current placeholder parent storage class with
        the real parent.

        Parameters
        ----------
        newParent : `StorageClass`
            The new parent to be associated with this composite dataset
            type. This replaces the temporary placeholder parent that
            was specified during construction.

        Raises
        ------
        ValueError
            Raised if this dataset type is not a component of a composite.
            Raised if a StorageClass is not given.
            Raised if the parent currently associated with the dataset
            type is not a placeholder.
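
        Examples
        --------
        A sketch of the intended workflow (names are invented; the
        component is assumed to have been constructed with
        `DatasetType.PlaceholderParentStorageClass` as its parent and
        ``"ExposureF"`` to be a registered storage class):

        >>> wcsType.finalizeParentStorageClass(  # doctest: +SKIP
        ...     StorageClassFactory().getStorageClass("ExposureF"))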
        """
        if not self.isComponent():
            raise ValueError("Can not set a parent storage class if this is not a component"
                             f" ({self.name})")
        if self._parentStorageClass != self.PlaceholderParentStorageClass:
            raise ValueError(f"This DatasetType has a parent of {self._parentStorageClassName} and"
                             " is not a placeholder.")
        if not isinstance(newParent, StorageClass):
            raise ValueError(f"Supplied parent must be a StorageClass. Got {newParent!r}")
        self._parentStorageClass = newParent
        self._parentStorageClassName = newParent.name

    @staticmethod
    def splitDatasetTypeName(datasetTypeName: str) -> Tuple[str, Optional[str]]:
        """Return the root name and the component from a composite name.

        Parameters
        ----------
        datasetTypeName : `str`
            The name of the dataset type, can include a component using
            a "."-separator.

        Returns
        -------
        rootName : `str`
            Root name without any components.
        componentName : `str`
            The component if it has been specified, else `None`.

        Notes
        -----
        If the dataset type name is ``a.b.c`` this method will return a
        root name of ``a`` and a component name of ``b.c``.
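
        Examples
        --------
        Illustrative calls (names are invented):

        >>> DatasetType.splitDatasetTypeName("calexp.wcs")
        ('calexp', 'wcs')
        >>> DatasetType.splitDatasetTypeName("calexp")
        ('calexp', None)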
        """
        comp = None
        root = datasetTypeName
        if "." in root:
            # If there is doubt, the component is after the first "."
            root, comp = root.split(".", maxsplit=1)
        return root, comp

    def nameAndComponent(self) -> Tuple[str, Optional[str]]:
        """Return the root name of this dataset type and any component.

        Returns
        -------
        rootName : `str`
            Root name for this `DatasetType` without any components.
        componentName : `str`
            The component if it has been specified, else `None`.
        """
        return self.splitDatasetTypeName(self.name)

    def component(self) -> Optional[str]:
        """Return the component name (if defined).

        Returns
        -------
        comp : `str`
            Name of component part of DatasetType name. `None` if this
            `DatasetType` is not associated with a component.
        """
        _, comp = self.nameAndComponent()
        return comp

    def componentTypeName(self, component: str) -> str:
        """Derive a component dataset type from a composite.

        Parameters
        ----------
        component : `str`
            Name of component.

        Returns
        -------
        derived : `str`
            Compound name of this `DatasetType` and the component.

        Raises
        ------
        KeyError
            Requested component is not supported by this `DatasetType`.
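
        Examples
        --------
        A sketch (``calexp`` is assumed to be an existing composite
        `DatasetType` whose storage class defines a ``wcs`` component):

        >>> calexp.componentTypeName("wcs")  # doctest: +SKIP
        'calexp.wcs'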
        """
        if component in self.storageClass.allComponents():
            return self.nameWithComponent(self.name, component)
        raise KeyError("Requested component ({}) not understood by this DatasetType".format(component))

    def makeCompositeDatasetType(self) -> DatasetType:
        """Return a composite dataset type from the component.

        Returns
        -------
        composite : `DatasetType`
            The composite dataset type.

        Raises
        ------
        RuntimeError
            Raised if this dataset type is not a component dataset type.
        ValueError
            Raised if the parent storage class has not been set.
        """
        if not self.isComponent():
            raise RuntimeError(f"DatasetType {self.name} must be a component to form the composite")
        composite_name, _ = self.nameAndComponent()
        if self.parentStorageClass is None:
            raise ValueError("Parent storage class is not set. "
                             f"Unable to create composite type from {self.name}")
        return DatasetType(composite_name, dimensions=self.dimensions,
                           storageClass=self.parentStorageClass)

    def makeComponentDatasetType(self, component: str) -> DatasetType:
        """Return a component dataset type from a composite.

        Assumes the same dimensions as the parent.

        Parameters
        ----------
        component : `str`
            Name of component.

        Returns
        -------
        datasetType : `DatasetType`
            A new DatasetType instance.
        """
        # The component could be a read/write or read component
        return DatasetType(self.componentTypeName(component), dimensions=self.dimensions,
                           storageClass=self.storageClass.allComponents()[component],
                           parentStorageClass=self.storageClass)

    def makeAllComponentDatasetTypes(self) -> List[DatasetType]:
        """Return all component dataset types for this composite.

        Returns
        -------
        all : `list` of `DatasetType`
            All the component dataset types. If this is not a composite
            then returns an empty list.
        """
        return [self.makeComponentDatasetType(componentName)
                for componentName in self.storageClass.allComponents()]

    def isComponent(self) -> bool:
        """Return whether this `DatasetType` refers to a component.

        Returns
        -------
        isComponent : `bool`
            `True` if this `DatasetType` is a component, `False` otherwise.
        """
        if self.component():
            return True
        return False

    def isComposite(self) -> bool:
        """Return whether this `DatasetType` is a composite.

        Returns
        -------
        isComposite : `bool`
            `True` if this `DatasetType` is a composite type, `False`
            otherwise.
        """
        return self.storageClass.isComposite()

    def _lookupNames(self) -> Tuple[LookupKey, ...]:
        """Return name keys to use for lookups in configurations.

        The names are returned in order of priority.

        Returns
        -------
        names : `tuple` of `LookupKey`
            Tuple of the `DatasetType` name and the `StorageClass` name.
            If the name includes a component the name with the component
            is first, then the name without the component and finally
            the storage class name.
        """
        rootName, componentName = self.nameAndComponent()
        lookups: Tuple[LookupKey, ...] = (LookupKey(name=self.name),)
        if componentName is not None:
            lookups = lookups + (LookupKey(name=rootName),)

        if self.dimensions:
            # Dimensions are a lower priority than dataset type name
            lookups = lookups + (LookupKey(dimensions=self.dimensions),)

        return lookups + self.storageClass._lookupNames()

    def to_simple(self, minimal: bool = False) -> Union[Dict, str]:
        """Convert this class to a simple python type.

        This makes it suitable for serialization.

        Parameters
        ----------
        minimal : `bool`, optional
            Use minimal serialization. Requires Registry to convert
            back to a full type.

        Returns
        -------
        simple : `dict` or `str`
            The object converted to a dictionary or a simple string.
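
        Examples
        --------
        A sketch of the two forms (the dataset type and field values are
        invented; ``dt`` is assumed to be an existing `DatasetType`):

        >>> dt.to_simple(minimal=True)  # doctest: +SKIP
        'calexp.wcs'
        >>> dt.to_simple()  # doctest: +SKIP
        {'name': 'calexp.wcs', 'storageClass': 'Wcs', 'isCalibration': False,
         'dimensions': ..., 'parentStorageClass': 'ExposureF'}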
        """
        if minimal:
            # Only needs the name.
            return self.name

        # Convert to a dict form
        as_dict = {"name": self.name,
                   "storageClass": self._storageClassName,
                   "isCalibration": self._isCalibration,
                   "dimensions": self.dimensions.to_simple(),
                   }
        if self._parentStorageClassName is not None:
            as_dict["parentStorageClass"] = self._parentStorageClassName
        return as_dict

    @classmethod
    def from_simple(cls, simple: Union[Dict, str],
                    universe: Optional[DimensionUniverse] = None,
                    registry: Optional[Registry] = None) -> DatasetType:
        """Construct a new object from the simplified form.

        This is usually data returned from the `to_simple` method.

        Parameters
        ----------
        simple : `dict` of [`str`, `Any`] or `str`
            The value returned by `to_simple()`.
        universe : `DimensionUniverse`, optional
            The special graph of all known dimensions of which this graph will
            be a subset. Can be `None` if a registry is provided.
        registry : `lsst.daf.butler.Registry`, optional
            Registry to use to convert simple name of a DatasetType to
            a full `DatasetType`. Can be `None` if a full description of
            the type is provided along with a universe.

        Returns
        -------
        datasetType : `DatasetType`
            Newly-constructed object.
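
        Examples
        --------
        A round-trip sketch (``dt`` and ``universe`` are assumed to be an
        existing `DatasetType` and `DimensionUniverse`):

        >>> restored = DatasetType.from_simple(dt.to_simple(),  # doctest: +SKIP
        ...                                    universe=universe)
        >>> restored == dt  # doctest: +SKIP
        True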
        """
        if isinstance(simple, str):
            if registry is None:
                raise ValueError(f"Unable to convert a DatasetType name '{simple}' to DatasetType"
                                 " without a Registry")
            return registry.getDatasetType(simple)

        if universe is None and registry is None:
            raise ValueError("One of universe or registry must be provided.")

        if universe is None and registry is not None:
            # registry should not be none by now but test helps mypy
            universe = registry.dimensions

        if universe is None:
            # this is for mypy
            raise ValueError("Unable to determine a usable universe")

        return cls(name=simple["name"],
                   dimensions=DimensionGraph.from_simple(simple["dimensions"], universe=universe),
                   storageClass=simple["storageClass"],
                   isCalibration=simple.get("isCalibration", False),
                   parentStorageClass=simple.get("parentStorageClass"),
                   universe=universe)

    to_json = to_json_generic
    from_json = classmethod(from_json_generic)

    def __reduce__(self) -> Tuple[Callable, Tuple[Type[DatasetType],
                                                  Tuple[str, DimensionGraph, str, Optional[str]],
                                                  Dict[str, bool]]]:
        """Support pickling.

        StorageClass instances can not normally be pickled, so we pickle
        the StorageClass name instead of the instance.
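
        Examples
        --------
        Pickle round-trips preserve equality (a sketch; ``dt`` is assumed
        to be an existing `DatasetType`):

        >>> import pickle
        >>> pickle.loads(pickle.dumps(dt)) == dt  # doctest: +SKIP
        True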
        """
        return _unpickle_via_factory, (self.__class__, (self.name, self.dimensions,
                                                        self._storageClassName,
                                                        self._parentStorageClassName),
                                       {"isCalibration": self._isCalibration})

    def __deepcopy__(self, memo: Any) -> DatasetType:
        """Support for deep copy method.

        Normally ``deepcopy`` will use the pickle mechanism to make copies.
        We want to avoid that to support the (possibly degenerate) use case
        where a DatasetType is constructed with a StorageClass instance that
        is not registered with the StorageClassFactory (this happens in unit
        tests).  Instead we re-implement the ``__deepcopy__`` method.
        """
        return DatasetType(name=deepcopy(self.name, memo),
                           dimensions=deepcopy(self.dimensions, memo),
                           storageClass=deepcopy(self._storageClass or self._storageClassName, memo),
                           parentStorageClass=deepcopy(self._parentStorageClass
                                                       or self._parentStorageClassName, memo),
                           isCalibration=deepcopy(self._isCalibration, memo))


def _unpickle_via_factory(factory: Callable, args: Any, kwargs: Any) -> DatasetType:
    """Unpickle something by calling a factory.

    Allows subclasses to unpickle using `__reduce__` with keyword
    arguments as well as positional arguments.
    """
    return factory(*args, **kwargs)