Coverage for python/lsst/daf/butler/core/datasets/type.py : 19%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ["DatasetType"]
26from copy import deepcopy
27import re
29from types import MappingProxyType
31from typing import (
32 TYPE_CHECKING,
33 Any,
34 Callable,
35 Dict,
36 Iterable,
37 List,
38 Mapping,
39 Optional,
40 Tuple,
41 Type,
42 Union,
43)
46from ..storageClass import StorageClass, StorageClassFactory
47from ..dimensions import DimensionGraph
48from ..configSupport import LookupKey
49from ..json import from_json_generic, to_json_generic
51if TYPE_CHECKING: 51 ↛ 52line 51 didn't jump to line 52, because the condition on line 51 was never true
52 from ..dimensions import Dimension, DimensionUniverse
53 from ...registry import Registry
56def _safeMakeMappingProxyType(data: Optional[Mapping]) -> Mapping:
57 if data is None:
58 data = {}
59 return MappingProxyType(data)
class DatasetType:
    r"""A named category of Datasets that defines how they are organized,
    related, and stored.

    A concrete, final class whose instances represent `DatasetType`\ s.
    `DatasetType` instances may be constructed without a `Registry`,
    but they must be registered
    via `Registry.registerDatasetType()` before corresponding Datasets
    may be added.
    `DatasetType` instances are immutable.

    Parameters
    ----------
    name : `str`
        A string name for the Dataset; must correspond to the same
        `DatasetType` across all Registries.  Names must start with an
        upper or lowercase letter, and may contain only letters, numbers,
        and underscores.  Component dataset types should contain a single
        period separating the base dataset type name from the component name
        (and may be recursive).
    dimensions : `DimensionGraph` or iterable of `Dimension`
        Dimensions used to label and relate instances of this `DatasetType`.
        If not a `DimensionGraph`, ``universe`` must be provided as well.
    storageClass : `StorageClass` or `str`
        Instance of a `StorageClass` or name of `StorageClass` that defines
        how this `DatasetType` is persisted.
    parentStorageClass : `StorageClass` or `str`, optional
        Instance of a `StorageClass` or name of `StorageClass` that defines
        how the composite parent is persisted.  Must be `None` if this
        is not a component.  Mandatory if it is a component but can be the
        special temporary placeholder
        (`DatasetType.PlaceholderParentStorageClass`) to allow
        construction with an intent to finalize later.
    universe : `DimensionUniverse`, optional
        Set of all known dimensions, used to normalize ``dimensions`` if it
        is not already a `DimensionGraph`.
    isCalibration : `bool`, optional
        If `True`, this dataset type may be included in
        `~CollectionType.CALIBRATION` collections.
    """

    __slots__ = ("_name", "_dimensions", "_storageClass", "_storageClassName",
                 "_parentStorageClass", "_parentStorageClassName",
                 "_isCalibration")

    # Letters/digits/underscores, "."-separated components, each component
    # starting with a letter.
    VALID_NAME_REGEX = re.compile("^[a-zA-Z][a-zA-Z0-9_]*(\\.[a-zA-Z][a-zA-Z0-9_]*)*$")

    PlaceholderParentStorageClass = StorageClass("PlaceHolder")
    """Placeholder StorageClass that can be used temporarily for a
    component.

    This can be useful in pipeline construction where we are creating
    dataset types without a registry.
    """

    @staticmethod
    def nameWithComponent(datasetTypeName: str, componentName: str) -> str:
        """Form a valid DatasetTypeName from a parent and component.

        No validation is performed.

        Parameters
        ----------
        datasetTypeName : `str`
            Base type name.
        componentName : `str`
            Name of component.

        Returns
        -------
        compTypeName : `str`
            Name to use for component DatasetType.
        """
        return "{}.{}".format(datasetTypeName, componentName)

    def __init__(self, name: str, dimensions: Union[DimensionGraph, Iterable[Dimension]],
                 storageClass: Union[StorageClass, str],
                 parentStorageClass: Optional[Union[StorageClass, str]] = None, *,
                 universe: Optional[DimensionUniverse] = None,
                 isCalibration: bool = False):
        if self.VALID_NAME_REGEX.match(name) is None:
            raise ValueError(f"DatasetType name '{name}' is invalid.")
        self._name = name
        if not isinstance(dimensions, DimensionGraph):
            if universe is None:
                raise ValueError("If dimensions is not a normalized DimensionGraph, "
                                 "a universe must be provided.")
            dimensions = universe.extract(dimensions)
        self._dimensions = dimensions
        if name in self._dimensions.universe.getGovernorDimensions().names:
            raise ValueError(f"Governor dimension name {name} cannot be used as a dataset type name.")
        if not isinstance(storageClass, (StorageClass, str)):
            raise ValueError("StorageClass argument must be StorageClass or str. "
                             f"Got {storageClass}")
        self._storageClass: Optional[StorageClass]
        if isinstance(storageClass, StorageClass):
            self._storageClass = storageClass
            self._storageClassName = storageClass.name
        else:
            # Defer StorageClass resolution until the property is accessed.
            self._storageClass = None
            self._storageClassName = storageClass

        self._parentStorageClass: Optional[StorageClass] = None
        self._parentStorageClassName: Optional[str] = None
        # Compute once; used by both validation paths below.
        _, componentName = self.splitDatasetTypeName(self._name)
        if parentStorageClass is not None:
            # Bug fix: validate the *parent* storage class argument here;
            # previously this re-checked ``storageClass`` so an invalid
            # ``parentStorageClass`` was never rejected.
            if not isinstance(parentStorageClass, (StorageClass, str)):
                raise ValueError("Parent StorageClass argument must be StorageClass or str. "
                                 f"Got {parentStorageClass}")

            # Only allowed for a component dataset type
            if componentName is None:
                raise ValueError("Can not specify a parent storage class if this is not a component"
                                 f" ({self._name})")
            if isinstance(parentStorageClass, StorageClass):
                self._parentStorageClass = parentStorageClass
                self._parentStorageClassName = parentStorageClass.name
            else:
                self._parentStorageClassName = parentStorageClass

        # Ensure that parent storage class is specified when we have
        # a component and is not specified when we don't
        if parentStorageClass is None and componentName is not None:
            raise ValueError(f"Component dataset type '{self._name}' constructed without parent"
                             " storage class")
        if parentStorageClass is not None and componentName is None:
            raise ValueError(f"Parent storage class specified by {self._name} is not a composite")
        self._isCalibration = isCalibration

    def __repr__(self) -> str:
        extra = ""
        if self._parentStorageClassName:
            extra = f", parentStorageClass={self._parentStorageClassName}"
        if self._isCalibration:
            extra += ", isCalibration=True"
        return f"DatasetType({self.name!r}, {self.dimensions}, {self._storageClassName}{extra})"

    def __eq__(self, other: Any) -> bool:
        if not isinstance(other, type(self)):
            return False
        if self._name != other._name:
            return False
        if self._dimensions != other._dimensions:
            return False
        # Compare resolved StorageClass instances when both sides have them;
        # otherwise fall back to comparing by name so unresolved types can
        # still be compared.
        if self._storageClass is not None and other._storageClass is not None:
            if self._storageClass != other._storageClass:
                return False
        else:
            if self._storageClassName != other._storageClassName:
                return False
        if self._isCalibration != other._isCalibration:
            return False
        if self._parentStorageClass is not None and other._parentStorageClass is not None:
            return self._parentStorageClass == other._parentStorageClass
        else:
            return self._parentStorageClassName == other._parentStorageClassName

    def __hash__(self) -> int:
        """Hash DatasetType instance.

        This only uses StorageClass name which is consistent with the
        implementation of StorageClass hash method.
        """
        return hash((self._name, self._dimensions, self._storageClassName,
                     self._parentStorageClassName))

    def __lt__(self, other: Any) -> bool:
        """Sort using the dataset type name.
        """
        if not isinstance(other, type(self)):
            return NotImplemented
        return self.name < other.name

    @property
    def name(self) -> str:
        """A string name for the Dataset; must correspond to the same
        `DatasetType` across all Registries.
        """
        return self._name

    @property
    def dimensions(self) -> DimensionGraph:
        r"""The `Dimension`\ s that label and relate instances of this
        `DatasetType` (`DimensionGraph`).
        """
        return self._dimensions

    @property
    def storageClass(self) -> StorageClass:
        """`StorageClass` instance that defines how this `DatasetType`
        is persisted. Note that if DatasetType was constructed with a name
        of a StorageClass then Butler has to be initialized before using
        this property.
        """
        if self._storageClass is None:
            # Lazily resolve the name via the singleton factory.
            self._storageClass = StorageClassFactory().getStorageClass(self._storageClassName)
        return self._storageClass

    @property
    def parentStorageClass(self) -> Optional[StorageClass]:
        """`StorageClass` instance that defines how the composite associated
        with this `DatasetType` is persisted.

        Note that if DatasetType was constructed with a name of a
        StorageClass then Butler has to be initialized before using this
        property. Can be `None` if this is not a component of a composite.
        Must be defined if this is a component.
        """
        if self._parentStorageClass is None and self._parentStorageClassName is None:
            return None
        if self._parentStorageClass is None and self._parentStorageClassName is not None:
            self._parentStorageClass = StorageClassFactory().getStorageClass(self._parentStorageClassName)
        return self._parentStorageClass

    def isCalibration(self) -> bool:
        """Return whether datasets of this type may be included in calibration
        collections.

        Returns
        -------
        flag : `bool`
            `True` if datasets of this type may be included in calibration
            collections.
        """
        return self._isCalibration

    def finalizeParentStorageClass(self, newParent: StorageClass) -> None:
        """Replace the current placeholder parent storage class with
        the real parent.

        Parameters
        ----------
        newParent : `StorageClass`
            The new parent to be associated with this composite dataset
            type.  This replaces the temporary placeholder parent that
            was specified during construction.

        Raises
        ------
        ValueError
            Raised if this dataset type is not a component of a composite.
            Raised if a StorageClass is not given.
            Raised if the parent currently associated with the dataset
            type is not a placeholder.
        """
        if not self.isComponent():
            raise ValueError("Can not set a parent storage class if this is not a component"
                             f" ({self.name})")
        if self._parentStorageClass != self.PlaceholderParentStorageClass:
            raise ValueError(f"This DatasetType has a parent of {self._parentStorageClassName} and"
                             " is not a placeholder.")
        if not isinstance(newParent, StorageClass):
            raise ValueError(f"Supplied parent must be a StorageClass. Got {newParent!r}")
        self._parentStorageClass = newParent
        self._parentStorageClassName = newParent.name

    @staticmethod
    def splitDatasetTypeName(datasetTypeName: str) -> Tuple[str, Optional[str]]:
        """Given a dataset type name, return the root name and the component
        name.

        Parameters
        ----------
        datasetTypeName : `str`
            The name of the dataset type, can include a component using
            a "."-separator.

        Returns
        -------
        rootName : `str`
            Root name without any components.
        componentName : `str`
            The component if it has been specified, else `None`.

        Notes
        -----
        If the dataset type name is ``a.b.c`` this method will return a
        root name of ``a`` and a component name of ``b.c``.
        """
        comp = None
        root = datasetTypeName
        if "." in root:
            # If there is doubt, the component is after the first "."
            root, comp = root.split(".", maxsplit=1)
        return root, comp

    def nameAndComponent(self) -> Tuple[str, Optional[str]]:
        """Return the root name of this dataset type and the component
        name (if defined).

        Returns
        -------
        rootName : `str`
            Root name for this `DatasetType` without any components.
        componentName : `str`
            The component if it has been specified, else `None`.
        """
        return self.splitDatasetTypeName(self.name)

    def component(self) -> Optional[str]:
        """Component name (if defined)

        Returns
        -------
        comp : `str`
            Name of component part of DatasetType name. `None` if this
            `DatasetType` is not associated with a component.
        """
        _, comp = self.nameAndComponent()
        return comp

    def componentTypeName(self, component: str) -> str:
        """Given a component name, derive the datasetTypeName of that component

        Parameters
        ----------
        component : `str`
            Name of component

        Returns
        -------
        derived : `str`
            Compound name of this `DatasetType` and the component.

        Raises
        ------
        KeyError
            Requested component is not supported by this `DatasetType`.
        """
        if component in self.storageClass.allComponents():
            return self.nameWithComponent(self.name, component)
        raise KeyError("Requested component ({}) not understood by this DatasetType".format(component))

    def makeCompositeDatasetType(self) -> DatasetType:
        """Return a DatasetType suitable for the composite version of this
        component dataset type.

        Returns
        -------
        composite : `DatasetType`
            The composite dataset type.

        Raises
        ------
        RuntimeError
            Raised if this dataset type is not a component dataset type.
        """
        if not self.isComponent():
            raise RuntimeError(f"DatasetType {self.name} must be a component to form the composite")
        composite_name, _ = self.nameAndComponent()
        if self.parentStorageClass is None:
            raise ValueError("Parent storage class is not set. "
                             f"Unable to create composite type from {self.name}")
        return DatasetType(composite_name, dimensions=self.dimensions,
                           storageClass=self.parentStorageClass)

    def makeComponentDatasetType(self, component: str) -> DatasetType:
        """Return a DatasetType suitable for the given component, assuming the
        same dimensions as the parent.

        Parameters
        ----------
        component : `str`
            Name of component

        Returns
        -------
        datasetType : `DatasetType`
            A new DatasetType instance.
        """
        # The component could be a read/write or read component
        return DatasetType(self.componentTypeName(component), dimensions=self.dimensions,
                           storageClass=self.storageClass.allComponents()[component],
                           parentStorageClass=self.storageClass)

    def makeAllComponentDatasetTypes(self) -> List[DatasetType]:
        """Return all the component dataset types associated with this
        dataset type.

        Returns
        -------
        all : `list` of `DatasetType`
            All the component dataset types. If this is not a composite
            then returns an empty list.
        """
        return [self.makeComponentDatasetType(componentName)
                for componentName in self.storageClass.allComponents()]

    def isComponent(self) -> bool:
        """Boolean indicating whether this `DatasetType` refers to a
        component of a composite.

        Returns
        -------
        isComponent : `bool`
            `True` if this `DatasetType` is a component, `False` otherwise.
        """
        if self.component():
            return True
        return False

    def isComposite(self) -> bool:
        """Boolean indicating whether this `DatasetType` is a composite type.

        Returns
        -------
        isComposite : `bool`
            `True` if this `DatasetType` is a composite type, `False`
            otherwise.
        """
        return self.storageClass.isComposite()

    def _lookupNames(self) -> Tuple[LookupKey, ...]:
        """Name keys to use when looking up this datasetType in a
        configuration.

        The names are returned in order of priority.

        Returns
        -------
        names : `tuple` of `LookupKey`
            Tuple of the `DatasetType` name and the `StorageClass` name.
            If the name includes a component the name with the component
            is first, then the name without the component and finally
            the storage class name.
        """
        rootName, componentName = self.nameAndComponent()
        lookups: Tuple[LookupKey, ...] = (LookupKey(name=self.name),)
        if componentName is not None:
            lookups = lookups + (LookupKey(name=rootName),)

        if self.dimensions:
            # Dimensions are a lower priority than dataset type name
            lookups = lookups + (LookupKey(dimensions=self.dimensions),)

        return lookups + self.storageClass._lookupNames()

    def to_simple(self, minimal: bool = False) -> Union[Dict, str]:
        """Convert this class to a simple python type suitable for
        serialization.

        Parameters
        ----------
        minimal : `bool`, optional
            Use minimal serialization. Requires Registry to convert
            back to a full type.

        Returns
        -------
        simple : `dict` or `str`
            The object converted to a dictionary or a simple string.
        """
        if minimal:
            # Only needs the name.
            return self.name

        # Convert to a dict form
        as_dict = {"name": self.name,
                   "storageClass": self._storageClassName,
                   "isCalibration": self._isCalibration,
                   "dimensions": self.dimensions.to_simple(),
                   }

        if self._parentStorageClassName is not None:
            as_dict["parentStorageClass"] = self._parentStorageClassName
        return as_dict

    @classmethod
    def from_simple(cls, simple: Union[Dict, str],
                    universe: Optional[DimensionUniverse] = None,
                    registry: Optional[Registry] = None) -> DatasetType:
        """Construct a new object from the data returned from the `to_simple`
        method.

        Parameters
        ----------
        simple : `dict` of [`str`, `Any`] or `str`
            The value returned by `to_simple()`.
        universe : `DimensionUniverse`
            The special graph of all known dimensions of which this graph will
            be a subset. Can be `None` if a registry is provided.
        registry : `lsst.daf.butler.Registry`, optional
            Registry to use to convert simple name of a DatasetType to
            a full `DatasetType`. Can be `None` if a full description of
            the type is provided along with a universe.

        Returns
        -------
        datasetType : `DatasetType`
            Newly-constructed object.
        """
        if isinstance(simple, str):
            if registry is None:
                raise ValueError(f"Unable to convert a DatasetType name '{simple}' to DatasetType"
                                 " without a Registry")
            return registry.getDatasetType(simple)

        if universe is None and registry is None:
            raise ValueError("One of universe or registry must be provided.")

        if universe is None and registry is not None:
            # registry should not be none by now but test helps mypy
            universe = registry.dimensions

        if universe is None:
            # this is for mypy
            raise ValueError("Unable to determine a usable universe")

        return cls(name=simple["name"],
                   dimensions=DimensionGraph.from_simple(simple["dimensions"], universe=universe),
                   storageClass=simple["storageClass"],
                   isCalibration=simple.get("isCalibration", False),
                   parentStorageClass=simple.get("parentStorageClass"),
                   universe=universe)

    to_json = to_json_generic
    from_json = classmethod(from_json_generic)

    def __reduce__(self) -> Tuple[Callable, Tuple[Type[DatasetType],
                                                  Tuple[str, DimensionGraph, str, Optional[str]],
                                                  Dict[str, bool]]]:
        """Support pickling.

        StorageClass instances can not normally be pickled, so we pickle
        StorageClass name instead of instance.
        """
        return _unpickle_via_factory, (self.__class__, (self.name, self.dimensions, self._storageClassName,
                                                        self._parentStorageClassName),
                                       {"isCalibration": self._isCalibration})

    def __deepcopy__(self, memo: Any) -> DatasetType:
        """Support for deep copy method.

        Normally ``deepcopy`` will use pickle mechanism to make copies.
        We want to avoid that to support (possibly degenerate) use case when
        DatasetType is constructed with StorageClass instance which is not
        registered with StorageClassFactory (this happens in unit tests).
        Instead we re-implement ``__deepcopy__`` method.
        """
        return DatasetType(name=deepcopy(self.name, memo),
                           dimensions=deepcopy(self.dimensions, memo),
                           storageClass=deepcopy(self._storageClass or self._storageClassName, memo),
                           parentStorageClass=deepcopy(self._parentStorageClass
                                                       or self._parentStorageClassName, memo),
                           isCalibration=deepcopy(self._isCalibration, memo))
610def _unpickle_via_factory(factory: Callable, args: Any, kwargs: Any) -> DatasetType:
611 """Unpickle something by calling a factory
613 Allows subclasses to unpickle using `__reduce__` with keyword
614 arguments as well as positional arguments.
615 """
616 return factory(*args, **kwargs)