# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ["DatasetType"]

from copy import deepcopy
import re
from types import MappingProxyType
from typing import (
    TYPE_CHECKING,
    Any,
    Callable,
    Dict,
    Iterable,
    List,
    Mapping,
    Optional,
    Tuple,
    Type,
    Union,
)

from ..storageClass import StorageClass, StorageClassFactory
from ..dimensions import DimensionGraph
from ..configSupport import LookupKey
from ..json import from_json_generic, to_json_generic

if TYPE_CHECKING:
    from ..dimensions import Dimension, DimensionUniverse
    from ...registry import Registry


def _safeMakeMappingProxyType(data: Optional[Mapping]) -> Mapping:
    """Return a read-only view of ``data``, substituting an empty mapping
    for `None`.
    """
    if data is None:
        data = {}
    return MappingProxyType(data)
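
# For illustration only (hedged sketch): _safeMakeMappingProxyType(None)
# returns an empty read-only mapping, while _safeMakeMappingProxyType({"a": 1})
# wraps the given dict in a MappingProxyType view; mutating through the view
# raises TypeError.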


class DatasetType:
    r"""A named category of Datasets that defines how they are organized,
    related, and stored.

    A concrete, final class whose instances represent `DatasetType`\ s.
    `DatasetType` instances may be constructed without a `Registry`,
    but they must be registered
    via `Registry.registerDatasetType()` before corresponding Datasets
    may be added.
    `DatasetType` instances are immutable.

    Parameters
    ----------
    name : `str`
        A string name for the Dataset; must correspond to the same
        `DatasetType` across all Registries. Names must start with an
        upper or lowercase letter, and may contain only letters, numbers,
        and underscores. Component dataset types should contain a single
        period separating the base dataset type name from the component name
        (and may be recursive).
    dimensions : `DimensionGraph` or iterable of `Dimension`
        Dimensions used to label and relate instances of this `DatasetType`.
        If not a `DimensionGraph`, ``universe`` must be provided as well.
    storageClass : `StorageClass` or `str`
        Instance of a `StorageClass` or name of `StorageClass` that defines
        how this `DatasetType` is persisted.
    parentStorageClass : `StorageClass` or `str`, optional
        Instance of a `StorageClass` or name of `StorageClass` that defines
        how the composite parent is persisted. Must be `None` if this
        is not a component. Mandatory if it is a component but can be the
        special temporary placeholder
        (`DatasetType.PlaceholderParentStorageClass`) to allow
        construction with an intent to finalize later.
    universe : `DimensionUniverse`, optional
        Set of all known dimensions, used to normalize ``dimensions`` if it
        is not already a `DimensionGraph`.
    isCalibration : `bool`, optional
        If `True`, this dataset type may be included in
        `~CollectionType.CALIBRATION` collections.
    """

    __slots__ = ("_name", "_dimensions", "_storageClass", "_storageClassName",
                 "_parentStorageClass", "_parentStorageClassName",
                 "_isCalibration")

    VALID_NAME_REGEX = re.compile("^[a-zA-Z][a-zA-Z0-9_]*(\\.[a-zA-Z][a-zA-Z0-9_]*)*$")

    PlaceholderParentStorageClass = StorageClass("PlaceHolder")
    """Placeholder StorageClass that can be used temporarily for a
    component.

    This can be useful in pipeline construction where we are creating
    dataset types without a registry.
    """

    @staticmethod
    def nameWithComponent(datasetTypeName: str, componentName: str) -> str:
        """Form a valid DatasetTypeName from a parent and component.

        No validation is performed.

        Parameters
        ----------
        datasetTypeName : `str`
            Base type name.
        componentName : `str`
            Name of component.

        Returns
        -------
        compTypeName : `str`
            Name to use for component DatasetType.
        """
        return "{}.{}".format(datasetTypeName, componentName)

    def __init__(self, name: str, dimensions: Union[DimensionGraph, Iterable[Dimension]],
                 storageClass: Union[StorageClass, str],
                 parentStorageClass: Optional[Union[StorageClass, str]] = None, *,
                 universe: Optional[DimensionUniverse] = None,
                 isCalibration: bool = False):
        if self.VALID_NAME_REGEX.match(name) is None:
            raise ValueError(f"DatasetType name '{name}' is invalid.")
        self._name = name
        if not isinstance(dimensions, DimensionGraph):
            if universe is None:
                raise ValueError("If dimensions is not a normalized DimensionGraph, "
                                 "a universe must be provided.")
            dimensions = universe.extract(dimensions)
        self._dimensions = dimensions
        if name in self._dimensions.universe.getGovernorDimensions().names:
            raise ValueError(f"Governor dimension name {name} cannot be used as a dataset type name.")
        if not isinstance(storageClass, (StorageClass, str)):
            raise ValueError("StorageClass argument must be StorageClass or str. "
                             f"Got {storageClass}")
        self._storageClass: Optional[StorageClass]
        if isinstance(storageClass, StorageClass):
            self._storageClass = storageClass
            self._storageClassName = storageClass.name
        else:
            self._storageClass = None
            self._storageClassName = storageClass

        # Ensure that a parent storage class is specified when we have a
        # component, and is not specified when we don't.
        _, componentName = self.splitDatasetTypeName(self._name)
        self._parentStorageClass: Optional[StorageClass] = None
        self._parentStorageClassName: Optional[str] = None
        if parentStorageClass is not None:
            if not isinstance(parentStorageClass, (StorageClass, str)):
                raise ValueError("Parent StorageClass argument must be StorageClass or str. "
                                 f"Got {parentStorageClass}")
            # Only allowed for a component dataset type.
            if componentName is None:
                raise ValueError("Can not specify a parent storage class if this is not a component"
                                 f" ({self._name})")
            if isinstance(parentStorageClass, StorageClass):
                self._parentStorageClass = parentStorageClass
                self._parentStorageClassName = parentStorageClass.name
            else:
                self._parentStorageClassName = parentStorageClass
        elif componentName is not None:
            raise ValueError(f"Component dataset type '{self._name}' constructed without parent"
                             " storage class")
        self._isCalibration = isCalibration
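
    # Construction sketch, for illustration only (hedged; assumes the
    # configured universe defines an "exposure" dimension and that the
    # storage class factory knows a "StructuredDataDict" storage class):
    #
    #     dt = DatasetType("exposure_metadata", dimensions=["exposure"],
    #                      storageClass="StructuredDataDict",
    #                      universe=butler.registry.dimensions)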

    def __repr__(self) -> str:
        extra = ""
        if self._parentStorageClassName:
            extra = f", parentStorageClass={self._parentStorageClassName}"
        if self._isCalibration:
            extra += ", isCalibration=True"
        return f"DatasetType({self.name!r}, {self.dimensions}, {self._storageClassName}{extra})"

    def __eq__(self, other: Any) -> bool:
        if not isinstance(other, type(self)):
            return False
        if self._name != other._name:
            return False
        if self._dimensions != other._dimensions:
            return False
        if self._storageClass is not None and other._storageClass is not None:
            if self._storageClass != other._storageClass:
                return False
        else:
            if self._storageClassName != other._storageClassName:
                return False
        if self._isCalibration != other._isCalibration:
            return False
        if self._parentStorageClass is not None and other._parentStorageClass is not None:
            return self._parentStorageClass == other._parentStorageClass
        else:
            return self._parentStorageClassName == other._parentStorageClassName

    def __hash__(self) -> int:
        """Hash DatasetType instance.

        This only uses the StorageClass name, which is consistent with the
        implementation of the StorageClass hash method.
        """
        return hash((self._name, self._dimensions, self._storageClassName,
                     self._parentStorageClassName))

    def __lt__(self, other: Any) -> bool:
        """Sort using the dataset type name.
        """
        if not isinstance(other, type(self)):
            return NotImplemented
        return self.name < other.name

    @property
    def name(self) -> str:
        """A string name for the Dataset; must correspond to the same
        `DatasetType` across all Registries.
        """
        return self._name

    @property
    def dimensions(self) -> DimensionGraph:
        r"""The `Dimension`\ s that label and relate instances of this
        `DatasetType` (`DimensionGraph`).
        """
        return self._dimensions

    @property
    def storageClass(self) -> StorageClass:
        """`StorageClass` instance that defines how this `DatasetType`
        is persisted. Note that if the DatasetType was constructed with only
        the name of a StorageClass, then Butler has to be initialized before
        using this property.
        """
        if self._storageClass is None:
            self._storageClass = StorageClassFactory().getStorageClass(self._storageClassName)
        return self._storageClass

    @property
    def parentStorageClass(self) -> Optional[StorageClass]:
        """`StorageClass` instance that defines how the composite associated
        with this `DatasetType` is persisted.

        Note that if the DatasetType was constructed with only the name of a
        StorageClass, then Butler has to be initialized before using this
        property. Can be `None` if this is not a component of a composite.
        Must be defined if this is a component.
        """
        if self._parentStorageClass is None and self._parentStorageClassName is None:
            return None
        if self._parentStorageClass is None and self._parentStorageClassName is not None:
            self._parentStorageClass = StorageClassFactory().getStorageClass(self._parentStorageClassName)
        return self._parentStorageClass

    def isCalibration(self) -> bool:
        """Return whether datasets of this type may be included in calibration
        collections.

        Returns
        -------
        flag : `bool`
            `True` if datasets of this type may be included in calibration
            collections.
        """
        return self._isCalibration

    def finalizeParentStorageClass(self, newParent: StorageClass) -> None:
        """Replace the current placeholder parent storage class with
        the real parent.

        Parameters
        ----------
        newParent : `StorageClass`
            The new parent to be associated with this composite dataset
            type. This replaces the temporary placeholder parent that
            was specified during construction.

        Raises
        ------
        ValueError
            Raised if this dataset type is not a component of a composite.
            Raised if a StorageClass is not given.
            Raised if the parent currently associated with the dataset
            type is not a placeholder.
        """
        if not self.isComponent():
            raise ValueError("Can not set a parent storage class if this is not a component"
                             f" ({self.name})")
        if self._parentStorageClass != self.PlaceholderParentStorageClass:
            raise ValueError(f"This DatasetType has a parent of {self._parentStorageClassName} and"
                             " is not a placeholder.")
        if not isinstance(newParent, StorageClass):
            raise ValueError(f"Supplied parent must be a StorageClass. Got {newParent!r}")
        self._parentStorageClass = newParent
        self._parentStorageClassName = newParent.name
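
    # Placeholder workflow sketch, for illustration only (hedged; the
    # component name and storage classes are hypothetical):
    #
    #     dt = DatasetType("coadd.psf", dimensions=..., storageClass="Psf",
    #                      parentStorageClass=DatasetType.PlaceholderParentStorageClass)
    #     ...  # later, once the real parent storage class is known:
    #     dt.finalizeParentStorageClass(coaddStorageClass)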

    @staticmethod
    def splitDatasetTypeName(datasetTypeName: str) -> Tuple[str, Optional[str]]:
        """Given a dataset type name, return the root name and the component
        name.

        Parameters
        ----------
        datasetTypeName : `str`
            The name of the dataset type; can include a component using
            a "."-separator.

        Returns
        -------
        rootName : `str`
            Root name without any components.
        componentName : `str`
            The component if it has been specified, else `None`.

        Notes
        -----
        If the dataset type name is ``a.b.c`` this method will return a
        root name of ``a`` and a component name of ``b.c``.
        """
        comp = None
        root = datasetTypeName
        if "." in root:
            # If there is doubt, the component is after the first "."
            root, comp = root.split(".", maxsplit=1)
        return root, comp
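
    # For illustration only (hedged sketch), per the Notes above:
    #
    #     DatasetType.splitDatasetTypeName("calexp")  # -> ("calexp", None)
    #     DatasetType.splitDatasetTypeName("a.b.c")   # -> ("a", "b.c")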

    def nameAndComponent(self) -> Tuple[str, Optional[str]]:
        """Return the root name of this dataset type and the component
        name (if defined).

        Returns
        -------
        rootName : `str`
            Root name for this `DatasetType` without any components.
        componentName : `str`
            The component if it has been specified, else `None`.
        """
        return self.splitDatasetTypeName(self.name)

    def component(self) -> Optional[str]:
        """Component name (if defined)

        Returns
        -------
        comp : `str`
            Name of component part of DatasetType name. `None` if this
            `DatasetType` is not associated with a component.
        """
        _, comp = self.nameAndComponent()
        return comp

    def componentTypeName(self, component: str) -> str:
        """Given a component name, derive the datasetTypeName of that
        component.

        Parameters
        ----------
        component : `str`
            Name of component.

        Returns
        -------
        derived : `str`
            Compound name of this `DatasetType` and the component.

        Raises
        ------
        KeyError
            Requested component is not supported by this `DatasetType`.
        """
        if component in self.storageClass.allComponents():
            return self.nameWithComponent(self.name, component)
        raise KeyError("Requested component ({}) not understood by this DatasetType".format(component))
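
    # For illustration only (hedged; assumes this DatasetType is named
    # "calexp" and its storage class defines a "wcs" component):
    #
    #     dt.componentTypeName("wcs")   # -> "calexp.wcs"
    #     dt.componentTypeName("nope")  # -> raises KeyError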

    def makeComponentDatasetType(self, component: str) -> DatasetType:
        """Return a DatasetType suitable for the given component, assuming the
        same dimensions as the parent.

        Parameters
        ----------
        component : `str`
            Name of component.

        Returns
        -------
        datasetType : `DatasetType`
            A new DatasetType instance.
        """
        # The component could be a read/write or read component.
        return DatasetType(self.componentTypeName(component), dimensions=self.dimensions,
                           storageClass=self.storageClass.allComponents()[component],
                           parentStorageClass=self.storageClass)
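
    # For illustration only (hedged sketch; assumes a "wcs" component exists
    # in this DatasetType's storage class):
    #
    #     wcsType = dt.makeComponentDatasetType("wcs")
    #     wcsType.isComponent()       # -> True
    #     wcsType.parentStorageClass  # -> dt.storageClass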

    def makeAllComponentDatasetTypes(self) -> List[DatasetType]:
        """Return all the component dataset types associated with this
        dataset type.

        Returns
        -------
        all : `list` of `DatasetType`
            All the component dataset types. If this is not a composite
            then returns an empty list.
        """
        return [self.makeComponentDatasetType(componentName)
                for componentName in self.storageClass.allComponents()]

    def isComponent(self) -> bool:
        """Boolean indicating whether this `DatasetType` refers to a
        component of a composite.

        Returns
        -------
        isComponent : `bool`
            `True` if this `DatasetType` is a component, `False` otherwise.
        """
        return self.component() is not None

    def isComposite(self) -> bool:
        """Boolean indicating whether this `DatasetType` is a composite type.

        Returns
        -------
        isComposite : `bool`
            `True` if this `DatasetType` is a composite type, `False`
            otherwise.
        """
        return self.storageClass.isComposite()

    def _lookupNames(self) -> Tuple[LookupKey, ...]:
        """Name keys to use when looking up this datasetType in a
        configuration.

        The names are returned in order of priority.

        Returns
        -------
        names : `tuple` of `LookupKey`
            Tuple of the `DatasetType` name and the `StorageClass` name.
            If the name includes a component the name with the component
            is first, then the name without the component and finally
            the storage class name.
        """
        rootName, componentName = self.nameAndComponent()
        lookups: Tuple[LookupKey, ...] = (LookupKey(name=self.name),)
        if componentName is not None:
            lookups = lookups + (LookupKey(name=rootName),)

        if self.dimensions:
            # Dimensions are a lower priority than dataset type name.
            lookups = lookups + (LookupKey(dimensions=self.dimensions),)

        return lookups + self.storageClass._lookupNames()
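
    # Priority-order sketch, for illustration only (hedged; assumes the name
    # "calexp.wcs" and non-empty dimensions):
    #
    #     dt._lookupNames()
    #     # -> (LookupKey("calexp.wcs"), LookupKey("calexp"),
    #     #     LookupKey(dimensions=...), <storage class lookup keys>)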

    def to_simple(self, minimal: bool = False) -> Union[Dict, str]:
        """Convert this class to a simple python type suitable for
        serialization.

        Parameters
        ----------
        minimal : `bool`, optional
            Use minimal serialization. Requires Registry to convert
            back to a full type.

        Returns
        -------
        simple : `dict` or `str`
            The object converted to a dictionary or a simple string.
        """
        if minimal:
            # Only needs the name.
            return self.name

        # Convert to a dict form.
        as_dict = {"name": self.name,
                   "storageClass": self._storageClassName,
                   "isCalibration": self._isCalibration,
                   "dimensions": self.dimensions.to_simple(),
                   }

        if self._parentStorageClassName is not None:
            as_dict["parentStorageClass"] = self._parentStorageClassName
        return as_dict
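
    # Example output, for illustration only (hedged; the values shown are
    # hypothetical, keys match the dict built above):
    #
    #     dt.to_simple()
    #     # -> {"name": "calexp", "storageClass": "ExposureF",
    #     #     "isCalibration": False, "dimensions": <simplified graph>}
    #     dt.to_simple(minimal=True)  # -> "calexp"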

    @classmethod
    def from_simple(cls, simple: Union[Dict, str],
                    universe: Optional[DimensionUniverse] = None,
                    registry: Optional[Registry] = None) -> DatasetType:
        """Construct a new object from the data returned from the `to_simple`
        method.

        Parameters
        ----------
        simple : `dict` of [`str`, `Any`] or `str`
            The value returned by `to_simple()`.
        universe : `DimensionUniverse`, optional
            The special graph of all known dimensions of which this graph will
            be a subset. Can be `None` if a registry is provided.
        registry : `lsst.daf.butler.Registry`, optional
            Registry to use to convert the simple name of a DatasetType to
            a full `DatasetType`. Can be `None` if a full description of
            the type is provided along with a universe.

        Returns
        -------
        datasetType : `DatasetType`
            Newly-constructed object.
        """
        if isinstance(simple, str):
            if registry is None:
                raise ValueError(f"Unable to convert a DatasetType name '{simple}' to DatasetType"
                                 " without a Registry")
            return registry.getDatasetType(simple)

        if universe is None and registry is None:
            raise ValueError("One of universe or registry must be provided.")

        if universe is None and registry is not None:
            # registry should not be None by now, but the test helps mypy.
            universe = registry.dimensions

        if universe is None:
            # This is for mypy.
            raise ValueError("Unable to determine a usable universe")

        return cls(name=simple["name"],
                   dimensions=DimensionGraph.from_simple(simple["dimensions"], universe=universe),
                   storageClass=simple["storageClass"],
                   isCalibration=simple.get("isCalibration", False),
                   parentStorageClass=simple.get("parentStorageClass"),
                   universe=universe)
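
    # Round-trip sketch, for illustration only (hedged; assumes ``registry``
    # is a configured Registry):
    #
    #     same = DatasetType.from_simple(dt.to_simple(),
    #                                    universe=registry.dimensions)
    #     byName = DatasetType.from_simple(dt.to_simple(minimal=True),
    #                                      registry=registry)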

    to_json = to_json_generic
    from_json = classmethod(from_json_generic)

    def __reduce__(self) -> Tuple[Callable, Tuple[Type[DatasetType],
                                                  Tuple[str, DimensionGraph, str, Optional[str]],
                                                  Dict[str, bool]]]:
        """Support pickling.

        StorageClass instances can not normally be pickled, so we pickle
        the StorageClass name instead of the instance.
        """
        return _unpickle_via_factory, (self.__class__, (self.name, self.dimensions, self._storageClassName,
                                                        self._parentStorageClassName),
                                       {"isCalibration": self._isCalibration})

    def __deepcopy__(self, memo: Any) -> DatasetType:
        """Support for deep copy method.

        Normally ``deepcopy`` will use the pickle mechanism to make copies.
        We want to avoid that to support the (possibly degenerate) use case
        where a DatasetType is constructed with a StorageClass instance that
        is not registered with the StorageClassFactory (this happens in unit
        tests). Instead we re-implement the ``__deepcopy__`` method.
        """
        return DatasetType(name=deepcopy(self.name, memo),
                           dimensions=deepcopy(self.dimensions, memo),
                           storageClass=deepcopy(self._storageClass or self._storageClassName, memo),
                           parentStorageClass=deepcopy(self._parentStorageClass
                                                       or self._parentStorageClassName, memo),
                           isCalibration=deepcopy(self._isCalibration, memo))


def _unpickle_via_factory(factory: Callable, args: Any, kwargs: Any) -> DatasetType:
    """Unpickle something by calling a factory.

    Allows subclasses to unpickle using `__reduce__` with keyword
    arguments as well as positional arguments.
    """
    return factory(*args, **kwargs)