Coverage for python/lsst/daf/butler/core/datasets/type.py : 18%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ["DatasetType"]
26from copy import deepcopy
27import re
29from types import MappingProxyType
31from typing import (
32 TYPE_CHECKING,
33 Any,
34 Iterable,
35 List,
36 Mapping,
37 Optional,
38 Tuple,
39 Type,
40 Union,
41)
44from ..storageClass import StorageClass, StorageClassFactory
45from ..dimensions import DimensionGraph
46from ..configSupport import LookupKey
48if TYPE_CHECKING: 48 ↛ 49line 48 didn't jump to line 49, because the condition on line 48 was never true
49 from ..dimensions import Dimension, DimensionUniverse
52def _safeMakeMappingProxyType(data: Optional[Mapping]) -> Mapping:
53 if data is None:
54 data = {}
55 return MappingProxyType(data)
class DatasetType:
    r"""A named category of Datasets that defines how they are organized,
    related, and stored.

    A concrete, final class whose instances represent `DatasetType`\ s.
    `DatasetType` instances may be constructed without a `Registry`,
    but they must be registered
    via `Registry.registerDatasetType()` before corresponding Datasets
    may be added.
    `DatasetType` instances are immutable.

    Parameters
    ----------
    name : `str`
        A string name for the Dataset; must correspond to the same
        `DatasetType` across all Registries.  Names must start with an
        upper or lowercase letter, and may contain only letters, numbers,
        and underscores.  Component dataset types should contain a single
        period separating the base dataset type name from the component name
        (and may be recursive).
    dimensions : `DimensionGraph` or iterable of `Dimension`
        Dimensions used to label and relate instances of this `DatasetType`.
        If not a `DimensionGraph`, ``universe`` must be provided as well.
    storageClass : `StorageClass` or `str`
        Instance of a `StorageClass` or name of `StorageClass` that defines
        how this `DatasetType` is persisted.
    parentStorageClass : `StorageClass` or `str`, optional
        Instance of a `StorageClass` or name of `StorageClass` that defines
        how the composite parent is persisted.  Must be `None` if this
        is not a component.  Mandatory if it is a component but can be the
        special temporary placeholder
        (`DatasetType.PlaceholderParentStorageClass`) to allow
        construction with an intent to finalize later.
    universe : `DimensionUniverse`, optional
        Set of all known dimensions, used to normalize ``dimensions`` if it
        is not already a `DimensionGraph`.
    """

    __slots__ = ("_name", "_dimensions", "_storageClass", "_storageClassName",
                 "_parentStorageClass", "_parentStorageClassName")

    # A leading letter, then letters/digits/underscores; "."-separated
    # components each follow the same pattern.
    VALID_NAME_REGEX = re.compile(r"^[a-zA-Z][a-zA-Z0-9_]*(\.[a-zA-Z][a-zA-Z0-9_]*)*$")

    PlaceholderParentStorageClass = StorageClass("PlaceHolder")
    """Placeholder StorageClass that can be used temporarily for a
    component.

    This can be useful in pipeline construction where we are creating
    dataset types without a registry.
    """
109 @staticmethod
110 def nameWithComponent(datasetTypeName: str, componentName: str) -> str:
111 """Form a valid DatasetTypeName from a parent and component.
113 No validation is performed.
115 Parameters
116 ----------
117 datasetTypeName : `str`
118 Base type name.
119 componentName : `str`
120 Name of component.
122 Returns
123 -------
124 compTypeName : `str`
125 Name to use for component DatasetType.
126 """
127 return "{}.{}".format(datasetTypeName, componentName)
129 def __init__(self, name: str, dimensions: Union[DimensionGraph, Iterable[Dimension]],
130 storageClass: Union[StorageClass, str],
131 parentStorageClass: Optional[Union[StorageClass, str]] = None, *,
132 universe: Optional[DimensionUniverse] = None):
133 if self.VALID_NAME_REGEX.match(name) is None:
134 raise ValueError(f"DatasetType name '{name}' is invalid.")
135 self._name = name
136 if not isinstance(dimensions, DimensionGraph):
137 if universe is None:
138 raise ValueError("If dimensions is not a normalized DimensionGraph, "
139 "a universe must be provided.")
140 dimensions = universe.extract(dimensions)
141 self._dimensions = dimensions
142 if not isinstance(storageClass, (StorageClass, str)):
143 raise ValueError("StorageClass argument must be StorageClass or str. "
144 f"Got {storageClass}")
145 self._storageClass: Optional[StorageClass]
146 if isinstance(storageClass, StorageClass):
147 self._storageClass = storageClass
148 self._storageClassName = storageClass.name
149 else:
150 self._storageClass = None
151 self._storageClassName = storageClass
153 self._parentStorageClass: Optional[StorageClass] = None
154 self._parentStorageClassName: Optional[str] = None
155 if parentStorageClass is not None:
156 if not isinstance(storageClass, (StorageClass, str)):
157 raise ValueError("Parent StorageClass argument must be StorageClass or str. "
158 f"Got {parentStorageClass}")
160 # Only allowed for a component dataset type
161 _, componentName = self.splitDatasetTypeName(self._name)
162 if componentName is None:
163 raise ValueError("Can not specify a parent storage class if this is not a component"
164 f" ({self._name})")
165 if isinstance(parentStorageClass, StorageClass):
166 self._parentStorageClass = parentStorageClass
167 self._parentStorageClassName = parentStorageClass.name
168 else:
169 self._parentStorageClassName = parentStorageClass
171 # Ensure that parent storage class is specified when we have
172 # a component and is not specified when we don't
173 _, componentName = self.splitDatasetTypeName(self._name)
174 if parentStorageClass is None and componentName is not None:
175 raise ValueError(f"Component dataset type '{self._name}' constructed without parent"
176 " storage class")
177 if parentStorageClass is not None and componentName is None:
178 raise ValueError(f"Parent storage class specified by {self._name} is not a composite")
180 def __repr__(self) -> str:
181 parent = ""
182 if self._parentStorageClassName:
183 parent = f", parentStorageClass={self._parentStorageClassName}"
184 return f"DatasetType({self.name}, {self.dimensions}, {self._storageClassName}{parent})"
186 def __eq__(self, other: Any) -> bool:
187 if not isinstance(other, type(self)):
188 return False
189 if self._name != other._name:
190 return False
191 if self._dimensions != other._dimensions:
192 return False
193 if self._storageClass is not None and other._storageClass is not None:
194 if self._storageClass != other._storageClass:
195 return False
196 else:
197 if self._storageClassName != other._storageClassName:
198 return False
199 if self._parentStorageClass is not None and other._parentStorageClass is not None:
200 return self._parentStorageClass == other._parentStorageClass
201 else:
202 return self._parentStorageClassName == other._parentStorageClassName
204 def __hash__(self) -> int:
205 """Hash DatasetType instance.
207 This only uses StorageClass name which is it consistent with the
208 implementation of StorageClass hash method.
209 """
210 return hash((self._name, self._dimensions, self._storageClassName,
211 self._parentStorageClassName))
213 @property
214 def name(self) -> str:
215 """A string name for the Dataset; must correspond to the same
216 `DatasetType` across all Registries.
217 """
218 return self._name
220 @property
221 def dimensions(self) -> DimensionGraph:
222 r"""The `Dimension`\ s that label and relate instances of this
223 `DatasetType` (`DimensionGraph`).
224 """
225 return self._dimensions
227 @property
228 def storageClass(self) -> StorageClass:
229 """`StorageClass` instance that defines how this `DatasetType`
230 is persisted. Note that if DatasetType was constructed with a name
231 of a StorageClass then Butler has to be initialized before using
232 this property.
233 """
234 if self._storageClass is None:
235 self._storageClass = StorageClassFactory().getStorageClass(self._storageClassName)
236 return self._storageClass
238 @property
239 def parentStorageClass(self) -> Optional[StorageClass]:
240 """`StorageClass` instance that defines how the composite associated
241 with this `DatasetType` is persisted.
243 Note that if DatasetType was constructed with a name of a
244 StorageClass then Butler has to be initialized before using this
245 property. Can be `None` if this is not a component of a composite.
246 Must be defined if this is a component.
247 """
248 if self._parentStorageClass is None and self._parentStorageClassName is None:
249 return None
250 if self._parentStorageClass is None and self._parentStorageClassName is not None:
251 self._parentStorageClass = StorageClassFactory().getStorageClass(self._parentStorageClassName)
252 return self._parentStorageClass
254 def finalizeParentStorageClass(self, newParent: StorageClass) -> None:
255 """Replace the current placeholder parent storage class with
256 the real parent.
258 Parameters
259 ----------
260 newParent : `StorageClass`
261 The new parent to be associated with this composite dataset
262 type. This replaces the temporary placeholder parent that
263 was specified during construction.
265 Raises
266 ------
267 ValueError
268 Raised if this dataset type is not a component of a composite.
269 Raised if a StorageClass is not given.
270 Raised if the parent currently associated with the dataset
271 type is not a placeholder.
272 """
273 if not self.isComponent():
274 raise ValueError("Can not set a parent storage class if this is not a component"
275 f" ({self.name})")
276 if self._parentStorageClass != self.PlaceholderParentStorageClass:
277 raise ValueError(f"This DatasetType has a parent of {self._parentStorageClassName} and"
278 " is not a placeholder.")
279 if not isinstance(newParent, StorageClass):
280 raise ValueError(f"Supplied parent must be a StorageClass. Got {newParent!r}")
281 self._parentStorageClass = newParent
282 self._parentStorageClassName = newParent.name
284 @staticmethod
285 def splitDatasetTypeName(datasetTypeName: str) -> Tuple[str, Optional[str]]:
286 """Given a dataset type name, return the root name and the component
287 name.
289 Parameters
290 ----------
291 datasetTypeName : `str`
292 The name of the dataset type, can include a component using
293 a "."-separator.
295 Returns
296 -------
297 rootName : `str`
298 Root name without any components.
299 componentName : `str`
300 The component if it has been specified, else `None`.
302 Notes
303 -----
304 If the dataset type name is ``a.b.c`` this method will return a
305 root name of ``a`` and a component name of ``b.c``.
306 """
307 comp = None
308 root = datasetTypeName
309 if "." in root:
310 # If there is doubt, the component is after the first "."
311 root, comp = root.split(".", maxsplit=1)
312 return root, comp
314 def nameAndComponent(self) -> Tuple[str, Optional[str]]:
315 """Return the root name of this dataset type and the component
316 name (if defined).
318 Returns
319 -------
320 rootName : `str`
321 Root name for this `DatasetType` without any components.
322 componentName : `str`
323 The component if it has been specified, else `None`.
324 """
325 return self.splitDatasetTypeName(self.name)
327 def component(self) -> Optional[str]:
328 """Component name (if defined)
330 Returns
331 -------
332 comp : `str`
333 Name of component part of DatasetType name. `None` if this
334 `DatasetType` is not associated with a component.
335 """
336 _, comp = self.nameAndComponent()
337 return comp
339 def componentTypeName(self, component: str) -> str:
340 """Given a component name, derive the datasetTypeName of that component
342 Parameters
343 ----------
344 component : `str`
345 Name of component
347 Returns
348 -------
349 derived : `str`
350 Compound name of this `DatasetType` and the component.
352 Raises
353 ------
354 KeyError
355 Requested component is not supported by this `DatasetType`.
356 """
357 if component in self.storageClass.allComponents():
358 return self.nameWithComponent(self.name, component)
359 raise KeyError("Requested component ({}) not understood by this DatasetType".format(component))
361 def makeComponentDatasetType(self, component: str) -> DatasetType:
362 """Return a DatasetType suitable for the given component, assuming the
363 same dimensions as the parent.
365 Parameters
366 ----------
367 component : `str`
368 Name of component
370 Returns
371 -------
372 datasetType : `DatasetType`
373 A new DatasetType instance.
374 """
375 # The component could be a read/write or read component
376 return DatasetType(self.componentTypeName(component), dimensions=self.dimensions,
377 storageClass=self.storageClass.allComponents()[component],
378 parentStorageClass=self.storageClass)
380 def makeAllComponentDatasetTypes(self) -> List[DatasetType]:
381 """Return all the component dataset types assocaited with this
382 dataset type.
384 Returns
385 -------
386 all : `list` of `DatasetType`
387 All the component dataset types. If this is not a composite
388 then returns an empty list.
389 """
390 return [self.makeComponentDatasetType(componentName)
391 for componentName in self.storageClass.allComponents()]
393 def isComponent(self) -> bool:
394 """Boolean indicating whether this `DatasetType` refers to a
395 component of a composite.
397 Returns
398 -------
399 isComponent : `bool`
400 `True` if this `DatasetType` is a component, `False` otherwise.
401 """
402 if self.component():
403 return True
404 return False
406 def isComposite(self) -> bool:
407 """Boolean indicating whether this `DatasetType` is a composite type.
409 Returns
410 -------
411 isComposite : `bool`
412 `True` if this `DatasetType` is a composite type, `False`
413 otherwise.
414 """
415 return self.storageClass.isComposite()
417 def _lookupNames(self) -> Tuple[LookupKey, ...]:
418 """Name keys to use when looking up this datasetType in a
419 configuration.
421 The names are returned in order of priority.
423 Returns
424 -------
425 names : `tuple` of `LookupKey`
426 Tuple of the `DatasetType` name and the `StorageClass` name.
427 If the name includes a component the name with the component
428 is first, then the name without the component and finally
429 the storage class name.
430 """
431 rootName, componentName = self.nameAndComponent()
432 lookups: Tuple[LookupKey, ...] = (LookupKey(name=self.name),)
433 if componentName is not None:
434 lookups = lookups + (LookupKey(name=rootName),)
436 if self.dimensions:
437 # Dimensions are a lower priority than dataset type name
438 lookups = lookups + (LookupKey(dimensions=self.dimensions),)
440 return lookups + self.storageClass._lookupNames()
442 def __reduce__(self) -> Tuple[Type[DatasetType], Tuple[str, DimensionGraph, str, Optional[str]]]:
443 """Support pickling.
445 StorageClass instances can not normally be pickled, so we pickle
446 StorageClass name instead of instance.
447 """
448 return (DatasetType, (self.name, self.dimensions, self._storageClassName,
449 self._parentStorageClassName))
451 def __deepcopy__(self, memo: Any) -> DatasetType:
452 """Support for deep copy method.
454 Normally ``deepcopy`` will use pickle mechanism to make copies.
455 We want to avoid that to support (possibly degenerate) use case when
456 DatasetType is constructed with StorageClass instance which is not
457 registered with StorageClassFactory (this happens in unit tests).
458 Instead we re-implement ``__deepcopy__`` method.
459 """
460 return DatasetType(name=deepcopy(self.name, memo),
461 dimensions=deepcopy(self.dimensions, memo),
462 storageClass=deepcopy(self._storageClass or self._storageClassName, memo),
463 parentStorageClass=deepcopy(self._parentStorageClass
464 or self._parentStorageClassName, memo))