Coverage for python/lsst/daf/butler/core/datasets/type.py : 26%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ["DatasetType"]
26from copy import deepcopy
27import re
29from types import MappingProxyType
31from typing import (
32 TYPE_CHECKING,
33 Any,
34 Iterable,
35 Mapping,
36 Optional,
37 Tuple,
38 Type,
39 Union,
40)
43from ..storageClass import StorageClass, StorageClassFactory
44from ..dimensions import DimensionGraph
45from ..configSupport import LookupKey
if TYPE_CHECKING:
    from ..dimensions import Dimension, DimensionUniverse
51def _safeMakeMappingProxyType(data: Optional[Mapping]) -> Mapping:
52 if data is None:
53 data = {}
54 return MappingProxyType(data)
class DatasetType:
    r"""A named category of Datasets that defines how they are organized,
    related, and stored.

    A concrete, final class whose instances represent `DatasetType`\ s.
    `DatasetType` instances may be constructed without a `Registry`,
    but they must be registered
    via `Registry.registerDatasetType()` before corresponding Datasets
    may be added.
    `DatasetType` instances are immutable.

    Parameters
    ----------
    name : `str`
        A string name for the Dataset; must correspond to the same
        `DatasetType` across all Registries.  Names must start with an
        upper or lowercase letter, and may contain only letters, numbers,
        and underscores.  Component dataset types should contain a single
        period separating the base dataset type name from the component
        name (and may be recursive).
    dimensions : `DimensionGraph` or iterable of `Dimension`
        Dimensions used to label and relate instances of this `DatasetType`.
        If not a `DimensionGraph`, ``universe`` must be provided as well.
    storageClass : `StorageClass` or `str`
        Instance of a `StorageClass` or name of `StorageClass` that defines
        how this `DatasetType` is persisted.
    universe : `DimensionUniverse`, optional
        Set of all known dimensions, used to normalize ``dimensions`` if it
        is not already a `DimensionGraph`.

    Raises
    ------
    ValueError
        Raised if ``name`` does not match `VALID_NAME_REGEX`, or if
        ``dimensions`` is not a `DimensionGraph` and no ``universe`` was
        provided.
    TypeError
        Raised if ``storageClass`` is neither a `StorageClass` nor a `str`.
    """

    __slots__ = ("_name", "_dimensions", "_storageClass", "_storageClassName")

    # Names are "."-separated sequences of identifiers, each beginning with
    # a letter; the "." separates a parent type from a (possibly recursive)
    # component name.
    VALID_NAME_REGEX = re.compile(r"^[a-zA-Z][a-zA-Z0-9_]*(\.[a-zA-Z][a-zA-Z0-9_]*)*$")

    @staticmethod
    def nameWithComponent(datasetTypeName: str, componentName: str) -> str:
        """Form a valid DatasetTypeName from a parent and component.

        No validation is performed.

        Parameters
        ----------
        datasetTypeName : `str`
            Base type name.
        componentName : `str`
            Name of component.

        Returns
        -------
        compTypeName : `str`
            Name to use for component DatasetType.
        """
        return "{}.{}".format(datasetTypeName, componentName)

    def __init__(self, name: str, dimensions: Union[DimensionGraph, Iterable[Dimension]],
                 storageClass: Union[StorageClass, str],
                 *, universe: Optional[DimensionUniverse] = None):
        if self.VALID_NAME_REGEX.match(name) is None:
            raise ValueError(f"DatasetType name '{name}' is invalid.")
        self._name = name
        if not isinstance(dimensions, DimensionGraph):
            if universe is None:
                raise ValueError("If dimensions is not a normalized DimensionGraph, "
                                 "a universe must be provided.")
            dimensions = universe.extract(dimensions)
        self._dimensions = dimensions
        if not isinstance(storageClass, (StorageClass, str)):
            # Explicit check rather than ``assert`` so validation survives
            # running under ``python -O``.
            raise TypeError("storageClass must be a StorageClass instance or "
                            f"name, not {storageClass!r}.")
        self._storageClass: Optional[StorageClass]
        if isinstance(storageClass, StorageClass):
            self._storageClass = storageClass
            self._storageClassName = storageClass.name
        else:
            # Defer resolution of the StorageClass name to an instance until
            # the ``storageClass`` property is first accessed.
            self._storageClass = None
            self._storageClassName = storageClass

    def __repr__(self) -> str:
        return "DatasetType({}, {}, {})".format(self.name, self.dimensions, self._storageClassName)

    def __eq__(self, other: Any) -> bool:
        if not isinstance(other, type(self)):
            return False
        if self._name != other._name:
            return False
        if self._dimensions != other._dimensions:
            return False
        if self._storageClass is not None and other._storageClass is not None:
            # Both sides have resolved StorageClass instances; compare them.
            return self._storageClass == other._storageClass
        else:
            # At least one side only has a name; compare by name.
            return self._storageClassName == other._storageClassName

    def __hash__(self) -> int:
        """Hash DatasetType instance.

        This only uses the StorageClass name, which is consistent with the
        implementation of the StorageClass hash method.
        """
        return hash((self._name, self._dimensions, self._storageClassName))

    @property
    def name(self) -> str:
        """A string name for the Dataset; must correspond to the same
        `DatasetType` across all Registries.
        """
        return self._name

    @property
    def dimensions(self) -> DimensionGraph:
        r"""The `Dimension`\ s that label and relate instances of this
        `DatasetType` (`DimensionGraph`).
        """
        return self._dimensions

    @property
    def storageClass(self) -> StorageClass:
        """`StorageClass` instance that defines how this `DatasetType`
        is persisted.  Note that if DatasetType was constructed with a name
        of a StorageClass then Butler has to be initialized before using
        this property.
        """
        if self._storageClass is None:
            # Lazily resolve the name recorded at construction time.
            self._storageClass = StorageClassFactory().getStorageClass(self._storageClassName)
        return self._storageClass

    @staticmethod
    def splitDatasetTypeName(datasetTypeName: str) -> Tuple[str, Optional[str]]:
        """Given a dataset type name, return the root name and the component
        name.

        Parameters
        ----------
        datasetTypeName : `str`
            The name of the dataset type, can include a component using
            a "."-separator.

        Returns
        -------
        rootName : `str`
            Root name without any components.
        componentName : `str`
            The component if it has been specified, else `None`.

        Notes
        -----
        If the dataset type name is ``a.b.c`` this method will return a
        root name of ``a`` and a component name of ``b.c``.
        """
        comp = None
        root = datasetTypeName
        if "." in root:
            # If there is doubt, the component is after the first "."
            root, comp = root.split(".", maxsplit=1)
        return root, comp

    def nameAndComponent(self) -> Tuple[str, Optional[str]]:
        """Return the root name of this dataset type and the component
        name (if defined).

        Returns
        -------
        rootName : `str`
            Root name for this `DatasetType` without any components.
        componentName : `str`
            The component if it has been specified, else `None`.
        """
        return self.splitDatasetTypeName(self.name)

    def component(self) -> Optional[str]:
        """Component name (if defined).

        Returns
        -------
        comp : `str`
            Name of component part of DatasetType name.  `None` if this
            `DatasetType` is not associated with a component.
        """
        _, comp = self.nameAndComponent()
        return comp

    def componentTypeName(self, component: str) -> str:
        """Given a component name, derive the datasetTypeName of that
        component.

        Parameters
        ----------
        component : `str`
            Name of component.

        Returns
        -------
        derived : `str`
            Compound name of this `DatasetType` and the component.

        Raises
        ------
        KeyError
            Requested component is not supported by this `DatasetType`.
        """
        if component in self.storageClass.components:
            return self.nameWithComponent(self.name, component)
        raise KeyError("Requested component ({}) not understood by this DatasetType".format(component))

    def makeComponentDatasetType(self, component: str) -> DatasetType:
        """Return a DatasetType suitable for the given component, assuming
        the same dimensions as the parent.

        Parameters
        ----------
        component : `str`
            Name of component.

        Returns
        -------
        datasetType : `DatasetType`
            A new DatasetType instance.
        """
        return DatasetType(self.componentTypeName(component), dimensions=self.dimensions,
                           storageClass=self.storageClass.components[component])

    def isComponent(self) -> bool:
        """Boolean indicating whether this `DatasetType` refers to a
        component of a composite.

        Returns
        -------
        isComponent : `bool`
            `True` if this `DatasetType` is a component, `False` otherwise.
        """
        return self.component() is not None

    def isComposite(self) -> bool:
        """Boolean indicating whether this `DatasetType` is a composite
        type.

        Returns
        -------
        isComposite : `bool`
            `True` if this `DatasetType` is a composite type, `False`
            otherwise.
        """
        return self.storageClass.isComposite()

    def _lookupNames(self) -> Tuple[LookupKey, ...]:
        """Name keys to use when looking up this datasetType in a
        configuration.

        The names are returned in order of priority.

        Returns
        -------
        names : `tuple` of `LookupKey`
            Tuple of the `DatasetType` name and the `StorageClass` name.
            If the name includes a component the name with the component
            is first, then the name without the component and finally
            the storage class name.
        """
        rootName, componentName = self.nameAndComponent()
        lookups: Tuple[LookupKey, ...] = (LookupKey(name=self.name),)
        if componentName is not None:
            lookups = lookups + (LookupKey(name=rootName),)

        if self.dimensions:
            # Dimensions are a lower priority than dataset type name
            lookups = lookups + (LookupKey(dimensions=self.dimensions),)

        return lookups + self.storageClass._lookupNames()

    def __reduce__(self) -> Tuple[Type[DatasetType], Tuple[str, DimensionGraph, str]]:
        """Support pickling.

        StorageClass instances can not normally be pickled, so we pickle
        the StorageClass name instead of the instance.
        """
        return (DatasetType, (self.name, self.dimensions, self._storageClassName))

    def __deepcopy__(self, memo: Any) -> DatasetType:
        """Support for deep copy method.

        Normally ``deepcopy`` will use pickle mechanism to make copies.
        We want to avoid that to support (possibly degenerate) use case when
        DatasetType is constructed with StorageClass instance which is not
        registered with StorageClassFactory (this happens in unit tests).
        Instead we re-implement ``__deepcopy__`` method.
        """
        return DatasetType(name=deepcopy(self.name, memo),
                           dimensions=deepcopy(self.dimensions, memo),
                           storageClass=deepcopy(self._storageClass or self._storageClassName, memo))