Coverage for python/lsst/daf/butler/core/datasets/ref.py : 33%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = ["AmbiguousDatasetError", "DatasetRef"]
25from typing import (
26 Any,
27 Iterable,
28 List,
29 Optional,
30 Tuple,
31)
33from ..dimensions import DataCoordinate, DimensionGraph
34from ..configSupport import LookupKey
35from ..utils import immutable
36from ..named import NamedKeyDict
37from .type import DatasetType
class AmbiguousDatasetError(Exception):
    """Error for operations that require a resolved `DatasetRef`.

    Raised when a `DatasetRef` carries neither an ID nor a run, but the
    requested operation needs one of them.
    """
@immutable
class DatasetRef:
    """Reference to a Dataset in a `Registry`.

    A `DatasetRef` may point to a Dataset that currently does not yet exist
    (e.g., because it is a predicted input for provenance).

    Parameters
    ----------
    datasetType : `DatasetType`
        The `DatasetType` for this Dataset.
    dataId : `DataCoordinate`
        A mapping of dimensions that labels the Dataset within a Collection.
    id : `int`, optional
        The unique integer identifier assigned when the dataset is created.
    run : `str`, optional
        The name of the run this dataset was associated with when it was
        created.  Must be provided if ``id`` is.
    conform : `bool`, optional
        If `True` (default), call `DataCoordinate.standardize` to ensure that
        the data ID's dimensions are consistent with the dataset type's.
        `DatasetRef` instances for which those dimensions are not equal should
        not be created in new code, but are still supported for backwards
        compatibility.  New code should only pass `False` if it can guarantee
        that the dimensions are already consistent.
    hasParentId : `bool`, optional
        If `True` this `DatasetRef` is a component that has the ``id``
        of the composite parent.

    Raises
    ------
    ValueError
        Raised if ``run`` is provided but ``id`` is not, or if ``id`` is
        provided but ``run`` is not.
    """

    __slots__ = ("id", "datasetType", "dataId", "run", "hasParentId")

    def __init__(
        self,
        datasetType: DatasetType, dataId: DataCoordinate, *,
        id: Optional[int] = None,
        run: Optional[str] = None,
        hasParentId: bool = False,
        conform: bool = True
    ):
        self.id = id
        self.datasetType = datasetType
        self.hasParentId = hasParentId
        if conform:
            self.dataId = DataCoordinate.standardize(dataId, graph=datasetType.dimensions)
        else:
            self.dataId = dataId
        # ``id`` and ``run`` must be provided together: a resolved ref always
        # knows which run produced it, and an unresolved ref has neither.
        if self.id is not None:
            if run is None:
                raise ValueError(f"Cannot provide id without run for dataset with id={id}, "
                                 f"type={datasetType}, and dataId={dataId}.")
            self.run = run
        else:
            if run is not None:
                raise ValueError("'run' cannot be provided unless 'id' is.")
            self.run = None

    def __eq__(self, other: Any) -> bool:
        # Equality compares dataset type, data ID, and ID only; ``run`` is
        # deliberately excluded (it is fully determined by ``id`` when set).
        try:
            return (self.datasetType, self.dataId, self.id) == (other.datasetType, other.dataId, other.id)
        except AttributeError:
            return NotImplemented

    def __hash__(self) -> int:
        # Must hash exactly the fields compared by __eq__.
        return hash((self.datasetType, self.dataId, self.id))

    @property
    def dimensions(self) -> DimensionGraph:
        """The dimensions associated with the underlying `DatasetType`
        """
        return self.datasetType.dimensions

    def __repr__(self) -> str:
        # We delegate to __str__ (i.e. use "!s") for the data ID below because
        # DataCoordinate's __repr__ - while adhering to the guidelines for
        # __repr__ - is much harder for users to read, while its __str__ just
        # produces a dict that can also be passed to DatasetRef's constructor.
        if self.id is not None:
            return f"DatasetRef({self.datasetType!r}, {self.dataId!s}, id={self.id}, run={self.run!r})"
        else:
            return f"DatasetRef({self.datasetType!r}, {self.dataId!s})"

    def __str__(self) -> str:
        # NOTE(review): a previous revision appended a stray unmatched "]"
        # after the storage class name; it has been removed.
        s = f"{self.datasetType.name}@{self.dataId!s}, sc={self.datasetType.storageClass.name}"
        if self.id is not None:
            s += f" (id={self.id})"
        return s

    def __lt__(self, other: Any) -> bool:
        # Sort by run, DatasetType name and then by DataCoordinate
        # The __str__ representation is probably close enough but we
        # need to ensure that sorting a DatasetRef matches what you would
        # get if you sorted DatasetType+DataCoordinate
        if not isinstance(other, type(self)):
            return NotImplemented

        # Group by run if defined, takes precedence over DatasetType.
        # Unresolved refs (run is None) sort first via the empty string.
        self_run = "" if self.run is None else self.run
        other_run = "" if other.run is None else other.run

        # Compare tuples in the priority order
        return (self_run, self.datasetType, self.dataId) < (other_run, other.datasetType, other.dataId)

    @classmethod
    def _unpickle(
        cls,
        datasetType: DatasetType,
        dataId: DataCoordinate,
        id: Optional[int],
        run: Optional[str],
        hasParentId: bool,
    ) -> DatasetRef:
        """A custom factory method for use by `__reduce__` as a workaround for
        its lack of support for keyword arguments.
        """
        return cls(datasetType, dataId, id=id, run=run, hasParentId=hasParentId)

    def __reduce__(self) -> tuple:
        return (self._unpickle, (self.datasetType, self.dataId, self.id, self.run, self.hasParentId))

    def __deepcopy__(self, memo: dict) -> DatasetRef:
        # DatasetRef is recursively immutable; see note in @immutable
        # decorator.
        return self

    def resolved(self, id: int, run: str) -> DatasetRef:
        """Return a new `DatasetRef` with the same data ID and dataset type
        and the given ID and run.

        Parameters
        ----------
        id : `int`
            The unique integer identifier assigned when the dataset is created.
        run : `str`
            The run this dataset was associated with when it was created.

        Returns
        -------
        ref : `DatasetRef`
            A new `DatasetRef`.
        """
        # conform=False: self.dataId has already been standardized.
        return DatasetRef(datasetType=self.datasetType, dataId=self.dataId,
                          id=id, run=run, conform=False)

    def unresolved(self) -> DatasetRef:
        """Return a new `DatasetRef` with the same data ID and dataset type,
        but no ID or run.

        Returns
        -------
        ref : `DatasetRef`
            A new `DatasetRef`.

        Notes
        -----
        This can be used to compare only the data ID and dataset type of a
        pair of `DatasetRef` instances, regardless of whether either is
        resolved::

            if ref1.unresolved() == ref2.unresolved():
                ...
        """
        return DatasetRef(datasetType=self.datasetType, dataId=self.dataId, conform=False)

    def expanded(self, dataId: DataCoordinate) -> DatasetRef:
        """Return a new `DatasetRef` with the given expanded data ID.

        Parameters
        ----------
        dataId : `DataCoordinate`
            Data ID for the new `DatasetRef`.  Must compare equal to the
            original data ID.

        Returns
        -------
        ref : `DatasetRef`
            A new `DatasetRef` with the given data ID.
        """
        assert dataId == self.dataId
        return DatasetRef(datasetType=self.datasetType, dataId=dataId,
                          id=self.id, run=self.run,
                          conform=False)

    def isComponent(self) -> bool:
        """Boolean indicating whether this `DatasetRef` refers to a
        component of a composite.

        Returns
        -------
        isComponent : `bool`
            `True` if this `DatasetRef` is a component, `False` otherwise.
        """
        return self.datasetType.isComponent()

    def isComposite(self) -> bool:
        """Boolean indicating whether this `DatasetRef` is a composite type.

        Returns
        -------
        isComposite : `bool`
            `True` if this `DatasetRef` is a composite type, `False`
            otherwise.
        """
        return self.datasetType.isComposite()

    def _lookupNames(self) -> Tuple[LookupKey, ...]:
        """Name keys to use when looking up this DatasetRef in a configuration.

        The names are returned in order of priority.

        Returns
        -------
        names : `tuple` of `LookupKey`
            Tuple of the `DatasetType` name and the `StorageClass` name.
            If ``instrument`` is defined in the dataId, each of those names
            is added to the start of the tuple with a key derived from the
            value of ``instrument``.
        """
        # Special case the instrument Dimension since we allow configs
        # to include the instrument name in the hierarchy.
        names: Tuple[LookupKey, ...] = self.datasetType._lookupNames()

        # mypy doesn't think this could return True, because even though
        # __contains__ can take an object of any type, it seems hard-coded to
        # assume it will return False if the type doesn't match the key type
        # of the Mapping.
        if "instrument" in self.dataId:  # type: ignore
            names = tuple(n.clone(dataId={"instrument": self.dataId["instrument"]})
                          for n in names) + names

        return names

    @staticmethod
    def groupByType(refs: Iterable[DatasetRef]) -> NamedKeyDict[DatasetType, List[DatasetRef]]:
        """Group an iterable of `DatasetRef` by `DatasetType`.

        Parameters
        ----------
        refs : `Iterable` [ `DatasetRef` ]
            `DatasetRef` instances to group.

        Returns
        -------
        grouped : `NamedKeyDict` [ `DatasetType`, `list` [ `DatasetRef` ] ]
            Grouped `DatasetRef` instances.
        """
        result: NamedKeyDict[DatasetType, List[DatasetRef]] = NamedKeyDict()
        for ref in refs:
            result.setdefault(ref.datasetType, []).append(ref)
        return result

    def getCheckedId(self) -> int:
        """Return ``self.id``, or raise if it is `None`.

        This trivial method exists to allow operations that would otherwise be
        natural list comprehensions to check that the ID is not `None` as well.

        Returns
        -------
        id : `int`
            ``self.id`` if it is not `None`.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``ref.id`` is `None`.
        """
        if self.id is None:
            raise AmbiguousDatasetError(f"ID for dataset {self} is `None`; "
                                        "a resolved reference is required.")
        return self.id

    def makeComponentRef(self, name: str) -> DatasetRef:
        """Create a `DatasetRef` that corresponds to a component of this
        dataset.

        Parameters
        ----------
        name : `str`
            Name of the component.

        Returns
        -------
        ref : `DatasetRef`
            A `DatasetRef` with a dataset type that corresponds to the given
            component, with ``hasParentId=True``, and the same ID and run
            (which may be `None`, if they are `None` in ``self``).
        """
        return DatasetRef(self.datasetType.makeComponentDatasetType(name), self.dataId,
                          id=self.id, run=self.run, hasParentId=True)

    datasetType: DatasetType
    """The definition of this dataset (`DatasetType`).

    Cannot be changed after a `DatasetRef` is constructed.
    """

    dataId: DataCoordinate
    """A mapping of `Dimension` primary key values that labels the dataset
    within a Collection (`DataCoordinate`).

    Cannot be changed after a `DatasetRef` is constructed.
    """

    run: Optional[str]
    """The name of the run that produced the dataset.

    Cannot be changed after a `DatasetRef` is constructed; use `resolved` or
    `unresolved` to add or remove this information when creating a new
    `DatasetRef`.
    """

    id: Optional[int]
    """Primary key of the dataset (`int` or `None`).

    Cannot be changed after a `DatasetRef` is constructed; use `resolved` or
    `unresolved` to add or remove this information when creating a new
    `DatasetRef`.
    """