Coverage for python/lsst/daf/butler/core/datasets/ref.py : 32%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = ["AmbiguousDatasetError", "DatasetRef"]
25from typing import (
26 Any,
27 Dict,
28 Iterable,
29 List,
30 Optional,
31 Tuple,
32)
34from ..dimensions import DataCoordinate, DimensionGraph
35from ..configSupport import LookupKey
36from ..utils import immutable
37from ..named import NamedKeyDict
38from .type import DatasetType
class AmbiguousDatasetError(Exception):
    """Error raised when an operation needs a resolved `DatasetRef` — one
    carrying an ID and run — but the reference supplied has neither.
    """
@immutable
class DatasetRef:
    """Reference to a Dataset in a `Registry`.

    A `DatasetRef` may point to a Dataset that currently does not yet exist
    (e.g., because it is a predicted input for provenance).

    Parameters
    ----------
    datasetType : `DatasetType`
        The `DatasetType` for this Dataset.
    dataId : `DataCoordinate`
        A mapping of dimensions that labels the Dataset within a Collection.
    id : `int`, optional
        The unique integer identifier assigned when the dataset is created.
    run : `str`, optional
        The name of the run this dataset was associated with when it was
        created.  Must be provided if ``id`` is.
    conform : `bool`, optional
        If `True` (default), call `DataCoordinate.standardize` to ensure that
        the data ID's dimensions are consistent with the dataset type's.
        `DatasetRef` instances for which those dimensions are not equal should
        not be created in new code, but are still supported for backwards
        compatibility.  New code should only pass `False` if it can guarantee
        that the dimensions are already consistent.
    hasParentId : `bool`, optional
        If `True` this `DatasetRef` is a component that has the ``id``
        of the composite parent.

    Raises
    ------
    ValueError
        Raised if ``run`` is provided but ``id`` is not, or if ``id`` is
        provided but ``run`` is not.
    """

    __slots__ = ("id", "datasetType", "dataId", "run", "hasParentId")

    def __new__(cls, datasetType: DatasetType, dataId: DataCoordinate, *,
                id: Optional[int] = None,
                run: Optional[str] = None,
                hasParentId: bool = False,
                conform: bool = True) -> DatasetRef:
        # @immutable classes initialize in __new__ rather than __init__ so
        # attributes can be assigned exactly once.
        self = super().__new__(cls)
        assert isinstance(datasetType, DatasetType)
        self.id = id
        self.datasetType = datasetType
        self.hasParentId = hasParentId
        if conform:
            # Make the data ID's dimensions agree with the dataset type's.
            self.dataId = DataCoordinate.standardize(dataId, graph=datasetType.dimensions)
        else:
            self.dataId = dataId
        # ``id`` and ``run`` must be provided together (resolved reference)
        # or not at all (unresolved reference).
        if self.id is not None:
            if run is None:
                raise ValueError(f"Cannot provide id without run for dataset with id={id}, "
                                 f"type={datasetType}, and dataId={dataId}.")
            self.run = run
        else:
            if run is not None:
                raise ValueError("'run' cannot be provided unless 'id' is.")
            self.run = None
        return self

    def __eq__(self, other: Any) -> bool:
        # ``run`` is deliberately excluded: equality is defined by
        # (datasetType, dataId, id) only, matching __hash__ below.
        try:
            return (self.datasetType, self.dataId, self.id) == (other.datasetType, other.dataId, other.id)
        except AttributeError:
            return NotImplemented

    def __hash__(self) -> int:
        return hash((self.datasetType, self.dataId, self.id))

    @property
    def dimensions(self) -> DimensionGraph:
        """The dimensions associated with the underlying `DatasetType`
        (`DimensionGraph`).
        """
        return self.datasetType.dimensions

    def __repr__(self) -> str:
        # We delegate to __str__ (i.e. use "!s") for the data ID below because
        # DataCoordinate's __repr__ - while adhering to the guidelines for
        # __repr__ - is much harder for users to read, while its __str__ just
        # produces a dict that can also be passed to DatasetRef's constructor.
        if self.id is not None:
            return f"DatasetRef({self.datasetType!r}, {self.dataId!s}, id={self.id}, run={self.run!r})"
        else:
            return f"DatasetRef({self.datasetType!r}, {self.dataId!s})"

    def __str__(self) -> str:
        # Bug fix: the original emitted an unmatched closing "]" with no
        # opening bracket; the storage class annotation is now properly
        # bracketed as "[sc=...]".
        s = f"{self.datasetType.name}@{self.dataId!s} [sc={self.datasetType.storageClass.name}]"
        if self.id is not None:
            s += f" (id={self.id})"
        return s

    def __lt__(self, other: Any) -> bool:
        # Sort by run, DatasetType name and then by DataCoordinate.
        # The __str__ representation is probably close enough but we
        # need to ensure that sorting a DatasetRef matches what you would
        # get if you sorted DatasetType+DataCoordinate.
        if not isinstance(other, type(self)):
            return NotImplemented

        # Group by run if defined; takes precedence over DatasetType.
        # Unresolved references (run is None) sort before any named run.
        self_run = "" if self.run is None else self.run
        other_run = "" if other.run is None else other.run

        # Compare tuples in the priority order.
        return (self_run, self.datasetType, self.dataId) < (other_run, other.datasetType, other.dataId)

    def __getnewargs_ex__(self) -> Tuple[Tuple[Any, ...], Dict[str, Any]]:
        # Pickle support for a __slots__/@immutable class: reconstruct via
        # __new__ with these positional and keyword arguments.
        # Bug fix: ``hasParentId`` was previously omitted, so component refs
        # silently lost that flag (reset to False) on a pickle round-trip.
        return ((self.datasetType, self.dataId),
                {"id": self.id, "run": self.run, "hasParentId": self.hasParentId})

    def resolved(self, id: int, run: str) -> DatasetRef:
        """Return a new `DatasetRef` with the same data ID and dataset type
        and the given ID and run.

        Parameters
        ----------
        id : `int`
            The unique integer identifier assigned when the dataset is created.
        run : `str`
            The run this dataset was associated with when it was created.

        Returns
        -------
        ref : `DatasetRef`
            A new `DatasetRef`.
        """
        # conform=False: self.dataId was already standardized at construction.
        return DatasetRef(datasetType=self.datasetType, dataId=self.dataId,
                          id=id, run=run, conform=False)

    def unresolved(self) -> DatasetRef:
        """Return a new `DatasetRef` with the same data ID and dataset type,
        but no ID or run.

        Returns
        -------
        ref : `DatasetRef`
            A new `DatasetRef`.

        Notes
        -----
        This can be used to compare only the data ID and dataset type of a
        pair of `DatasetRef` instances, regardless of whether either is
        resolved::

            if ref1.unresolved() == ref2.unresolved():
                ...
        """
        return DatasetRef(datasetType=self.datasetType, dataId=self.dataId, conform=False)

    def expanded(self, dataId: DataCoordinate) -> DatasetRef:
        """Return a new `DatasetRef` with the given expanded data ID.

        Parameters
        ----------
        dataId : `DataCoordinate`
            Data ID for the new `DatasetRef`.  Must compare equal to the
            original data ID.

        Returns
        -------
        ref : `DatasetRef`
            A new `DatasetRef` with the given data ID.
        """
        assert dataId == self.dataId
        return DatasetRef(datasetType=self.datasetType, dataId=dataId,
                          id=self.id, run=self.run,
                          conform=False)

    def isComponent(self) -> bool:
        """Boolean indicating whether this `DatasetRef` refers to a
        component of a composite.

        Returns
        -------
        isComponent : `bool`
            `True` if this `DatasetRef` is a component, `False` otherwise.
        """
        return self.datasetType.isComponent()

    def isComposite(self) -> bool:
        """Boolean indicating whether this `DatasetRef` is a composite type.

        Returns
        -------
        isComposite : `bool`
            `True` if this `DatasetRef` is a composite type, `False`
            otherwise.
        """
        return self.datasetType.isComposite()

    def _lookupNames(self) -> Tuple[LookupKey, ...]:
        """Name keys to use when looking up this DatasetRef in a configuration.

        The names are returned in order of priority.

        Returns
        -------
        names : `tuple` of `LookupKey`
            Tuple of the `DatasetType` name and the `StorageClass` name.
            If ``instrument`` is defined in the dataId, each of those names
            is added to the start of the tuple with a key derived from the
            value of ``instrument``.
        """
        # Special case the instrument Dimension since we allow configs
        # to include the instrument name in the hierarchy.
        names: Tuple[LookupKey, ...] = self.datasetType._lookupNames()

        # mypy doesn't think this could return True, because even though
        # __contains__ can take an object of any type, it seems hard-coded to
        # assume it will return False if the type doesn't match the key type
        # of the Mapping.
        if "instrument" in self.dataId:  # type: ignore
            # Instrument-specific keys take priority, so prepend them.
            names = tuple(n.clone(dataId={"instrument": self.dataId["instrument"]})
                          for n in names) + names

        return names

    @staticmethod
    def groupByType(refs: Iterable[DatasetRef]) -> NamedKeyDict[DatasetType, List[DatasetRef]]:
        """Group an iterable of `DatasetRef` by `DatasetType`.

        Parameters
        ----------
        refs : `Iterable` [ `DatasetRef` ]
            `DatasetRef` instances to group.

        Returns
        -------
        grouped : `NamedKeyDict` [ `DatasetType`, `list` [ `DatasetRef` ] ]
            Grouped `DatasetRef` instances.
        """
        result: NamedKeyDict[DatasetType, List[DatasetRef]] = NamedKeyDict()
        for ref in refs:
            result.setdefault(ref.datasetType, []).append(ref)
        return result

    def getCheckedId(self) -> int:
        """Return ``self.id``, or raise if it is `None`.

        This trivial method exists to allow operations that would otherwise be
        natural list comprehensions to check that the ID is not `None` as well.

        Returns
        -------
        id : `int`
            ``self.id`` if it is not `None`.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``ref.id`` is `None`.
        """
        if self.id is None:
            raise AmbiguousDatasetError(f"ID for dataset {self} is `None`; "
                                        "a resolved reference is required.")
        return self.id

    def makeComponentRef(self, name: str) -> DatasetRef:
        """Create a `DatasetRef` that corresponds to a component of this
        dataset.

        Parameters
        ----------
        name : `str`
            Name of the component.

        Returns
        -------
        ref : `DatasetRef`
            A `DatasetRef` with a dataset type that corresponds to the given
            component, with ``hasParentId=True``, and the same ID and run
            (which may be `None`, if they are `None` in ``self``).
        """
        return DatasetRef(self.datasetType.makeComponentDatasetType(name), self.dataId,
                          id=self.id, run=self.run, hasParentId=True)

    # Class-level annotations for the instance attributes stored in
    # __slots__; values are assigned exactly once in __new__.

    datasetType: DatasetType
    """The definition of this dataset (`DatasetType`).

    Cannot be changed after a `DatasetRef` is constructed.
    """

    dataId: DataCoordinate
    """A mapping of `Dimension` primary key values that labels the dataset
    within a Collection (`DataCoordinate`).

    Cannot be changed after a `DatasetRef` is constructed.
    """

    run: Optional[str]
    """The name of the run that produced the dataset.

    Cannot be changed after a `DatasetRef` is constructed; use `resolved` or
    `unresolved` to add or remove this information when creating a new
    `DatasetRef`.
    """

    id: Optional[int]
    """Primary key of the dataset (`int` or `None`).

    Cannot be changed after a `DatasetRef` is constructed; use `resolved` or
    `unresolved` to add or remove this information when creating a new
    `DatasetRef`.
    """

    hasParentId: bool
    """Whether this reference is a component carrying the ``id`` of its
    composite parent (`bool`).

    Cannot be changed after a `DatasetRef` is constructed.
    """