Coverage for python/lsst/daf/butler/core/datasets/ref.py : 33%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = ["AmbiguousDatasetError", "DatasetRef"]
25from typing import (
26 Any,
27 Dict,
28 Iterable,
29 List,
30 Optional,
31 Tuple,
32)
34from ..dimensions import DataCoordinate, DimensionGraph
35from ..configSupport import LookupKey
36from ..utils import immutable
37from ..named import NamedKeyDict
38from .type import DatasetType
class AmbiguousDatasetError(Exception):
    """Error signaling that an operation required a resolved `DatasetRef`
    (one carrying a dataset ID and a run name), but the reference supplied
    had neither.
    """
@immutable
class DatasetRef:
    """Reference to a Dataset in a `Registry`.

    A `DatasetRef` may point to a Dataset that currently does not yet exist
    (e.g., because it is a predicted input for provenance).

    Parameters
    ----------
    datasetType : `DatasetType`
        The `DatasetType` for this Dataset.
    dataId : `DataCoordinate`
        A mapping of dimensions that labels the Dataset within a Collection.
    id : `int`, optional
        The unique integer identifier assigned when the dataset is created.
    run : `str`, optional
        The name of the run this dataset was associated with when it was
        created.  Must be provided if ``id`` is.
    conform : `bool`, optional
        If `True` (default), call `DataCoordinate.standardize` to ensure that
        the data ID's dimensions are consistent with the dataset type's.
        `DatasetRef` instances for which those dimensions are not equal should
        not be created in new code, but are still supported for backwards
        compatibility.  New code should only pass `False` if it can guarantee
        that the dimensions are already consistent.
    hasParentId : `bool`, optional
        If `True` this `DatasetRef` is a component that has the ``id``
        of the composite parent.

    Raises
    ------
    ValueError
        Raised if ``run`` is provided but ``id`` is not, or if ``id`` is
        provided but ``run`` is not.
    """

    __slots__ = ("id", "datasetType", "dataId", "run", "hasParentId")

    def __new__(cls, datasetType: DatasetType, dataId: DataCoordinate, *,
                id: Optional[int] = None,
                run: Optional[str] = None,
                hasParentId: bool = False,
                conform: bool = True) -> DatasetRef:
        self = super().__new__(cls)
        assert isinstance(datasetType, DatasetType)
        self.id = id
        self.datasetType = datasetType
        self.hasParentId = hasParentId
        if conform:
            self.dataId = DataCoordinate.standardize(dataId, graph=datasetType.dimensions)
        else:
            self.dataId = dataId
        # ``id`` and ``run`` must be provided together: a resolved reference
        # always knows the run it belongs to, and an unresolved one has
        # neither.
        if self.id is not None:
            if run is None:
                raise ValueError(f"Cannot provide id without run for dataset with id={id}, "
                                 f"type={datasetType}, and dataId={dataId}.")
            self.run = run
        else:
            if run is not None:
                raise ValueError("'run' cannot be provided unless 'id' is.")
            self.run = None
        return self

    def __eq__(self, other: Any) -> bool:
        # Equality intentionally ignores ``run`` and ``hasParentId``: two refs
        # with the same dataset type, data ID, and ID compare equal.
        try:
            return (self.datasetType, self.dataId, self.id) == (other.datasetType, other.dataId, other.id)
        except AttributeError:
            return NotImplemented

    def __hash__(self) -> int:
        # Must hash the same tuple of fields that __eq__ compares.
        return hash((self.datasetType, self.dataId, self.id))

    @property
    def dimensions(self) -> DimensionGraph:
        """The dimensions associated with the underlying `DatasetType`.
        """
        return self.datasetType.dimensions

    def __repr__(self) -> str:
        # We delegate to __str__ (i.e. use "!s") for the data ID below because
        # DataCoordinate's __repr__ - while adhering to the guidelines for
        # __repr__ - is much harder for users to read, while its __str__ just
        # produces a dict that can also be passed to DatasetRef's constructor.
        if self.id is not None:
            return f"DatasetRef({self.datasetType!r}, {self.dataId!s}, id={self.id}, run={self.run!r})"
        else:
            return f"DatasetRef({self.datasetType!r}, {self.dataId!s})"

    def __str__(self) -> str:
        # The previous format string ended with an unmatched "]"; the
        # storage-class annotation is now properly bracketed as "[sc=...]".
        s = f"{self.datasetType.name}@{self.dataId!s} [sc={self.datasetType.storageClass.name}]"
        if self.id is not None:
            s += f" (id={self.id})"
        return s

    def __getnewargs_ex__(self) -> Tuple[Tuple[Any, ...], Dict[str, Any]]:
        # Include ``hasParentId`` so component references round-trip through
        # pickle without losing that flag (it was previously dropped and
        # silently reset to `False` on unpickling).
        return ((self.datasetType, self.dataId),
                {"id": self.id, "run": self.run, "hasParentId": self.hasParentId})

    def resolved(self, id: int, run: str) -> DatasetRef:
        """Return a new `DatasetRef` with the same data ID and dataset type
        and the given ID and run.

        Parameters
        ----------
        id : `int`
            The unique integer identifier assigned when the dataset is created.
        run : `str`
            The run this dataset was associated with when it was created.

        Returns
        -------
        ref : `DatasetRef`
            A new `DatasetRef`.
        """
        # conform=False: our data ID was already standardized at construction.
        return DatasetRef(datasetType=self.datasetType, dataId=self.dataId,
                          id=id, run=run, conform=False)

    def unresolved(self) -> DatasetRef:
        """Return a new `DatasetRef` with the same data ID and dataset type,
        but no ID or run.

        Returns
        -------
        ref : `DatasetRef`
            A new `DatasetRef`.

        Notes
        -----
        This can be used to compare only the data ID and dataset type of a
        pair of `DatasetRef` instances, regardless of whether either is
        resolved::

            if ref1.unresolved() == ref2.unresolved():
                ...
        """
        return DatasetRef(datasetType=self.datasetType, dataId=self.dataId, conform=False)

    def expanded(self, dataId: DataCoordinate) -> DatasetRef:
        """Return a new `DatasetRef` with the given expanded data ID.

        Parameters
        ----------
        dataId : `DataCoordinate`
            Data ID for the new `DatasetRef`.  Must compare equal to the
            original data ID.

        Returns
        -------
        ref : `DatasetRef`
            A new `DatasetRef` with the given data ID.
        """
        assert dataId == self.dataId
        # Propagate hasParentId: this is the same dataset (same id/run), so
        # the component-parent relationship is unchanged (it was previously
        # lost here).
        return DatasetRef(datasetType=self.datasetType, dataId=dataId,
                          id=self.id, run=self.run, hasParentId=self.hasParentId,
                          conform=False)

    def isComponent(self) -> bool:
        """Boolean indicating whether this `DatasetRef` refers to a
        component of a composite.

        Returns
        -------
        isComponent : `bool`
            `True` if this `DatasetRef` is a component, `False` otherwise.
        """
        return self.datasetType.isComponent()

    def isComposite(self) -> bool:
        """Boolean indicating whether this `DatasetRef` is a composite type.

        Returns
        -------
        isComposite : `bool`
            `True` if this `DatasetRef` is a composite type, `False`
            otherwise.
        """
        return self.datasetType.isComposite()

    def _lookupNames(self) -> Tuple[LookupKey, ...]:
        """Name keys to use when looking up this DatasetRef in a configuration.

        The names are returned in order of priority.

        Returns
        -------
        names : `tuple` of `LookupKey`
            Tuple of the `DatasetType` name and the `StorageClass` name.
            If ``instrument`` is defined in the dataId, each of those names
            is added to the start of the tuple with a key derived from the
            value of ``instrument``.
        """
        # Special case the instrument Dimension since we allow configs
        # to include the instrument name in the hierarchy.
        names: Tuple[LookupKey, ...] = self.datasetType._lookupNames()

        # mypy doesn't think this could return True, because even though
        # __contains__ can take an object of any type, it seems hard-coded to
        # assume it will return False if the type doesn't match the key type
        # of the Mapping.
        if "instrument" in self.dataId:  # type: ignore
            # Instrument-specific keys take priority, so they go first.
            names = tuple(n.clone(dataId={"instrument": self.dataId["instrument"]})
                          for n in names) + names

        return names

    @staticmethod
    def groupByType(refs: Iterable[DatasetRef]) -> NamedKeyDict[DatasetType, List[DatasetRef]]:
        """Group an iterable of `DatasetRef` by `DatasetType`.

        Parameters
        ----------
        refs : `Iterable` [ `DatasetRef` ]
            `DatasetRef` instances to group.

        Returns
        -------
        grouped : `NamedKeyDict` [ `DatasetType`, `list` [ `DatasetRef` ] ]
            Grouped `DatasetRef` instances.
        """
        result: NamedKeyDict[DatasetType, List[DatasetRef]] = NamedKeyDict()
        for ref in refs:
            result.setdefault(ref.datasetType, []).append(ref)
        return result

    def getCheckedId(self) -> int:
        """Return ``self.id``, or raise if it is `None`.

        This trivial method exists to allow operations that would otherwise be
        natural list comprehensions to check that the ID is not `None` as well.

        Returns
        -------
        id : `int`
            ``self.id`` if it is not `None`.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``ref.id`` is `None`.
        """
        if self.id is None:
            raise AmbiguousDatasetError(f"ID for dataset {self} is `None`; "
                                        f"a resolved reference is required.")
        return self.id

    def makeComponentRef(self, name: str) -> DatasetRef:
        """Create a `DatasetRef` that corresponds to a component of this
        dataset.

        Parameters
        ----------
        name : `str`
            Name of the component.

        Returns
        -------
        ref : `DatasetRef`
            A `DatasetRef` with a dataset type that corresponds to the given
            component, with ``hasParentId=True``, and the same ID and run
            (which may be `None`, if they are `None` in ``self``).
        """
        return DatasetRef(self.datasetType.makeComponentDatasetType(name), self.dataId,
                          id=self.id, run=self.run, hasParentId=True)

    datasetType: DatasetType
    """The definition of this dataset (`DatasetType`).

    Cannot be changed after a `DatasetRef` is constructed.
    """

    dataId: DataCoordinate
    """A mapping of `Dimension` primary key values that labels the dataset
    within a Collection (`DataCoordinate`).

    Cannot be changed after a `DatasetRef` is constructed.
    """

    run: Optional[str]
    """The name of the run that produced the dataset.

    Cannot be changed after a `DatasetRef` is constructed; use `resolved` or
    `unresolved` to add or remove this information when creating a new
    `DatasetRef`.
    """

    id: Optional[int]
    """Primary key of the dataset (`int` or `None`).

    Cannot be changed after a `DatasetRef` is constructed; use `resolved` or
    `unresolved` to add or remove this information when creating a new
    `DatasetRef`.
    """