Coverage for python/lsst/daf/butler/core/datasets/ref.py : 29%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = ["AmbiguousDatasetError", "DatasetRef"]
25from typing import (
26 TYPE_CHECKING,
27 Any,
28 Dict,
29 Iterable,
30 List,
31 Optional,
32 Tuple,
33)
35from ..dimensions import DataCoordinate, DimensionGraph, DimensionUniverse
36from ..configSupport import LookupKey
37from ..utils import immutable
38from ..named import NamedKeyDict
39from .type import DatasetType
40from ..json import from_json_generic, to_json_generic
if TYPE_CHECKING:
    # Guarded so this import happens only during static type analysis, never
    # at runtime; `Registry` is used purely in annotations below.
    from ...registry import Registry
class AmbiguousDatasetError(Exception):
    """Exception raised when a `DatasetRef` lacks required resolution.

    Raised when an operation needs a resolved `DatasetRef` (one carrying an
    ID and a run) but the reference provided has neither.
    """
@immutable
class DatasetRef:
    """Reference to a Dataset in a `Registry`.

    A `DatasetRef` may point to a Dataset that currently does not yet exist
    (e.g., because it is a predicted input for provenance).

    Parameters
    ----------
    datasetType : `DatasetType`
        The `DatasetType` for this Dataset.
    dataId : `DataCoordinate`
        A mapping of dimensions that labels the Dataset within a Collection.
    id : `int`, optional
        The unique integer identifier assigned when the dataset is created.
    run : `str`, optional
        The name of the run this dataset was associated with when it was
        created.  Must be provided if ``id`` is.
    conform : `bool`, optional
        If `True` (default), call `DataCoordinate.standardize` to ensure that
        the data ID's dimensions are consistent with the dataset type's.
        `DatasetRef` instances for which those dimensions are not equal should
        not be created in new code, but are still supported for backwards
        compatibility.  New code should only pass `False` if it can guarantee
        that the dimensions are already consistent.

    Raises
    ------
    ValueError
        Raised if ``run`` is provided but ``id`` is not, or if ``id`` is
        provided but ``run`` is not.
    """

    __slots__ = ("id", "datasetType", "dataId", "run",)

    def __init__(
        self,
        datasetType: DatasetType, dataId: DataCoordinate, *,
        id: Optional[int] = None,
        run: Optional[str] = None,
        conform: bool = True
    ):
        self.id = id
        self.datasetType = datasetType
        if conform:
            self.dataId = DataCoordinate.standardize(dataId, graph=datasetType.dimensions)
        else:
            self.dataId = dataId
        # ``id`` and ``run`` must be provided together: a resolved ref always
        # knows the run that produced it, an unresolved ref knows neither.
        if self.id is not None:
            if run is None:
                raise ValueError(f"Cannot provide id without run for dataset with id={id}, "
                                 f"type={datasetType}, and dataId={dataId}.")
            self.run = run
        else:
            if run is not None:
                raise ValueError("'run' cannot be provided unless 'id' is.")
            self.run = None

    def __eq__(self, other: Any) -> bool:
        # Equality intentionally ignores ``run``: two refs with the same
        # dataset type, data ID, and ID compare equal.
        try:
            return (self.datasetType, self.dataId, self.id) == (other.datasetType, other.dataId, other.id)
        except AttributeError:
            return NotImplemented

    def __hash__(self) -> int:
        # Must be consistent with __eq__, so ``run`` is excluded here too.
        return hash((self.datasetType, self.dataId, self.id))

    @property
    def dimensions(self) -> DimensionGraph:
        """Dimensions associated with the underlying `DatasetType`."""
        return self.datasetType.dimensions

    def __repr__(self) -> str:
        # We delegate to __str__ (i.e. use "!s") for the data ID below because
        # DataCoordinate's __repr__ - while adhering to the guidelines for
        # __repr__ - is much harder for users to read, while its __str__ just
        # produces a dict that can also be passed to DatasetRef's constructor.
        if self.id is not None:
            return (f"DatasetRef({self.datasetType!r}, {self.dataId!s}, id={self.id}, run={self.run!r})")
        else:
            return f"DatasetRef({self.datasetType!r}, {self.dataId!s})"

    def __str__(self) -> str:
        # Fixed: the original appended a stray unbalanced "]" after the
        # storage class name.
        s = f"{self.datasetType.name}@{self.dataId!s}, sc={self.datasetType.storageClass.name}"
        if self.id is not None:
            s += f" (id={self.id})"
        return s

    def __lt__(self, other: Any) -> bool:
        # Sort by run, DatasetType name and then by DataCoordinate
        # The __str__ representation is probably close enough but we
        # need to ensure that sorting a DatasetRef matches what you would
        # get if you sorted DatasetType+DataCoordinate
        if not isinstance(other, type(self)):
            return NotImplemented

        # Group by run if defined, takes precedence over DatasetType;
        # map None to "" so unresolved refs sort first and stay comparable.
        self_run = "" if self.run is None else self.run
        other_run = "" if other.run is None else other.run

        # Compare tuples in the priority order
        return (self_run, self.datasetType, self.dataId) < (other_run, other.datasetType, other.dataId)

    def to_simple(self, minimal: bool = False) -> Dict:
        """Convert this class to a simple python type.

        This makes it suitable for serialization.

        Parameters
        ----------
        minimal : `bool`, optional
            Use minimal serialization. Requires Registry to convert
            back to a full type.

        Returns
        -------
        simple : `dict`
            The object converted to a dictionary.
        """
        if minimal and self.id is not None:
            # The only thing needed to uniquely define a DatasetRef
            # is the integer id so that can be used directly if it is
            # resolved and if it is not a component DatasetRef.
            # Store it in a dict to allow us to easily add the planned
            # origin information later without having to support
            # an int and dict in simple form.
            simple: Dict[str, Any] = {"id": self.id}
            if self.isComponent():
                # We can still be a little minimalist with a component
                # but we will also need to record the datasetType component
                simple["component"] = self.datasetType.component()
            return simple

        # Convert to a dict form
        as_dict: Dict[str, Any] = {"datasetType": self.datasetType.to_simple(minimal=minimal),
                                   "dataId": self.dataId.to_simple(),
                                   }

        # Only include the id and run entries if the ref is resolved;
        # from_simple must therefore tolerate their absence.
        if self.id is not None:
            as_dict["run"] = self.run
            as_dict["id"] = self.id

        return as_dict

    @classmethod
    def from_simple(cls, simple: Dict,
                    universe: Optional[DimensionUniverse] = None,
                    registry: Optional[Registry] = None) -> DatasetRef:
        """Construct a new object from simplified form.

        Generally this is data returned from the `to_simple` method.

        Parameters
        ----------
        simple : `dict` of [`str`, `Any`]
            The value returned by `to_simple()`.
        universe : `DimensionUniverse`
            The special graph of all known dimensions.
            Can be `None` if a registry is provided.
        registry : `lsst.daf.butler.Registry`, optional
            Registry to use to convert simple form of a DatasetRef to
            a full `DatasetRef`. Can be `None` if a full description of
            the type is provided along with a universe.

        Returns
        -------
        ref : `DatasetRef`
            Newly-constructed object.
        """
        # Minimalist component will just specify component and id and
        # require registry to reconstruct
        if set(simple).issubset({"id", "component"}):
            if registry is None:
                raise ValueError("Registry is required to construct component DatasetRef from integer id")
            ref = registry.getDataset(simple["id"])
            if ref is None:
                raise RuntimeError(f"No matching dataset found in registry for id {simple['id']}")
            if "component" in simple:
                ref = ref.makeComponentRef(simple["component"])
            return ref

        if universe is None and registry is None:
            raise ValueError("One of universe or registry must be provided.")

        if universe is None and registry is not None:
            universe = registry.dimensions

        if universe is None:
            # this is for mypy
            raise ValueError("Unable to determine a usable universe")

        datasetType = DatasetType.from_simple(simple["datasetType"], universe=universe, registry=registry)
        dataId = DataCoordinate.from_simple(simple["dataId"], universe=universe)
        # Fixed: to_simple() omits "id" and "run" for unresolved refs, so
        # use .get() rather than indexing to avoid a KeyError on round-trip.
        return cls(datasetType, dataId,
                   id=simple.get("id"), run=simple.get("run"))

    to_json = to_json_generic
    from_json = classmethod(from_json_generic)

    @classmethod
    def _unpickle(
        cls,
        datasetType: DatasetType,
        dataId: DataCoordinate,
        id: Optional[int],
        run: Optional[str],
    ) -> DatasetRef:
        """Create new `DatasetRef`.

        A custom factory method for use by `__reduce__` as a workaround for
        its lack of support for keyword arguments.
        """
        return cls(datasetType, dataId, id=id, run=run)

    def __reduce__(self) -> tuple:
        return (self._unpickle, (self.datasetType, self.dataId, self.id, self.run))

    def __deepcopy__(self, memo: dict) -> DatasetRef:
        # DatasetRef is recursively immutable; see note in @immutable
        # decorator.
        return self

    def resolved(self, id: int, run: str) -> DatasetRef:
        """Return resolved `DatasetRef`.

        This is a new `DatasetRef` with the same data ID and dataset type
        and the given ID and run.

        Parameters
        ----------
        id : `int`
            The unique integer identifier assigned when the dataset is created.
        run : `str`
            The run this dataset was associated with when it was created.

        Returns
        -------
        ref : `DatasetRef`
            A new `DatasetRef`.
        """
        return DatasetRef(datasetType=self.datasetType, dataId=self.dataId,
                          id=id, run=run, conform=False)

    def unresolved(self) -> DatasetRef:
        """Return unresolved `DatasetRef`.

        This is a new `DatasetRef` with the same data ID and dataset type,
        but no ID or run.

        Returns
        -------
        ref : `DatasetRef`
            A new `DatasetRef`.

        Notes
        -----
        This can be used to compare only the data ID and dataset type of a
        pair of `DatasetRef` instances, regardless of whether either is
        resolved::

            if ref1.unresolved() == ref2.unresolved():
                ...
        """
        return DatasetRef(datasetType=self.datasetType, dataId=self.dataId, conform=False)

    def expanded(self, dataId: DataCoordinate) -> DatasetRef:
        """Return a new `DatasetRef` with the given expanded data ID.

        Parameters
        ----------
        dataId : `DataCoordinate`
            Data ID for the new `DatasetRef`.  Must compare equal to the
            original data ID.

        Returns
        -------
        ref : `DatasetRef`
            A new `DatasetRef` with the given data ID.
        """
        assert dataId == self.dataId
        return DatasetRef(datasetType=self.datasetType, dataId=dataId,
                          id=self.id, run=self.run,
                          conform=False)

    def isComponent(self) -> bool:
        """Indicate whether this `DatasetRef` refers to a component.

        Returns
        -------
        isComponent : `bool`
            `True` if this `DatasetRef` is a component, `False` otherwise.
        """
        return self.datasetType.isComponent()

    def isComposite(self) -> bool:
        """Boolean indicating whether this `DatasetRef` is a composite type.

        Returns
        -------
        isComposite : `bool`
            `True` if this `DatasetRef` is a composite type, `False`
            otherwise.
        """
        return self.datasetType.isComposite()

    def _lookupNames(self) -> Tuple[LookupKey, ...]:
        """Name keys to use when looking up this DatasetRef in a configuration.

        The names are returned in order of priority.

        Returns
        -------
        names : `tuple` of `LookupKey`
            Tuple of the `DatasetType` name and the `StorageClass` name.
            If ``instrument`` is defined in the dataId, each of those names
            is added to the start of the tuple with a key derived from the
            value of ``instrument``.
        """
        # Special case the instrument Dimension since we allow configs
        # to include the instrument name in the hierarchy.
        names: Tuple[LookupKey, ...] = self.datasetType._lookupNames()

        # mypy doesn't think this could return True, because even though
        # __contains__ can take an object of any type, it seems hard-coded to
        # assume it will return False if the type doesn't match the key type
        # of the Mapping.
        if "instrument" in self.dataId:  # type: ignore
            names = tuple(n.clone(dataId={"instrument": self.dataId["instrument"]})
                          for n in names) + names

        return names

    @staticmethod
    def groupByType(refs: Iterable[DatasetRef]) -> NamedKeyDict[DatasetType, List[DatasetRef]]:
        """Group an iterable of `DatasetRef` by `DatasetType`.

        Parameters
        ----------
        refs : `Iterable` [ `DatasetRef` ]
            `DatasetRef` instances to group.

        Returns
        -------
        grouped : `NamedKeyDict` [ `DatasetType`, `list` [ `DatasetRef` ] ]
            Grouped `DatasetRef` instances.
        """
        result: NamedKeyDict[DatasetType, List[DatasetRef]] = NamedKeyDict()
        for ref in refs:
            result.setdefault(ref.datasetType, []).append(ref)
        return result

    def getCheckedId(self) -> int:
        """Return ``self.id``, or raise if it is `None`.

        This trivial method exists to allow operations that would otherwise be
        natural list comprehensions to check that the ID is not `None` as well.

        Returns
        -------
        id : `int`
            ``self.id`` if it is not `None`.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``ref.id`` is `None`.
        """
        if self.id is None:
            raise AmbiguousDatasetError(f"ID for dataset {self} is `None`; "
                                        "a resolved reference is required.")
        return self.id

    def makeCompositeRef(self) -> DatasetRef:
        """Create a `DatasetRef` of the composite from a component ref.

        Requires that this `DatasetRef` is a component.

        Returns
        -------
        ref : `DatasetRef`
            A `DatasetRef` with a dataset type that corresponds to the
            composite parent of this component, and the same ID and run
            (which may be `None`, if they are `None` in ``self``).
        """
        # Assume that the data ID does not need to be standardized
        # and should match whatever this ref already has.
        return DatasetRef(self.datasetType.makeCompositeDatasetType(), self.dataId,
                          id=self.id, run=self.run, conform=False)

    def makeComponentRef(self, name: str) -> DatasetRef:
        """Create a `DatasetRef` that corresponds to a component.

        Parameters
        ----------
        name : `str`
            Name of the component.

        Returns
        -------
        ref : `DatasetRef`
            A `DatasetRef` with a dataset type that corresponds to the given
            component, and the same ID and run
            (which may be `None`, if they are `None` in ``self``).
        """
        # Assume that the data ID does not need to be standardized
        # and should match whatever this ref already has.
        return DatasetRef(self.datasetType.makeComponentDatasetType(name), self.dataId,
                          id=self.id, run=self.run, conform=False)

    datasetType: DatasetType
    """The definition of this dataset (`DatasetType`).

    Cannot be changed after a `DatasetRef` is constructed.
    """

    dataId: DataCoordinate
    """A mapping of `Dimension` primary key values that labels the dataset
    within a Collection (`DataCoordinate`).

    Cannot be changed after a `DatasetRef` is constructed.
    """

    run: Optional[str]
    """The name of the run that produced the dataset.

    Cannot be changed after a `DatasetRef` is constructed; use `resolved` or
    `unresolved` to add or remove this information when creating a new
    `DatasetRef`.
    """

    id: Optional[int]
    """Primary key of the dataset (`int` or `None`).

    Cannot be changed after a `DatasetRef` is constructed; use `resolved` or
    `unresolved` to add or remove this information when creating a new
    `DatasetRef`.
    """