Coverage for python/lsst/daf/butler/core/datasets/ref.py: 29%
239 statements
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = [
    "AmbiguousDatasetError",
    "DatasetId",
    "DatasetIdFactory",
    "DatasetIdGenEnum",
    "DatasetRef",
    "SerializedDatasetRef",
    "UnresolvedRefWarning",
]

import enum
import inspect
import uuid
import warnings
from typing import TYPE_CHECKING, Any, ClassVar, Dict, Iterable, List, Optional, Tuple, Union

from deprecated.sphinx import deprecated
from lsst.utils.classes import immutable
from pydantic import BaseModel, ConstrainedInt, StrictStr, validator

from ..configSupport import LookupKey
from ..dimensions import DataCoordinate, DimensionGraph, DimensionUniverse, SerializedDataCoordinate
from ..json import from_json_pydantic, to_json_pydantic
from ..named import NamedKeyDict
from .type import DatasetType, SerializedDatasetType

if TYPE_CHECKING:
    from ...registry import Registry
    from ..storageClass import StorageClass


class UnresolvedRefWarning(FutureWarning):
    """Warnings concerning the usage of unresolved DatasetRefs."""


class AmbiguousDatasetError(Exception):
    """Raised when a `DatasetRef` is not resolved but should be.

    This happens when the `DatasetRef` has no ID or run but the requested
    operation requires one of them.
    """


class PositiveInt(ConstrainedInt):
    ge = 0
    strict = True


def _find_outside_stacklevel() -> int:
    """Find the warning stacklevel of the first caller outside of
    lsst.daf.butler.
    """
    stacklevel = 1
    for i, s in enumerate(inspect.stack()):
        module = inspect.getmodule(s.frame)
        # Stack frames sometimes hang around so explicitly delete.
        del s
        if module is None:
            continue
        if not module.__name__.startswith("lsst.daf.butler"):
            # Frame 0 is this function and frame 1 is its caller, so the
            # enumeration index of the first frame outside lsst.daf.butler
            # is already the correct stacklevel and needs no adjustment.
            stacklevel = i
            break

    return stacklevel


class DatasetIdGenEnum(enum.Enum):
    """This enum is used to specify dataset ID generation options."""

    UNIQUE = 0
    """Unique mode generates a unique ID for each inserted dataset, e.g.
    one auto-generated by the database or a random UUID.
    """

    DATAID_TYPE = 1
    """In this mode the ID is computed deterministically from a combination
    of dataset type and data ID.
    """

    DATAID_TYPE_RUN = 2
    """In this mode the ID is computed deterministically from a combination
    of dataset type, data ID, and run collection name.
    """


class DatasetIdFactory:
    """Factory for dataset IDs (UUIDs).

    For now the logic is hard-coded and is controlled by the user-provided
    value of `DatasetIdGenEnum`. In the future we may implement configurable
    logic that can guess the `DatasetIdGenEnum` value from other parameters.
    """

    NS_UUID = uuid.UUID("840b31d9-05cd-5161-b2c8-00d32b280d0f")
    """Namespace UUID used for UUID5 generation. Do not change. This was
    produced by `uuid.uuid5(uuid.NAMESPACE_DNS, "lsst.org")`.
    """

    def makeDatasetId(
        self,
        run: str,
        datasetType: DatasetType,
        dataId: DataCoordinate,
        idGenerationMode: DatasetIdGenEnum,
    ) -> uuid.UUID:
        """Generate dataset ID for a dataset.

        Parameters
        ----------
        run : `str`
            Name of the RUN collection for the dataset.
        datasetType : `DatasetType`
            Dataset type.
        dataId : `DataCoordinate`
            Expanded data ID for the dataset.
        idGenerationMode : `DatasetIdGenEnum`
            ID generation option. `~DatasetIdGenEnum.UNIQUE` makes a random
            UUID4-type ID. `~DatasetIdGenEnum.DATAID_TYPE` makes a
            deterministic UUID5-type ID based on a dataset type name and
            ``dataId``. `~DatasetIdGenEnum.DATAID_TYPE_RUN` makes a
            deterministic UUID5-type ID based on a dataset type name, run
            collection name, and ``dataId``.

        Returns
        -------
        datasetId : `uuid.UUID`
            Dataset identifier.
        """
        if idGenerationMode is DatasetIdGenEnum.UNIQUE:
            return uuid.uuid4()
        else:
            # WARNING: If you modify this code make sure that the order of
            # items in the `items` list below never changes.
            items: list[tuple[str, str]] = []
            if idGenerationMode is DatasetIdGenEnum.DATAID_TYPE:
                items = [
                    ("dataset_type", datasetType.name),
                ]
            elif idGenerationMode is DatasetIdGenEnum.DATAID_TYPE_RUN:
                items = [
                    ("dataset_type", datasetType.name),
                    ("run", run),
                ]
            else:
                raise ValueError(f"Unexpected ID generation mode: {idGenerationMode}")

            for name, value in sorted(dataId.byName().items()):
                items.append((name, str(value)))
            data = ",".join(f"{key}={value}" for key, value in items)
            return uuid.uuid5(self.NS_UUID, data)
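
    # Illustrative sketch, not part of the original source: in the
    # deterministic modes the same inputs always hash to the same UUID5,
    # so independent repositories agree on the ID of the same dataset.
    # ``dataset_type`` and ``data_id`` below are hypothetical stand-ins.
    #
    #     factory = DatasetIdFactory()
    #     id1 = factory.makeDatasetId(
    #         "HSC/runs/demo", dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE_RUN
    #     )
    #     id2 = factory.makeDatasetId(
    #         "HSC/runs/demo", dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE_RUN
    #     )
    #     assert id1 == id2  # stable across calls and repositories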

    @deprecated(
        "This method will soon be removed since it will be impossible to create an unresolved ref.",
        version="26.0",
        category=UnresolvedRefWarning,
    )
    def resolveRef(
        self,
        ref: DatasetRef,
        run: str,
        idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE,
    ) -> DatasetRef:
        """Generate a resolved dataset reference for a predicted dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            Dataset ref; may already be resolved.
        run : `str`
            Name of the RUN collection for the dataset.
        idGenerationMode : `DatasetIdGenEnum`
            ID generation option. `~DatasetIdGenEnum.UNIQUE` makes a random
            UUID4-type ID. `~DatasetIdGenEnum.DATAID_TYPE` makes a
            deterministic UUID5-type ID based on a dataset type name and
            ``dataId``. `~DatasetIdGenEnum.DATAID_TYPE_RUN` makes a
            deterministic UUID5-type ID based on a dataset type name, run
            collection name, and ``dataId``.

        Returns
        -------
        resolved : `DatasetRef`
            Resolved dataset ref. If the input reference is already resolved
            it is returned without modification.

        Notes
        -----
        This method can only be used for predicted dataset references that do
        not yet exist in the database. It does not resolve existing dataset
        references already stored in the registry.
        """
        if ref.id is not None:
            return ref
        datasetId = self.makeDatasetId(run, ref.datasetType, ref.dataId, idGenerationMode)
        # Hide the warning coming from ref.resolved().
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=UnresolvedRefWarning)
            resolved = ref.resolved(datasetId, run)
        return resolved


class SerializedDatasetRef(BaseModel):
    """Simplified model of a `DatasetRef` suitable for serialization."""

    # DO NOT change order in the Union, pydantic is sensitive to that!
    id: uuid.UUID | None = None
    datasetType: Optional[SerializedDatasetType] = None
    dataId: Optional[SerializedDataCoordinate] = None
    run: Optional[StrictStr] = None
    component: Optional[StrictStr] = None

    @validator("dataId")
    def _check_dataId(cls, v: Any, values: Dict[str, Any]) -> Any:  # noqa: N805
        if (d := "datasetType") in values and values[d] is None:
            raise ValueError("Can not specify 'dataId' without specifying 'datasetType'")
        return v

    @validator("run")
    def _check_run(cls, v: Any, values: Dict[str, Any]) -> Any:  # noqa: N805
        if v and (i := "id") in values and values[i] is None:
            raise ValueError("'run' cannot be provided unless 'id' is.")
        return v

    @validator("component")
    def _check_component(cls, v: Any, values: Dict[str, Any]) -> Any:  # noqa: N805
        # Component should not be given if datasetType is given.
        if v and (d := "datasetType") in values and values[d] is not None:
            raise ValueError(f"datasetType ({values[d]}) can not be set if component is given ({v}).")
        return v

    @classmethod
    def direct(
        cls,
        *,
        id: Optional[Union[str, int]] = None,
        datasetType: Optional[Dict[str, Any]] = None,
        dataId: Optional[Dict[str, Any]] = None,
        run: str | None = None,
        component: Optional[str] = None,
    ) -> SerializedDatasetRef:
        """Construct a `SerializedDatasetRef` directly without validators.

        This differs from the pydantic "construct" method in that the
        arguments are explicitly what the model requires, and it will recurse
        through members, constructing them from their corresponding `direct`
        methods.

        This method should only be called when the inputs are trusted.
        """
        node = SerializedDatasetRef.__new__(cls)
        setter = object.__setattr__
        setter(node, "id", uuid.UUID(id) if isinstance(id, str) else id)
        setter(
            node,
            "datasetType",
            datasetType if datasetType is None else SerializedDatasetType.direct(**datasetType),
        )
        setter(node, "dataId", dataId if dataId is None else SerializedDataCoordinate.direct(**dataId))
        setter(node, "run", run)
        setter(node, "component", component)
        setter(node, "__fields_set__", {"id", "datasetType", "dataId", "run", "component"})
        return node
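
    # Illustrative sketch, not part of the original source: the validators
    # above reject inconsistent field combinations when constructing through
    # pydantic, while ``direct`` skips validation for trusted input.
    #
    #     SerializedDatasetRef(dataId={...})  # validation error: 'dataId'
    #                                         # requires 'datasetType'
    #     SerializedDatasetRef.direct(id=str(uuid.uuid4()), run="some/run")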


DatasetId = uuid.UUID
"""A type-annotation alias for dataset ID providing typing flexibility."""


@immutable
class DatasetRef:
    """Reference to a Dataset in a `Registry`.

    A `DatasetRef` may point to a Dataset that does not yet exist
    (e.g., because it is a predicted input for provenance).

    Parameters
    ----------
    datasetType : `DatasetType`
        The `DatasetType` for this Dataset.
    dataId : `DataCoordinate`
        A mapping of dimensions that labels the Dataset within a Collection.
    id : `DatasetId`, optional
        The unique identifier assigned when the dataset is created. If ``run``
        is specified and ``id`` is not specified, an ID will be created.
    run : `str`, optional
        The name of the run this dataset was associated with when it was
        created. Must be provided if ``id`` is.
    conform : `bool`, optional
        If `True` (default), call `DataCoordinate.standardize` to ensure that
        the data ID's dimensions are consistent with the dataset type's.
        `DatasetRef` instances for which those dimensions are not equal should
        not be created in new code, but are still supported for backwards
        compatibility. New code should only pass `False` if it can guarantee
        that the dimensions are already consistent.
    id_generation_mode : `DatasetIdGenEnum`
        ID generation option. `~DatasetIdGenEnum.UNIQUE` makes a random
        UUID4-type ID. `~DatasetIdGenEnum.DATAID_TYPE` makes a
        deterministic UUID5-type ID based on a dataset type name and
        ``dataId``. `~DatasetIdGenEnum.DATAID_TYPE_RUN` makes a
        deterministic UUID5-type ID based on a dataset type name, run
        collection name, and ``dataId``.

    Raises
    ------
    ValueError
        Raised if ``id`` is provided but ``run`` is not.

    See Also
    --------
    :ref:`daf_butler_organizing_datasets`
    """

    _serializedType = SerializedDatasetRef
    __slots__ = (
        "id",
        "datasetType",
        "dataId",
        "run",
    )

    def __init__(
        self,
        datasetType: DatasetType,
        dataId: DataCoordinate,
        *,
        id: Optional[DatasetId] = None,
        run: Optional[str] = None,
        conform: bool = True,
        id_generation_mode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE,
    ):
        self.datasetType = datasetType
        if conform:
            self.dataId = DataCoordinate.standardize(dataId, graph=datasetType.dimensions)
        else:
            self.dataId = dataId
        if id is not None:
            if run is None:
                raise ValueError(
                    f"Cannot provide id without run for dataset with id={id}, "
                    f"type={datasetType}, and dataId={dataId}."
                )
            self.run = run
            self.id = id
        else:
            if run is not None:
                self.run = run
                self.id = DatasetIdFactory().makeDatasetId(
                    self.run, self.datasetType, self.dataId, id_generation_mode
                )
            else:
                self.id = None
                self.run = None
                warnings.warn(
                    "Support for creating unresolved refs will soon be removed. Please contact the "
                    "middleware team for advice on modifying your code to use resolved refs.",
                    category=UnresolvedRefWarning,
                    stacklevel=_find_outside_stacklevel(),
                )
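
    # Illustrative sketch, not part of the original source: supplying ``run``
    # without ``id`` resolves the ref immediately with a generated dataset ID.
    # ``flat_type`` and ``data_id`` are hypothetical stand-ins.
    #
    #     ref = DatasetRef(flat_type, data_id, run="HSC/runs/demo")
    #     assert ref.id is not None  # a random UUID4 in the default mode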

    def __eq__(self, other: Any) -> bool:
        try:
            return (self.datasetType, self.dataId, self.id) == (other.datasetType, other.dataId, other.id)
        except AttributeError:
            return NotImplemented

    def __hash__(self) -> int:
        return hash((self.datasetType, self.dataId, self.id))

    @property
    def dimensions(self) -> DimensionGraph:
        """Dimensions associated with the underlying `DatasetType`."""
        return self.datasetType.dimensions

    def __repr__(self) -> str:
        # We delegate to __str__ (i.e. use "!s") for the data ID below
        # because DataCoordinate's __repr__ - while adhering to the
        # guidelines for __repr__ - is much harder for users to read, while
        # its __str__ just produces a dict that can also be passed to
        # DatasetRef's constructor.
        if self.id is not None:
            return f"DatasetRef({self.datasetType!r}, {self.dataId!s}, id={self.id}, run={self.run!r})"
        else:
            return f"DatasetRef({self.datasetType!r}, {self.dataId!s})"

    def __str__(self) -> str:
        s = f"{self.datasetType.name}@{self.dataId!s} [sc={self.datasetType.storageClass_name}]"
        if self.id is not None:
            s += f" (id={self.id})"
        return s

    def __lt__(self, other: Any) -> bool:
        # Sort by run, then DatasetType name, and then by DataCoordinate.
        # The __str__ representation is probably close enough, but we need to
        # ensure that sorting a DatasetRef matches what you would get if you
        # sorted DatasetType+DataCoordinate.
        if not isinstance(other, type(self)):
            return NotImplemented

        # Group by run if defined; it takes precedence over DatasetType.
        self_run = "" if self.run is None else self.run
        other_run = "" if other.run is None else other.run

        # Compare tuples in the priority order.
        return (self_run, self.datasetType, self.dataId) < (other_run, other.datasetType, other.dataId)
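
    # Illustrative sketch, not part of the original source: sorting orders by
    # (run, dataset type, data ID); unresolved refs (run is None) compare as
    # an empty run string and therefore sort first.
    #
    #     refs.sort()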

    def to_simple(self, minimal: bool = False) -> SerializedDatasetRef:
        """Convert this class to a simple python type.

        This makes it suitable for serialization.

        Parameters
        ----------
        minimal : `bool`, optional
            Use minimal serialization. Requires Registry to convert
            back to a full type.

        Returns
        -------
        simple : `SerializedDatasetRef`
            The object converted to simplified form.
        """
        if minimal and self.id is not None:
            # The only thing needed to uniquely define a DatasetRef is its id
            # so that can be used directly if it is resolved and if it is not
            # a component DatasetRef.
            # Store it in a dict to allow us to easily add the planned origin
            # information later without having to support both an int and a
            # dict in simple form.
            simple: Dict[str, Any] = {"id": self.id}
            if self.isComponent():
                # We can still be a little minimalist with a component, but
                # we will also need to record the datasetType component.
                simple["component"] = self.datasetType.component()
            return SerializedDatasetRef(**simple)

        # Convert to a dict form.
        as_dict: Dict[str, Any] = {
            "datasetType": self.datasetType.to_simple(minimal=minimal),
            "dataId": self.dataId.to_simple(),
        }

        # Only include the id entry if it is defined.
        if self.id is not None:
            as_dict["run"] = self.run
            as_dict["id"] = self.id

        return SerializedDatasetRef(**as_dict)

    @classmethod
    def from_simple(
        cls,
        simple: SerializedDatasetRef,
        universe: Optional[DimensionUniverse] = None,
        registry: Optional[Registry] = None,
        datasetType: Optional[DatasetType] = None,
    ) -> DatasetRef:
        """Construct a new object from simplified form.

        Generally this is data returned from the `to_simple` method.

        Parameters
        ----------
        simple : `SerializedDatasetRef`
            The value returned by `to_simple()`.
        universe : `DimensionUniverse`
            The special graph of all known dimensions.
            Can be `None` if a registry is provided.
        registry : `lsst.daf.butler.Registry`, optional
            Registry to use to convert simple form of a DatasetRef to
            a full `DatasetRef`. Can be `None` if a full description of
            the type is provided along with a universe.
        datasetType : `DatasetType`, optional
            If supplied, this will be used as the dataset type object in the
            resulting `DatasetRef` instead of being read from the
            `SerializedDatasetRef`. This is useful when many refs share the
            same type, since memory can be saved. Defaults to `None`.

        Returns
        -------
        ref : `DatasetRef`
            Newly-constructed object.
        """
        # Minimalist component will just specify component and id and
        # require registry to reconstruct.
        if set(simple.dict(exclude_unset=True, exclude_defaults=True)).issubset({"id", "component"}):
            if registry is None:
                raise ValueError("Registry is required to construct component DatasetRef from integer id")
            if simple.id is None:
                raise ValueError("For minimal DatasetRef the ID must be defined.")
            ref = registry.getDataset(simple.id)
            if ref is None:
                raise RuntimeError(f"No matching dataset found in registry for id {simple.id}")
            if simple.component:
                ref = ref.makeComponentRef(simple.component)
            return ref

        if universe is None and registry is None:
            raise ValueError("One of universe or registry must be provided.")

        if universe is None and registry is not None:
            universe = registry.dimensions

        if universe is None:
            # this is for mypy
            raise ValueError("Unable to determine a usable universe")

        if simple.datasetType is None and datasetType is None:
            # mypy
            raise ValueError("The DatasetType must be specified to construct a DatasetRef")
        if datasetType is None:
            if simple.datasetType is None:
                raise ValueError("Cannot determine Dataset type of this serialized class")
            datasetType = DatasetType.from_simple(simple.datasetType, universe=universe, registry=registry)

        if simple.dataId is None:
            # mypy
            raise ValueError("The DataId must be specified to construct a DatasetRef")
        dataId = DataCoordinate.from_simple(simple.dataId, universe=universe)

        # Issue our own warning that could be more explicit.
        if simple.id is None and simple.run is None:
            warnings.warn(
                "Attempting to create an unresolved ref from simple form is deprecated. "
                f"Encountered with {simple!r}.",
                category=UnresolvedRefWarning,
                stacklevel=_find_outside_stacklevel(),
            )

        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=UnresolvedRefWarning)
            return cls(datasetType, dataId, id=simple.id, run=simple.run)
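
    # Illustrative sketch, not part of the original source: ``to_simple`` and
    # ``from_simple`` round-trip a resolved ref. ``butler`` is a hypothetical
    # Butler instance supplying the dimension universe.
    #
    #     serialized = ref.to_simple()
    #     restored = DatasetRef.from_simple(serialized, universe=butler.dimensions)
    #     assert restored == ref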

    to_json = to_json_pydantic
    from_json: ClassVar = classmethod(from_json_pydantic)

    @classmethod
    def _unpickle(
        cls,
        datasetType: DatasetType,
        dataId: DataCoordinate,
        id: Optional[DatasetId],
        run: Optional[str],
    ) -> DatasetRef:
        """Create new `DatasetRef`.

        A custom factory method for use by `__reduce__` as a workaround for
        its lack of support for keyword arguments.
        """
        return cls(datasetType, dataId, id=id, run=run)

    def __reduce__(self) -> tuple:
        return (self._unpickle, (self.datasetType, self.dataId, self.id, self.run))

    def __deepcopy__(self, memo: dict) -> DatasetRef:
        # DatasetRef is recursively immutable; see note in @immutable
        # decorator.
        return self

    @deprecated(
        "This method will soon be a no-op since it will be impossible to create an unresolved ref.",
        version="26.0",
        category=UnresolvedRefWarning,
    )
    def resolved(self, id: DatasetId, run: str) -> DatasetRef:
        """Return resolved `DatasetRef`.

        This is a new `DatasetRef` with the same data ID and dataset type
        and the given ID and run.

        Parameters
        ----------
        id : `DatasetId`
            The unique identifier assigned when the dataset is created.
        run : `str`
            The run this dataset was associated with when it was created.

        Returns
        -------
        ref : `DatasetRef`
            A new `DatasetRef`.
        """
        return DatasetRef(datasetType=self.datasetType, dataId=self.dataId, id=id, run=run, conform=False)

    @deprecated(
        "Support for unresolved refs will soon be removed. Please contact the middleware developers"
        " for advice on how to modify your code.",
        category=UnresolvedRefWarning,
        version="26.0",
    )
    def unresolved(self) -> DatasetRef:
        """Return unresolved `DatasetRef`.

        This is a new `DatasetRef` with the same data ID and dataset type,
        but no ID or run.

        Returns
        -------
        ref : `DatasetRef`
            A new `DatasetRef`.

        Notes
        -----
        This can be used to compare only the data ID and dataset type of a
        pair of `DatasetRef` instances, regardless of whether either is
        resolved::

            if ref1.unresolved() == ref2.unresolved():
                ...
        """
        # We have already warned about this so no need to warn again.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=UnresolvedRefWarning)
            return DatasetRef(datasetType=self.datasetType, dataId=self.dataId, conform=False)

    def expanded(self, dataId: DataCoordinate) -> DatasetRef:
        """Return a new `DatasetRef` with the given expanded data ID.

        Parameters
        ----------
        dataId : `DataCoordinate`
            Data ID for the new `DatasetRef`. Must compare equal to the
            original data ID.

        Returns
        -------
        ref : `DatasetRef`
            A new `DatasetRef` with the given data ID.
        """
        assert dataId == self.dataId
        return DatasetRef(
            datasetType=self.datasetType, dataId=dataId, id=self.id, run=self.run, conform=False
        )

    def isComponent(self) -> bool:
        """Indicate whether this `DatasetRef` refers to a component.

        Returns
        -------
        isComponent : `bool`
            `True` if this `DatasetRef` is a component, `False` otherwise.
        """
        return self.datasetType.isComponent()

    def isComposite(self) -> bool:
        """Boolean indicating whether this `DatasetRef` is a composite type.

        Returns
        -------
        isComposite : `bool`
            `True` if this `DatasetRef` is a composite type, `False`
            otherwise.
        """
        return self.datasetType.isComposite()

    def _lookupNames(self) -> Tuple[LookupKey, ...]:
        """Name keys to use when looking up this DatasetRef in a
        configuration.

        The names are returned in order of priority.

        Returns
        -------
        names : `tuple` of `LookupKey`
            Tuple of the `DatasetType` name and the `StorageClass` name.
            If ``instrument`` is defined in the dataId, each of those names
            is added to the start of the tuple with a key derived from the
            value of ``instrument``.
        """
        # Special case the instrument Dimension since we allow configs
        # to include the instrument name in the hierarchy.
        names: Tuple[LookupKey, ...] = self.datasetType._lookupNames()

        if "instrument" in self.dataId:
            names = tuple(n.clone(dataId={"instrument": self.dataId["instrument"]}) for n in names) + names

        return names
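
    # Illustrative sketch, not part of the original source: for a ref whose
    # data ID includes instrument=HSC, the instrument-specific keys precede
    # the general ones, so instrument-scoped config entries take priority.
    #
    #     names = ref._lookupNames()
    #     # (type-name key with instrument=HSC, storage-class key with
    #     #  instrument=HSC, plain type-name key, plain storage-class key)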

    @staticmethod
    def groupByType(refs: Iterable[DatasetRef]) -> NamedKeyDict[DatasetType, List[DatasetRef]]:
        """Group an iterable of `DatasetRef` by `DatasetType`.

        Parameters
        ----------
        refs : `Iterable` [ `DatasetRef` ]
            `DatasetRef` instances to group.

        Returns
        -------
        grouped : `NamedKeyDict` [ `DatasetType`, `list` [ `DatasetRef` ] ]
            Grouped `DatasetRef` instances.
        """
        result: NamedKeyDict[DatasetType, List[DatasetRef]] = NamedKeyDict()
        for ref in refs:
            result.setdefault(ref.datasetType, []).append(ref)
        return result
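
    # Illustrative sketch, not part of the original source: grouping is
    # useful before per-dataset-type bulk operations. ``refs`` is a
    # hypothetical iterable of DatasetRef.
    #
    #     for dataset_type, refs_of_type in DatasetRef.groupByType(refs).items():
    #         print(dataset_type.name, len(refs_of_type))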

    def getCheckedId(self) -> DatasetId:
        """Return ``self.id``, or raise if it is `None`.

        This trivial method exists to allow operations that would otherwise
        be natural list comprehensions to check that the ID is not `None` as
        well.

        Returns
        -------
        id : `DatasetId`
            ``self.id`` if it is not `None`.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``ref.id`` is `None`.
        """
        if self.id is None:
            raise AmbiguousDatasetError(f"ID for dataset {self} is `None`; a resolved reference is required.")
        return self.id
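
    # Illustrative sketch, not part of the original source: this lets a list
    # comprehension fail loudly on unresolved refs instead of silently
    # collecting `None` values.
    #
    #     ids = [ref.getCheckedId() for ref in refs]
    #     # raises AmbiguousDatasetError if any ref is unresolved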

    def makeCompositeRef(self) -> DatasetRef:
        """Create a `DatasetRef` of the composite from a component ref.

        Requires that this `DatasetRef` is a component.

        Returns
        -------
        ref : `DatasetRef`
            A `DatasetRef` with a dataset type that corresponds to the
            composite parent of this component, and the same ID and run
            (which may be `None`, if they are `None` in ``self``).
        """
        # Assume that the data ID does not need to be standardized
        # and should match whatever this ref already has.
        return DatasetRef(
            self.datasetType.makeCompositeDatasetType(), self.dataId, id=self.id, run=self.run, conform=False
        )

    def makeComponentRef(self, name: str) -> DatasetRef:
        """Create a `DatasetRef` that corresponds to a component.

        Parameters
        ----------
        name : `str`
            Name of the component.

        Returns
        -------
        ref : `DatasetRef`
            A `DatasetRef` with a dataset type that corresponds to the given
            component, and the same ID and run
            (which may be `None`, if they are `None` in ``self``).
        """
        # Assume that the data ID does not need to be standardized
        # and should match whatever this ref already has.
        return DatasetRef(
            self.datasetType.makeComponentDatasetType(name),
            self.dataId,
            id=self.id,
            run=self.run,
            conform=False,
        )
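
    # Illustrative sketch, not part of the original source: a component ref
    # shares the parent's data ID, ID, and run; "wcs" is a hypothetical
    # component name.
    #
    #     wcs_ref = ref.makeComponentRef("wcs")
    #     assert wcs_ref.id == ref.id
    #     # makeCompositeRef() recovers a ref to the parent dataset type.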

    def overrideStorageClass(self, storageClass: str | StorageClass) -> DatasetRef:
        """Create a new `DatasetRef` from this one, but with a modified
        `DatasetType` that has a different `StorageClass`.

        Parameters
        ----------
        storageClass : `str` or `StorageClass`
            The new storage class.

        Returns
        -------
        modified : `DatasetRef`
            A new dataset reference that is the same as the current one but
            with a different storage class in the `DatasetType`.
        """
        return DatasetRef(
            datasetType=self.datasetType.overrideStorageClass(storageClass),
            dataId=self.dataId,
            id=self.id,
            run=self.run,
            conform=False,
        )
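
    # Illustrative sketch, not part of the original source: override the
    # storage class to read the same dataset as a different in-memory type;
    # "ArrowAstropy" is a hypothetical storage class name.
    #
    #     table_ref = ref.overrideStorageClass("ArrowAstropy")
    #     assert table_ref.id == ref.id  # same dataset, new Python type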

    datasetType: DatasetType
    """The definition of this dataset (`DatasetType`).

    Cannot be changed after a `DatasetRef` is constructed.
    """

    dataId: DataCoordinate
    """A mapping of `Dimension` primary key values that labels the dataset
    within a Collection (`DataCoordinate`).

    Cannot be changed after a `DatasetRef` is constructed.
    """

    run: Optional[str]
    """The name of the run that produced the dataset.

    Cannot be changed after a `DatasetRef` is constructed; use `resolved` or
    `unresolved` to add or remove this information when creating a new
    `DatasetRef`.
    """

    id: Optional[DatasetId]
    """Primary key of the dataset (`DatasetId` or `None`).

    Cannot be changed after a `DatasetRef` is constructed; use `resolved` or
    `unresolved` to add or remove this information when creating a new
    `DatasetRef`.
    """