# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = [
    "AmbiguousDatasetError",
    "DatasetId",
    "DatasetIdFactory",
    "DatasetIdGenEnum",
    "DatasetRef",
    "SerializedDatasetRef",
    "UnresolvedRefWarning",
]

import enum
import inspect
import uuid
import warnings
from typing import TYPE_CHECKING, Any, ClassVar, Dict, Iterable, List, Optional, Tuple, Union

from deprecated.sphinx import deprecated
from lsst.utils.classes import immutable
from pydantic import BaseModel, ConstrainedInt, StrictStr, validator

from ..configSupport import LookupKey
from ..dimensions import DataCoordinate, DimensionGraph, DimensionUniverse, SerializedDataCoordinate
from ..json import from_json_pydantic, to_json_pydantic
from ..named import NamedKeyDict
from .type import DatasetType, SerializedDatasetType

if TYPE_CHECKING:
    from ...registry import Registry
    from ..storageClass import StorageClass


class UnresolvedRefWarning(FutureWarning):
    """Warnings concerning the usage of unresolved DatasetRefs."""


class AmbiguousDatasetError(Exception):
    """Raised when a `DatasetRef` is not resolved but should be.

    This happens when the `DatasetRef` has no ID or run but the requested
    operation requires one of them.
    """


class PositiveInt(ConstrainedInt):
    ge = 0
    strict = True


def _find_outside_stacklevel() -> int:
    """Find the stacklevel of the first caller outside lsst.daf.butler."""
    stacklevel = 1
    for i, s in enumerate(inspect.stack()):
        module = inspect.getmodule(s.frame)
        # Stack frames sometimes hang around so explicitly delete.
        del s
        if module is None:
            continue
        if not module.__name__.startswith("lsst.daf.butler"):
            # Index 0 is this function and index 1 is its caller, so the
            # enumeration index can be used as the stacklevel without
            # adjustment.
            stacklevel = i
            break

    return stacklevel
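

# Illustrative usage sketch (not part of the original file): this helper is
# meant to feed ``stacklevel`` to `warnings.warn` so the warning is attributed
# to the first frame outside this package rather than to internal butler code:
#
#     warnings.warn(
#         "Support for unresolved refs will soon be removed.",
#         category=UnresolvedRefWarning,
#         stacklevel=_find_outside_stacklevel(),
#     )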


class DatasetIdGenEnum(enum.Enum):
    """This enum is used to specify dataset ID generation options."""

    UNIQUE = 0
    """Unique mode generates a unique ID for each inserted dataset, e.g.
    auto-generated by database or random UUID.
    """

    DATAID_TYPE = 1
    """In this mode the ID is computed deterministically from a combination of
    dataset type and dataId.
    """

    DATAID_TYPE_RUN = 2
    """In this mode the ID is computed deterministically from a combination of
    dataset type, dataId, and run collection name.
    """


class DatasetIdFactory:
    """Factory for dataset IDs (UUIDs).

    For now the logic is hard-coded and is controlled by the user-provided
    value of `DatasetIdGenEnum`. In the future we may implement configurable
    logic that can guess the `DatasetIdGenEnum` value from other parameters.
    """

    NS_UUID = uuid.UUID("840b31d9-05cd-5161-b2c8-00d32b280d0f")
    """Namespace UUID used for UUID5 generation. Do not change. This was
    produced by `uuid.uuid5(uuid.NAMESPACE_DNS, "lsst.org")`.
    """

    def makeDatasetId(
        self,
        run: str,
        datasetType: DatasetType,
        dataId: DataCoordinate,
        idGenerationMode: DatasetIdGenEnum,
    ) -> uuid.UUID:
        """Generate dataset ID for a dataset.

        Parameters
        ----------
        run : `str`
            Name of the RUN collection for the dataset.
        datasetType : `DatasetType`
            Dataset type.
        dataId : `DataCoordinate`
            Expanded data ID for the dataset.
        idGenerationMode : `DatasetIdGenEnum`
            ID generation option. `~DatasetIdGenEnum.UNIQUE` makes a random
            UUID4-type ID. `~DatasetIdGenEnum.DATAID_TYPE` makes a
            deterministic UUID5-type ID based on a dataset type name and
            ``dataId``. `~DatasetIdGenEnum.DATAID_TYPE_RUN` makes a
            deterministic UUID5-type ID based on a dataset type name, run
            collection name, and ``dataId``.

        Returns
        -------
        datasetId : `uuid.UUID`
            Dataset identifier.
        """
        if idGenerationMode is DatasetIdGenEnum.UNIQUE:
            return uuid.uuid4()
        else:
            # WARNING: If you modify this code make sure that the order of
            # items in the `items` list below never changes.
            items: list[tuple[str, str]] = []
            if idGenerationMode is DatasetIdGenEnum.DATAID_TYPE:
                items = [
                    ("dataset_type", datasetType.name),
                ]
            elif idGenerationMode is DatasetIdGenEnum.DATAID_TYPE_RUN:
                items = [
                    ("dataset_type", datasetType.name),
                    ("run", run),
                ]
            else:
                raise ValueError(f"Unexpected ID generation mode: {idGenerationMode}")

            for name, value in sorted(dataId.byName().items()):
                items.append((name, str(value)))
            data = ",".join(f"{key}={value}" for key, value in items)
            return uuid.uuid5(self.NS_UUID, data)
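
    # Illustrative sketch (not part of the original file): because the
    # non-UNIQUE modes hash a canonical "key=value" string with uuid5 against
    # the fixed NS_UUID namespace, identical inputs always reproduce the same
    # ID. ``flatType`` and ``dataId`` below are hypothetical.
    #
    #     factory = DatasetIdFactory()
    #     id1 = factory.makeDatasetId("run/a", flatType, dataId,
    #                                 DatasetIdGenEnum.DATAID_TYPE_RUN)
    #     id2 = factory.makeDatasetId("run/a", flatType, dataId,
    #                                 DatasetIdGenEnum.DATAID_TYPE_RUN)
    #     assert id1 == id2  # deterministic across processes and repositories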

    @deprecated(
        "This method will soon be removed since it will be impossible to create an unresolved ref.",
        version="26.0",
        category=UnresolvedRefWarning,
    )
    def resolveRef(
        self,
        ref: DatasetRef,
        run: str,
        idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE,
    ) -> DatasetRef:
        """Generate a resolved dataset reference for predicted datasets.

        Parameters
        ----------
        ref : `DatasetRef`
            Dataset ref; may already be resolved.
        run : `str`
            Name of the RUN collection for the dataset.
        idGenerationMode : `DatasetIdGenEnum`
            ID generation option. `~DatasetIdGenEnum.UNIQUE` makes a random
            UUID4-type ID. `~DatasetIdGenEnum.DATAID_TYPE` makes a
            deterministic UUID5-type ID based on a dataset type name and
            ``dataId``. `~DatasetIdGenEnum.DATAID_TYPE_RUN` makes a
            deterministic UUID5-type ID based on a dataset type name, run
            collection name, and ``dataId``.

        Returns
        -------
        resolved : `DatasetRef`
            Resolved dataset ref; if the input reference is already resolved
            it is returned without modification.

        Notes
        -----
        This method can only be used for predicted dataset references that do
        not exist yet in the database. It does not resolve existing dataset
        references already stored in registry.
        """
        if ref.id is not None:
            return ref
        datasetId = self.makeDatasetId(run, ref.datasetType, ref.dataId, idGenerationMode)
        # Hide the warning coming from ref.resolved().
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=UnresolvedRefWarning)
            resolved = ref.resolved(datasetId, run)
        return resolved


class SerializedDatasetRef(BaseModel):
    """Simplified model of a `DatasetRef` suitable for serialization."""

    # DO NOT change order in the Union, pydantic is sensitive to that!
    id: uuid.UUID | None = None
    datasetType: Optional[SerializedDatasetType] = None
    dataId: Optional[SerializedDataCoordinate] = None
    run: Optional[StrictStr] = None
    component: Optional[StrictStr] = None

    @validator("dataId")
    def _check_dataId(cls, v: Any, values: Dict[str, Any]) -> Any:  # noqa: N805
        if (d := "datasetType") in values and values[d] is None:
            raise ValueError("Can not specify 'dataId' without specifying 'datasetType'")
        return v

    @validator("run")
    def _check_run(cls, v: Any, values: Dict[str, Any]) -> Any:  # noqa: N805
        if v and (i := "id") in values and values[i] is None:
            raise ValueError("'run' cannot be provided unless 'id' is.")
        return v

    @validator("component")
    def _check_component(cls, v: Any, values: Dict[str, Any]) -> Any:  # noqa: N805
        # Component should not be given if datasetType is given.
        if v and (d := "datasetType") in values and values[d] is not None:
            raise ValueError(f"datasetType ({values[d]}) can not be set if component is given ({v}).")
        return v

    @classmethod
    def direct(
        cls,
        *,
        id: Optional[Union[str, int]] = None,
        datasetType: Optional[Dict[str, Any]] = None,
        dataId: Optional[Dict[str, Any]] = None,
        run: str | None = None,
        component: Optional[str] = None,
    ) -> SerializedDatasetRef:
        """Construct a `SerializedDatasetRef` directly without validators.

        This differs from the pydantic "construct" method in that the
        arguments are explicitly what the model requires, and it will recurse
        through members, constructing them from their corresponding `direct`
        methods.

        This method should only be called when the inputs are trusted.
        """
        node = SerializedDatasetRef.__new__(cls)
        setter = object.__setattr__
        setter(node, "id", uuid.UUID(id) if isinstance(id, str) else id)
        setter(
            node,
            "datasetType",
            datasetType if datasetType is None else SerializedDatasetType.direct(**datasetType),
        )
        setter(node, "dataId", dataId if dataId is None else SerializedDataCoordinate.direct(**dataId))
        setter(node, "run", run)
        setter(node, "component", component)
        setter(node, "__fields_set__", {"id", "datasetType", "dataId", "run", "component"})
        return node
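
    # Illustrative sketch (not part of the original file): `direct` bypasses
    # the validators above, which otherwise reject inconsistent inputs (for
    # example, a dataId supplied without a datasetType). The values below are
    # hypothetical.
    #
    #     SerializedDatasetRef(dataId=some_data_id)  # raises: no datasetType
    #     SerializedDatasetRef.direct(id=str(uuid.uuid4()), run="run/a")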


DatasetId = uuid.UUID
"""A type-annotation alias for dataset ID providing typing flexibility."""


@immutable
class DatasetRef:
    """Reference to a Dataset in a `Registry`.

    A `DatasetRef` may point to a Dataset that does not yet exist
    (e.g., because it is a predicted input for provenance).

    Parameters
    ----------
    datasetType : `DatasetType`
        The `DatasetType` for this Dataset.
    dataId : `DataCoordinate`
        A mapping of dimensions that labels the Dataset within a Collection.
    id : `DatasetId`, optional
        The unique identifier assigned when the dataset is created. If ``run``
        is specified and ``id`` is not, an ID will be generated.
    run : `str`, optional
        The name of the run this dataset was associated with when it was
        created. Must be provided if ``id`` is.
    conform : `bool`, optional
        If `True` (default), call `DataCoordinate.standardize` to ensure that
        the data ID's dimensions are consistent with the dataset type's.
        `DatasetRef` instances for which those dimensions are not equal should
        not be created in new code, but are still supported for backwards
        compatibility. New code should only pass `False` if it can guarantee
        that the dimensions are already consistent.
    id_generation_mode : `DatasetIdGenEnum`
        ID generation option. `~DatasetIdGenEnum.UNIQUE` makes a random
        UUID4-type ID. `~DatasetIdGenEnum.DATAID_TYPE` makes a
        deterministic UUID5-type ID based on a dataset type name and
        ``dataId``. `~DatasetIdGenEnum.DATAID_TYPE_RUN` makes a
        deterministic UUID5-type ID based on a dataset type name, run
        collection name, and ``dataId``.

    Raises
    ------
    ValueError
        Raised if ``id`` is provided but ``run`` is not.

    See Also
    --------
    :ref:`daf_butler_organizing_datasets`
    """

    _serializedType = SerializedDatasetRef
    __slots__ = (
        "id",
        "datasetType",
        "dataId",
        "run",
    )

    def __init__(
        self,
        datasetType: DatasetType,
        dataId: DataCoordinate,
        *,
        id: Optional[DatasetId] = None,
        run: Optional[str] = None,
        conform: bool = True,
        id_generation_mode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE,
    ):
        self.datasetType = datasetType
        if conform:
            self.dataId = DataCoordinate.standardize(dataId, graph=datasetType.dimensions)
        else:
            self.dataId = dataId
        if id is not None:
            if run is None:
                raise ValueError(
                    f"Cannot provide id without run for dataset with id={id}, "
                    f"type={datasetType}, and dataId={dataId}."
                )
            self.run = run
            self.id = id
        else:
            if run is not None:
                self.run = run
                self.id = DatasetIdFactory().makeDatasetId(
                    self.run, self.datasetType, self.dataId, id_generation_mode
                )
            else:
                self.id = None
                self.run = None
                warnings.warn(
                    "Support for creating unresolved refs will soon be removed. Please contact the "
                    "middleware team for advice on modifying your code to use resolved refs.",
                    category=UnresolvedRefWarning,
                    stacklevel=_find_outside_stacklevel(),
                )
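
    # Illustrative sketch (not part of the original file): the constructor
    # supports three paths; ``dtype`` and ``dataId`` below are hypothetical.
    #
    #     DatasetRef(dtype, dataId, id=some_uuid, run="run/a")  # resolved
    #     DatasetRef(dtype, dataId, run="run/a")  # ID made by DatasetIdFactory
    #     DatasetRef(dtype, dataId)  # unresolved; emits UnresolvedRefWarning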

    def __eq__(self, other: Any) -> bool:
        try:
            return (self.datasetType, self.dataId, self.id) == (other.datasetType, other.dataId, other.id)
        except AttributeError:
            return NotImplemented

    def __hash__(self) -> int:
        return hash((self.datasetType, self.dataId, self.id))

    @property
    def dimensions(self) -> DimensionGraph:
        """Dimensions associated with the underlying `DatasetType`."""
        return self.datasetType.dimensions

    def __repr__(self) -> str:
        # We delegate to __str__ (i.e. use "!s") for the data ID below because
        # DataCoordinate's __repr__ - while adhering to the guidelines for
        # __repr__ - is much harder for users to read, while its __str__ just
        # produces a dict that can also be passed to DatasetRef's constructor.
        if self.id is not None:
            return f"DatasetRef({self.datasetType!r}, {self.dataId!s}, id={self.id}, run={self.run!r})"
        else:
            return f"DatasetRef({self.datasetType!r}, {self.dataId!s})"

    def __str__(self) -> str:
        s = f"{self.datasetType.name}@{self.dataId!s} [sc={self.datasetType.storageClass_name}]"
        if self.id is not None:
            s += f" (id={self.id})"
        return s

    def __lt__(self, other: Any) -> bool:
        # Sort by run, then DatasetType name, then DataCoordinate.
        # The __str__ representation is probably close enough, but we need to
        # ensure that sorting a DatasetRef matches what you would get if you
        # sorted DatasetType + DataCoordinate.
        if not isinstance(other, type(self)):
            return NotImplemented

        # Group by run if defined; it takes precedence over DatasetType.
        self_run = "" if self.run is None else self.run
        other_run = "" if other.run is None else other.run

        # Compare tuples in priority order.
        return (self_run, self.datasetType, self.dataId) < (other_run, other.datasetType, other.dataId)

    def to_simple(self, minimal: bool = False) -> SerializedDatasetRef:
        """Convert this class to a simple python type.

        This makes it suitable for serialization.

        Parameters
        ----------
        minimal : `bool`, optional
            Use minimal serialization. Requires Registry to convert
            back to a full type.

        Returns
        -------
        simple : `SerializedDatasetRef`
            The object converted to simplified form.
        """
        if minimal and self.id is not None:
            # The only thing needed to uniquely define a DatasetRef is its id
            # so that can be used directly if it is resolved and if it is not
            # a component DatasetRef. Store it in a dict to allow us to easily
            # add the planned origin information later without having to
            # support both an int and a dict in simple form.
            simple: Dict[str, Any] = {"id": self.id}
            if self.isComponent():
                # We can still be a little minimalist with a component,
                # but we will also need to record the datasetType component.
                simple["component"] = self.datasetType.component()
            return SerializedDatasetRef(**simple)

        # Convert to a dict form.
        as_dict: Dict[str, Any] = {
            "datasetType": self.datasetType.to_simple(minimal=minimal),
            "dataId": self.dataId.to_simple(),
        }

        # Only include the id entry if it is defined.
        if self.id is not None:
            as_dict["run"] = self.run
            as_dict["id"] = self.id

        return SerializedDatasetRef(**as_dict)
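
    # Illustrative sketch (not part of the original file): a resolved ref can
    # round-trip through its minimal form, which carries only the id (plus a
    # component name for component refs) and therefore needs a registry to
    # rehydrate; ``registry`` below is hypothetical.
    #
    #     simple = ref.to_simple(minimal=True)
    #     restored = DatasetRef.from_simple(simple, registry=registry)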

    @classmethod
    def from_simple(
        cls,
        simple: SerializedDatasetRef,
        universe: Optional[DimensionUniverse] = None,
        registry: Optional[Registry] = None,
        datasetType: Optional[DatasetType] = None,
    ) -> DatasetRef:
        """Construct a new object from simplified form.

        Generally this is data returned from the `to_simple` method.

        Parameters
        ----------
        simple : `SerializedDatasetRef`
            The value returned by `to_simple()`.
        universe : `DimensionUniverse`, optional
            The special graph of all known dimensions.
            Can be `None` if a registry is provided.
        registry : `lsst.daf.butler.Registry`, optional
            Registry to use to convert the simple form of a DatasetRef to
            a full `DatasetRef`. Can be `None` if a full description of
            the type is provided along with a universe.
        datasetType : `DatasetType`, optional
            If supplied, this will be used as the dataset type of the
            resulting DatasetRef instead of being read from the
            `SerializedDatasetRef`. This is useful when many refs share the
            same type, since memory can be saved. Defaults to `None`.

        Returns
        -------
        ref : `DatasetRef`
            Newly-constructed object.
        """
        # A minimalist component will just specify component and id and
        # require registry to reconstruct.
        if set(simple.dict(exclude_unset=True, exclude_defaults=True)).issubset({"id", "component"}):
            if registry is None:
                raise ValueError("Registry is required to construct component DatasetRef from integer id")
            if simple.id is None:
                raise ValueError("For minimal DatasetRef the ID must be defined.")
            ref = registry.getDataset(simple.id)
            if ref is None:
                raise RuntimeError(f"No matching dataset found in registry for id {simple.id}")
            if simple.component:
                ref = ref.makeComponentRef(simple.component)
            return ref

        if universe is None and registry is None:
            raise ValueError("One of universe or registry must be provided.")

        if universe is None and registry is not None:
            universe = registry.dimensions

        if universe is None:
            # This is for mypy.
            raise ValueError("Unable to determine a usable universe")

        if simple.datasetType is None and datasetType is None:
            # mypy
            raise ValueError("The DatasetType must be specified to construct a DatasetRef")
        if datasetType is None:
            if simple.datasetType is None:
                raise ValueError("Cannot determine Dataset type of this serialized class")
            datasetType = DatasetType.from_simple(simple.datasetType, universe=universe, registry=registry)

        if simple.dataId is None:
            # mypy
            raise ValueError("The DataId must be specified to construct a DatasetRef")
        dataId = DataCoordinate.from_simple(simple.dataId, universe=universe)

        # Issue our own warning so that it can be more explicit.
        if simple.id is None and simple.run is None:
            dstr = ""
            if simple.datasetType is None:
                dstr = f" (datasetType={datasetType.name!r})"
            warnings.warn(
                "Attempting to create an unresolved ref from simple form is deprecated. "
                f"Encountered with {simple!r}{dstr}.",
                category=UnresolvedRefWarning,
                stacklevel=_find_outside_stacklevel(),
            )

        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=UnresolvedRefWarning)
            return cls(datasetType, dataId, id=simple.id, run=simple.run)

    to_json = to_json_pydantic
    from_json: ClassVar = classmethod(from_json_pydantic)
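
    # Illustrative sketch (not part of the original file, and assuming the
    # usual from_json_pydantic signature): the helpers above provide a JSON
    # round trip; ``butler`` below is hypothetical.
    #
    #     json_str = ref.to_json()
    #     restored = DatasetRef.from_json(json_str, universe=butler.dimensions)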

    @classmethod
    def _unpickle(
        cls,
        datasetType: DatasetType,
        dataId: DataCoordinate,
        id: Optional[DatasetId],
        run: Optional[str],
    ) -> DatasetRef:
        """Create new `DatasetRef`.

        A custom factory method for use by `__reduce__` as a workaround for
        its lack of support for keyword arguments.
        """
        return cls(datasetType, dataId, id=id, run=run)

    def __reduce__(self) -> tuple:
        return (self._unpickle, (self.datasetType, self.dataId, self.id, self.run))

    def __deepcopy__(self, memo: dict) -> DatasetRef:
        # DatasetRef is recursively immutable; see note in @immutable
        # decorator.
        return self

    @deprecated(
        "This method will soon be a no-op since it will be impossible to create an unresolved ref.",
        version="26.0",
        category=UnresolvedRefWarning,
    )
    def resolved(self, id: DatasetId, run: str) -> DatasetRef:
        """Return a resolved `DatasetRef`.

        This is a new `DatasetRef` with the same data ID and dataset type
        and the given ID and run.

        Parameters
        ----------
        id : `DatasetId`
            The unique identifier assigned when the dataset is created.
        run : `str`
            The run this dataset was associated with when it was created.

        Returns
        -------
        ref : `DatasetRef`
            A new `DatasetRef`.
        """
        return DatasetRef(datasetType=self.datasetType, dataId=self.dataId, id=id, run=run, conform=False)

    @deprecated(
        "Support for unresolved refs will soon be removed. Please contact middleware developers for"
        " advice on how to modify your code.",
        category=UnresolvedRefWarning,
        version="26.0",
    )
    def unresolved(self) -> DatasetRef:
        """Return an unresolved `DatasetRef`.

        This is a new `DatasetRef` with the same data ID and dataset type,
        but no ID or run.

        Returns
        -------
        ref : `DatasetRef`
            A new `DatasetRef`.

        Notes
        -----
        This can be used to compare only the data ID and dataset type of a
        pair of `DatasetRef` instances, regardless of whether either is
        resolved::

            if ref1.unresolved() == ref2.unresolved():
                ...
        """
        # We have already warned about this so no need to warn again.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=UnresolvedRefWarning)
            return DatasetRef(datasetType=self.datasetType, dataId=self.dataId, conform=False)

    def expanded(self, dataId: DataCoordinate) -> DatasetRef:
        """Return a new `DatasetRef` with the given expanded data ID.

        Parameters
        ----------
        dataId : `DataCoordinate`
            Data ID for the new `DatasetRef`. Must compare equal to the
            original data ID.

        Returns
        -------
        ref : `DatasetRef`
            A new `DatasetRef` with the given data ID.
        """
        assert dataId == self.dataId
        return DatasetRef(
            datasetType=self.datasetType, dataId=dataId, id=self.id, run=self.run, conform=False
        )

    def isComponent(self) -> bool:
        """Indicate whether this `DatasetRef` refers to a component.

        Returns
        -------
        isComponent : `bool`
            `True` if this `DatasetRef` is a component, `False` otherwise.
        """
        return self.datasetType.isComponent()

    def isComposite(self) -> bool:
        """Indicate whether this `DatasetRef` is a composite type.

        Returns
        -------
        isComposite : `bool`
            `True` if this `DatasetRef` is a composite type, `False`
            otherwise.
        """
        return self.datasetType.isComposite()

    def _lookupNames(self) -> Tuple[LookupKey, ...]:
        """Name keys to use when looking up this DatasetRef in a
        configuration.

        The names are returned in order of priority.

        Returns
        -------
        names : `tuple` of `LookupKey`
            Tuple of the `DatasetType` name and the `StorageClass` name.
            If ``instrument`` is defined in the dataId, each of those names
            is added to the start of the tuple with a key derived from the
            value of ``instrument``.
        """
        # Special-case the instrument Dimension since we allow configs
        # to include the instrument name in the hierarchy.
        names: Tuple[LookupKey, ...] = self.datasetType._lookupNames()

        if "instrument" in self.dataId:
            names = tuple(n.clone(dataId={"instrument": self.dataId["instrument"]}) for n in names) + names

        return names
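
    # Illustrative sketch (not part of the original file): for a ref whose
    # dataId includes an instrument, instrument-qualified keys come first so
    # per-instrument configuration can override generic entries. The dataset
    # type name below is hypothetical.
    #
    #     ref._lookupNames()
    #     # -> instrument-specific keys for "calexp" and its storage class,
    #     #    followed by the plain "calexp" and storage-class keys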

    @staticmethod
    def groupByType(refs: Iterable[DatasetRef]) -> NamedKeyDict[DatasetType, List[DatasetRef]]:
        """Group an iterable of `DatasetRef` by `DatasetType`.

        Parameters
        ----------
        refs : `Iterable` [ `DatasetRef` ]
            `DatasetRef` instances to group.

        Returns
        -------
        grouped : `NamedKeyDict` [ `DatasetType`, `list` [ `DatasetRef` ] ]
            Grouped `DatasetRef` instances.
        """
        result: NamedKeyDict[DatasetType, List[DatasetRef]] = NamedKeyDict()
        for ref in refs:
            result.setdefault(ref.datasetType, []).append(ref)
        return result
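
    # Illustrative usage sketch (not part of the original file); ``refs`` is
    # a hypothetical iterable of DatasetRef instances:
    #
    #     for datasetType, typed_refs in DatasetRef.groupByType(refs).items():
    #         print(datasetType.name, len(typed_refs))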

    def getCheckedId(self) -> DatasetId:
        """Return ``self.id``, or raise if it is `None`.

        This trivial method exists to allow operations that would otherwise
        be natural list comprehensions to check that the ID is not `None` as
        well.

        Returns
        -------
        id : `DatasetId`
            ``self.id`` if it is not `None`.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``ref.id`` is `None`.
        """
        if self.id is None:
            raise AmbiguousDatasetError(f"ID for dataset {self} is `None`; a resolved reference is required.")
        return self.id

    def makeCompositeRef(self) -> DatasetRef:
        """Create a `DatasetRef` of the composite from a component ref.

        Requires that this `DatasetRef` is a component.

        Returns
        -------
        ref : `DatasetRef`
            A `DatasetRef` with a dataset type that corresponds to the
            composite parent of this component, and the same ID and run
            (which may be `None`, if they are `None` in ``self``).
        """
        # Assume that the data ID does not need to be standardized
        # and should match whatever this ref already has.
        return DatasetRef(
            self.datasetType.makeCompositeDatasetType(), self.dataId, id=self.id, run=self.run, conform=False
        )

    def makeComponentRef(self, name: str) -> DatasetRef:
        """Create a `DatasetRef` that corresponds to a component.

        Parameters
        ----------
        name : `str`
            Name of the component.

        Returns
        -------
        ref : `DatasetRef`
            A `DatasetRef` with a dataset type that corresponds to the given
            component, and the same ID and run
            (which may be `None`, if they are `None` in ``self``).
        """
        # Assume that the data ID does not need to be standardized
        # and should match whatever this ref already has.
        return DatasetRef(
            self.datasetType.makeComponentDatasetType(name),
            self.dataId,
            id=self.id,
            run=self.run,
            conform=False,
        )
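
    # Illustrative sketch (not part of the original file): component and
    # composite refs convert back and forth while sharing the parent's ID and
    # run; the component name "wcs" is hypothetical.
    #
    #     comp = ref.makeComponentRef("wcs")
    #     assert comp.isComponent() and comp.id == ref.id
    #     assert comp.makeCompositeRef() == ref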

    def overrideStorageClass(self, storageClass: str | StorageClass) -> DatasetRef:
        """Create a new `DatasetRef` from this one, but with a modified
        `DatasetType` that has a different `StorageClass`.

        Parameters
        ----------
        storageClass : `str` or `StorageClass`
            The new storage class.

        Returns
        -------
        modified : `DatasetRef`
            A new dataset reference that is the same as the current one but
            with a different storage class in the `DatasetType`.
        """
        return DatasetRef(
            datasetType=self.datasetType.overrideStorageClass(storageClass),
            dataId=self.dataId,
            id=self.id,
            run=self.run,
            conform=False,
        )

    datasetType: DatasetType
    """The definition of this dataset (`DatasetType`).

    Cannot be changed after a `DatasetRef` is constructed.
    """

    dataId: DataCoordinate
    """A mapping of `Dimension` primary key values that labels the dataset
    within a Collection (`DataCoordinate`).

    Cannot be changed after a `DatasetRef` is constructed.
    """

    run: Optional[str]
    """The name of the run that produced the dataset.

    Cannot be changed after a `DatasetRef` is constructed; use `resolved` or
    `unresolved` to add or remove this information when creating a new
    `DatasetRef`.
    """

    id: Optional[DatasetId]
    """Primary key of the dataset (`DatasetId` or `None`).

    Cannot be changed after a `DatasetRef` is constructed; use `resolved` or
    `unresolved` to add or remove this information when creating a new
    `DatasetRef`.
    """