Coverage for python/lsst/daf/butler/core/datasets/ref.py: 33%
230 statements

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = [
    "AmbiguousDatasetError",
    "DatasetId",
    "DatasetIdFactory",
    "DatasetIdGenEnum",
    "DatasetRef",
    "SerializedDatasetRef",
]

import enum
import sys
import uuid
from collections.abc import Iterable
from typing import TYPE_CHECKING, Any, ClassVar, Protocol, TypeAlias, runtime_checkable

import pydantic
from lsst.daf.butler._compat import PYDANTIC_V2, _BaseModelCompat
from lsst.utils.classes import immutable
from pydantic import StrictStr

from ..configSupport import LookupKey
from ..dimensions import DataCoordinate, DimensionGraph, DimensionUniverse, SerializedDataCoordinate
from ..json import from_json_pydantic, to_json_pydantic
from ..named import NamedKeyDict
from ..persistenceContext import PersistenceContextVars
from .type import DatasetType, SerializedDatasetType

if TYPE_CHECKING:
    from ...registry import Registry
    from ..storageClass import StorageClass


class AmbiguousDatasetError(Exception):
    """Raised when a `DatasetRef` is not resolved but should be.

    This happens when the `DatasetRef` has no ID or run but the requested
    operation requires one of them.
    """


@runtime_checkable
class _DatasetRefGroupedIterable(Protocol):
    """A package-private interface for iterables of `DatasetRef` that know how
    to efficiently group their contents by `DatasetType`.
    """

    def _iter_by_dataset_type(self) -> Iterable[tuple[DatasetType, Iterable[DatasetRef]]]:
        """Iterate over `DatasetRef` instances, one `DatasetType` at a time.

        Returns
        -------
        grouped : `~collections.abc.Iterable` [ `tuple` [ `DatasetType`, \
                `~collections.abc.Iterable` [ `DatasetRef` ] ] ]
            An iterable of tuples, in which the first element is a dataset
            type and the second is an iterable of `DatasetRef` objects with
            exactly that dataset type.
        """
        ...


class DatasetIdGenEnum(enum.Enum):
    """Enum used to specify dataset ID generation options."""

    UNIQUE = 0
    """Unique mode generates a unique ID for each inserted dataset, e.g. one
    auto-generated by the database or a random UUID.
    """

    DATAID_TYPE = 1
    """In this mode the ID is computed deterministically from a combination of
    dataset type and dataId.
    """

    DATAID_TYPE_RUN = 2
    """In this mode the ID is computed deterministically from a combination of
    dataset type, dataId, and run collection name.
    """


class DatasetIdFactory:
    """Factory for dataset IDs (UUIDs).

    For now the logic is hard-coded and is controlled by the user-provided
    value of `DatasetIdGenEnum`. In the future we may implement a configurable
    logic that can guess `DatasetIdGenEnum` value from other parameters.
    """

    NS_UUID = uuid.UUID("840b31d9-05cd-5161-b2c8-00d32b280d0f")
    """Namespace UUID used for UUID5 generation. Do not change. This was
    produced by `uuid.uuid5(uuid.NAMESPACE_DNS, "lsst.org")`.
    """

    def makeDatasetId(
        self,
        run: str,
        datasetType: DatasetType,
        dataId: DataCoordinate,
        idGenerationMode: DatasetIdGenEnum,
    ) -> uuid.UUID:
        """Generate dataset ID for a dataset.

        Parameters
        ----------
        run : `str`
            Name of the RUN collection for the dataset.
        datasetType : `DatasetType`
            Dataset type.
        dataId : `DataCoordinate`
            Expanded data ID for the dataset.
        idGenerationMode : `DatasetIdGenEnum`
            ID generation option. `~DatasetIdGenEnum.UNIQUE` makes a random
            UUID4-type ID. `~DatasetIdGenEnum.DATAID_TYPE` makes a
            deterministic UUID5-type ID based on a dataset type name and
            ``dataId``. `~DatasetIdGenEnum.DATAID_TYPE_RUN` makes a
            deterministic UUID5-type ID based on a dataset type name, run
            collection name, and ``dataId``.

        Returns
        -------
        datasetId : `uuid.UUID`
            Dataset identifier.
        """
        if idGenerationMode is DatasetIdGenEnum.UNIQUE:
            return uuid.uuid4()
        else:
            # WARNING: If you modify this code make sure that the order of
            # items in the `items` list below never changes.
            items: list[tuple[str, str]] = []
            if idGenerationMode is DatasetIdGenEnum.DATAID_TYPE:
                items = [
                    ("dataset_type", datasetType.name),
                ]
            elif idGenerationMode is DatasetIdGenEnum.DATAID_TYPE_RUN:
                items = [
                    ("dataset_type", datasetType.name),
                    ("run", run),
                ]
            else:
                raise ValueError(f"Unexpected ID generation mode: {idGenerationMode}")

            for name, value in sorted(dataId.byName().items()):
                items.append((name, str(value)))
            data = ",".join(f"{key}={value}" for key, value in items)
            return uuid.uuid5(self.NS_UUID, data)
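
    # Illustrative sketch (not part of the original module): how a
    # deterministic ID comes out of DATAID_TYPE_RUN mode.  The dataset type
    # name "raw", the run "HSC/raw/all", and the data ID keys below are
    # made-up examples.
    #
    #     factory = DatasetIdFactory()
    #     dataset_id = factory.makeDatasetId(
    #         "HSC/raw/all", raw_type, data_id, DatasetIdGenEnum.DATAID_TYPE_RUN
    #     )
    #
    # Internally ``items`` becomes [("dataset_type", "raw"), ("run",
    # "HSC/raw/all")] plus the sorted data ID entries, the payload string is
    # "dataset_type=raw,run=HSC/raw/all,detector=42,instrument=HSC", and the
    # result is uuid.uuid5(DatasetIdFactory.NS_UUID, payload).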


# This is constant, so don't recreate a set for each instance
_serializedDatasetRefFieldsSet = {"id", "datasetType", "dataId", "run", "component"}


class SerializedDatasetRef(_BaseModelCompat):
    """Simplified model of a `DatasetRef` suitable for serialization."""

    id: uuid.UUID
    datasetType: SerializedDatasetType | None = None
    dataId: SerializedDataCoordinate | None = None
    run: StrictStr | None = None
    component: StrictStr | None = None

    if PYDANTIC_V2:
        # Can not use "after" validator since in some cases the validator
        # seems to trigger with the datasetType field not yet set.
        @pydantic.model_validator(mode="before")  # type: ignore[attr-defined]
        @classmethod
        def check_consistent_parameters(cls, data: dict[str, Any]) -> dict[str, Any]:
            has_datasetType = data.get("datasetType") is not None
            has_dataId = data.get("dataId") is not None
            if has_datasetType is not has_dataId:
                raise ValueError("If specifying datasetType or dataId, must specify both.")

            if data.get("component") is not None and has_datasetType:
                raise ValueError("datasetType can not be set if component is given.")
            return data

    else:

        @pydantic.validator("dataId")
        def _check_dataId(cls, v: Any, values: dict[str, Any]) -> Any:  # noqa: N805
            if v and (d := "datasetType") in values and values[d] is None:
                raise ValueError("Can not specify 'dataId' without specifying 'datasetType'")
            return v

        @pydantic.validator("component")
        def _check_component(cls, v: Any, values: dict[str, Any]) -> Any:  # noqa: N805
            # Component should not be given if datasetType is given
            if v and (d := "datasetType") in values and values[d] is not None:
                raise ValueError(f"datasetType ({values[d]}) can not be set if component is given ({v}).")
            return v

    @classmethod
    def direct(
        cls,
        *,
        id: str,
        run: str,
        datasetType: dict[str, Any] | None = None,
        dataId: dict[str, Any] | None = None,
        component: str | None = None,
    ) -> SerializedDatasetRef:
        """Construct a `SerializedDatasetRef` directly without validators.

        Notes
        -----
        This differs from the pydantic "construct" method in that the
        arguments are explicitly what the model requires, and it will recurse
        through members, constructing them from their corresponding `direct`
        methods.

        The ``id`` parameter is a string representation of the dataset ID; it
        is converted to a UUID by this method.

        This method should only be called when the inputs are trusted.
        """
        serialized_datasetType = (
            SerializedDatasetType.direct(**datasetType) if datasetType is not None else None
        )
        serialized_dataId = SerializedDataCoordinate.direct(**dataId) if dataId is not None else None

        node = cls.model_construct(
            _fields_set=_serializedDatasetRefFieldsSet,
            id=uuid.UUID(id),
            datasetType=serialized_datasetType,
            dataId=serialized_dataId,
            run=sys.intern(run),
            component=component,
        )

        return node
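
    # Usage sketch (illustrative only): rebuilding a SerializedDatasetRef from
    # trusted, already-validated content.  ``dataset_type_dict`` and
    # ``data_id_dict`` are hypothetical dicts matching the ``direct`` inputs
    # of SerializedDatasetType and SerializedDataCoordinate respectively.
    #
    #     sref = SerializedDatasetRef.direct(
    #         id="6f7eb6ea-9f0a-4dbb-9231-3ae2b0e0cf07",
    #         run="HSC/runs/example",
    #         datasetType=dataset_type_dict,
    #         dataId=data_id_dict,
    #     )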


DatasetId: TypeAlias = uuid.UUID
"""A type-annotation alias for dataset ID providing typing flexibility.
"""


@immutable
class DatasetRef:
    """Reference to a Dataset in a `Registry`.

    A `DatasetRef` may point to a Dataset that does not yet exist
    (e.g., because it is a predicted input for provenance).

    Parameters
    ----------
    datasetType : `DatasetType`
        The `DatasetType` for this Dataset.
    dataId : `DataCoordinate`
        A mapping of dimensions that labels the Dataset within a Collection.
    run : `str`
        The name of the run this dataset was associated with when it was
        created.
    id : `DatasetId`, optional
        The unique identifier assigned when the dataset is created. If ``id``
        is not specified, a new unique ID will be created.
    conform : `bool`, optional
        If `True` (default), call `DataCoordinate.standardize` to ensure that
        the data ID's dimensions are consistent with the dataset type's.
        `DatasetRef` instances for which those dimensions are not equal should
        not be created in new code, but are still supported for backwards
        compatibility. New code should only pass `False` if it can guarantee
        that the dimensions are already consistent.
    id_generation_mode : `DatasetIdGenEnum`
        ID generation option. `~DatasetIdGenEnum.UNIQUE` makes a random
        UUID4-type ID. `~DatasetIdGenEnum.DATAID_TYPE` makes a
        deterministic UUID5-type ID based on a dataset type name and
        ``dataId``. `~DatasetIdGenEnum.DATAID_TYPE_RUN` makes a
        deterministic UUID5-type ID based on a dataset type name, run
        collection name, and ``dataId``.

    See Also
    --------
    :ref:`daf_butler_organizing_datasets`
    """

    _serializedType = SerializedDatasetRef
    __slots__ = (
        "_id",
        "datasetType",
        "dataId",
        "run",
    )

    def __init__(
        self,
        datasetType: DatasetType,
        dataId: DataCoordinate,
        run: str,
        *,
        id: DatasetId | None = None,
        conform: bool = True,
        id_generation_mode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE,
    ):
        self.datasetType = datasetType
        if conform:
            self.dataId = DataCoordinate.standardize(dataId, graph=datasetType.dimensions)
        else:
            self.dataId = dataId
        self.run = run
        if id is not None:
            self._id = id.int
        else:
            self._id = (
                DatasetIdFactory()
                .makeDatasetId(self.run, self.datasetType, self.dataId, id_generation_mode)
                .int
            )
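
    # Construction sketch (illustrative): ``raw_type`` and ``data_id`` are
    # assumed to be an existing DatasetType and a matching DataCoordinate.
    # Omitting ``id`` lets DatasetIdFactory assign one according to
    # ``id_generation_mode`` (a random UUID4 by default).
    #
    #     ref = DatasetRef(raw_type, data_id, run="HSC/raw/all")
    #     ref.id   # uuid.UUID generated at construction
    #     ref.run  # "HSC/raw/all"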

    @property
    def id(self) -> DatasetId:
        """Primary key of the dataset (`DatasetId`).

        Cannot be changed after a `DatasetRef` is constructed.
        """
        return uuid.UUID(int=self._id)

    def __eq__(self, other: Any) -> bool:
        try:
            return (self.datasetType, self.dataId, self.id) == (other.datasetType, other.dataId, other.id)
        except AttributeError:
            return NotImplemented

    def __hash__(self) -> int:
        return hash((self.datasetType, self.dataId, self.id))

    @property
    def dimensions(self) -> DimensionGraph:
        """Dimensions associated with the underlying `DatasetType`."""
        return self.datasetType.dimensions

    def __repr__(self) -> str:
        # We delegate to __str__ (i.e. use "!s") for the data ID below because
        # DataCoordinate's __repr__ - while adhering to the guidelines for
        # __repr__ - is much harder for users to read, while its __str__ just
        # produces a dict that can also be passed to DatasetRef's constructor.
        return f"DatasetRef({self.datasetType!r}, {self.dataId!s}, run={self.run!r}, id={self.id})"

    def __str__(self) -> str:
        s = (
            f"{self.datasetType.name}@{self.dataId!s} [sc={self.datasetType.storageClass_name}]"
            f" (run={self.run} id={self.id})"
        )
        return s

    def __lt__(self, other: Any) -> bool:
        # Sort by run, then DatasetType name, and then by DataCoordinate.
        # The __str__ representation is probably close enough but we
        # need to ensure that sorting a DatasetRef matches what you would
        # get if you sorted DatasetType + DataCoordinate.
        if not isinstance(other, type(self)):
            return NotImplemented

        # Group by run if defined; it takes precedence over DatasetType.
        self_run = "" if self.run is None else self.run
        other_run = "" if other.run is None else other.run

        # Compare tuples in priority order.
        return (self_run, self.datasetType, self.dataId) < (other_run, other.datasetType, other.dataId)

    def to_simple(self, minimal: bool = False) -> SerializedDatasetRef:
        """Convert this class to a simple python type.

        This makes it suitable for serialization.

        Parameters
        ----------
        minimal : `bool`, optional
            Use minimal serialization. Requires Registry to convert
            back to a full type.

        Returns
        -------
        simple : `SerializedDatasetRef`
            The object converted to simplified form.
        """
        if minimal:
            # The only thing needed to uniquely define a DatasetRef is its id,
            # so that can be used directly if it is not a component DatasetRef.
            # Store it in a dict to allow us to easily add the planned origin
            # information later without having to support an int and dict in
            # simple form.
            simple: dict[str, Any] = {"id": self.id}
            if self.isComponent():
                # We can still be a little minimalist with a component
                # but we will also need to record the datasetType component.
                simple["component"] = self.datasetType.component()
            return SerializedDatasetRef(**simple)

        return SerializedDatasetRef(
            datasetType=self.datasetType.to_simple(minimal=minimal),
            dataId=self.dataId.to_simple(),
            run=self.run,
            id=self.id,
        )
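
    # Serialization sketch (illustrative): ``to_simple`` produces a
    # SerializedDatasetRef.  With ``minimal=True`` only the UUID (plus the
    # component name for component refs) is kept, so a Registry is needed to
    # rebuild the full ref later.
    #
    #     full = ref.to_simple()               # id, datasetType, dataId, run
    #     small = ref.to_simple(minimal=True)  # id (and component, if any)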

    @classmethod
    def from_simple(
        cls,
        simple: SerializedDatasetRef,
        universe: DimensionUniverse | None = None,
        registry: Registry | None = None,
        datasetType: DatasetType | None = None,
    ) -> DatasetRef:
        """Construct a new object from simplified form.

        Generally this is data returned from the `to_simple` method.

        Parameters
        ----------
        simple : `SerializedDatasetRef`
            The value returned by `to_simple()`.
        universe : `DimensionUniverse`, optional
            The special graph of all known dimensions.
            Can be `None` if a registry is provided.
        registry : `lsst.daf.butler.Registry`, optional
            Registry to use to convert simple form of a DatasetRef to
            a full `DatasetRef`. Can be `None` if a full description of
            the type is provided along with a universe.
        datasetType : `DatasetType`, optional
            If datasetType is supplied, this will be used as the datasetType
            object in the resulting DatasetRef instead of being read from
            the `SerializedDatasetRef`. This is useful when many refs share
            the same type, as memory can be saved. Defaults to `None`.

        Returns
        -------
        ref : `DatasetRef`
            Newly-constructed object.
        """
        cache = PersistenceContextVars.datasetRefs.get()
        localName = sys.intern(
            datasetType.name
            if datasetType is not None
            else (x.name if (x := simple.datasetType) is not None else "")
        )
        key = (simple.id.int, localName)
        if cache is not None and (cachedRef := cache.get(key, None)) is not None:
            return cachedRef
        # Minimalist component will just specify component and id and
        # require registry to reconstruct.
        if not (simple.datasetType is not None or simple.dataId is not None or simple.run is not None):
            if registry is None:
                raise ValueError("Registry is required to construct component DatasetRef from integer id")
            if simple.id is None:
                raise ValueError("For minimal DatasetRef the ID must be defined.")
            ref = registry.getDataset(simple.id)
            if ref is None:
                raise RuntimeError(f"No matching dataset found in registry for id {simple.id}")
            if simple.component:
                ref = ref.makeComponentRef(simple.component)
            if cache is not None:
                cache[key] = ref
            return ref

        if universe is None and registry is None:
            raise ValueError("One of universe or registry must be provided.")

        if universe is None and registry is not None:
            universe = registry.dimensions

        if universe is None:
            # this is for mypy
            raise ValueError("Unable to determine a usable universe")

        if simple.datasetType is None and datasetType is None:
            # mypy
            raise ValueError("The DatasetType must be specified to construct a DatasetRef")
        if datasetType is None:
            if simple.datasetType is None:
                raise ValueError("Cannot determine Dataset type of this serialized class")
            datasetType = DatasetType.from_simple(simple.datasetType, universe=universe, registry=registry)

        if simple.dataId is None:
            # mypy
            raise ValueError("The DataId must be specified to construct a DatasetRef")
        dataId = DataCoordinate.from_simple(simple.dataId, universe=universe)

        # Check that simple ref is resolved.
        if simple.run is None:
            dstr = ""
            if simple.datasetType is None:
                dstr = f" (datasetType={datasetType.name!r})"
            raise ValueError(
                "Run collection name is missing from serialized representation. "
                f"Encountered with {simple!r}{dstr}."
            )

        newRef = cls(datasetType, dataId, id=simple.id, run=simple.run)
        if cache is not None:
            cache[key] = newRef
        return newRef
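
    # Round-trip sketch (illustrative): ``universe`` is assumed to be the
    # DimensionUniverse the original ref was defined in, and ``registry`` a
    # Registry that knows about the dataset.
    #
    #     same_ref = DatasetRef.from_simple(ref.to_simple(), universe=universe)
    #     assert same_ref == ref
    #
    #     # A minimal serialization instead needs the registry to look the
    #     # dataset up by its UUID.
    #     same_ref = DatasetRef.from_simple(ref.to_simple(minimal=True), registry=registry)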

    to_json = to_json_pydantic
    from_json: ClassVar = classmethod(from_json_pydantic)

    @classmethod
    def _unpickle(
        cls,
        datasetType: DatasetType,
        dataId: DataCoordinate,
        id: DatasetId,
        run: str,
    ) -> DatasetRef:
        """Create new `DatasetRef`.

        A custom factory method for use by `__reduce__` as a workaround for
        its lack of support for keyword arguments.
        """
        return cls(datasetType, dataId, id=id, run=run)

    def __reduce__(self) -> tuple:
        return (self._unpickle, (self.datasetType, self.dataId, self.id, self.run))

    def __deepcopy__(self, memo: dict) -> DatasetRef:
        # DatasetRef is recursively immutable; see note in @immutable
        # decorator.
        return self

    def expanded(self, dataId: DataCoordinate) -> DatasetRef:
        """Return a new `DatasetRef` with the given expanded data ID.

        Parameters
        ----------
        dataId : `DataCoordinate`
            Data ID for the new `DatasetRef`. Must compare equal to the
            original data ID.

        Returns
        -------
        ref : `DatasetRef`
            A new `DatasetRef` with the given data ID.
        """
        assert dataId == self.dataId
        return DatasetRef(
            datasetType=self.datasetType, dataId=dataId, id=self.id, run=self.run, conform=False
        )

    def isComponent(self) -> bool:
        """Indicate whether this `DatasetRef` refers to a component.

        Returns
        -------
        isComponent : `bool`
            `True` if this `DatasetRef` is a component, `False` otherwise.
        """
        return self.datasetType.isComponent()

    def isComposite(self) -> bool:
        """Boolean indicating whether this `DatasetRef` is a composite type.

        Returns
        -------
        isComposite : `bool`
            `True` if this `DatasetRef` is a composite type, `False`
            otherwise.
        """
        return self.datasetType.isComposite()

    def _lookupNames(self) -> tuple[LookupKey, ...]:
        """Name keys to use when looking up this DatasetRef in a configuration.

        The names are returned in order of priority.

        Returns
        -------
        names : `tuple` of `LookupKey`
            Tuple of the `DatasetType` name and the `StorageClass` name.
            If ``instrument`` is defined in the dataId, each of those names
            is added to the start of the tuple with a key derived from the
            value of ``instrument``.
        """
        # Special case the instrument Dimension since we allow configs
        # to include the instrument name in the hierarchy.
        names: tuple[LookupKey, ...] = self.datasetType._lookupNames()

        if "instrument" in self.dataId:
            names = tuple(n.clone(dataId={"instrument": self.dataId["instrument"]}) for n in names) + names

        return names
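
    # Ordering sketch (illustrative): for a dataset type named "calexp" with
    # storage class "ExposureF" and a data ID containing instrument="HSC",
    # the lookup keys would be tried roughly in this order:
    #
    #     calexp+instrument=HSC, ExposureF+instrument=HSC, calexp, ExposureF
    #
    # i.e. the instrument-qualified clones are prepended to the plain keys.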

    @staticmethod
    def groupByType(refs: Iterable[DatasetRef]) -> NamedKeyDict[DatasetType, list[DatasetRef]]:
        """Group an iterable of `DatasetRef` by `DatasetType`.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` [ `DatasetRef` ]
            `DatasetRef` instances to group.

        Returns
        -------
        grouped : `NamedKeyDict` [ `DatasetType`, `list` [ `DatasetRef` ] ]
            Grouped `DatasetRef` instances.

        Notes
        -----
        When lazy item-iterables are acceptable instead of a full mapping,
        `iter_by_type` can in some cases be far more efficient.
        """
        result: NamedKeyDict[DatasetType, list[DatasetRef]] = NamedKeyDict()
        for ref in refs:
            result.setdefault(ref.datasetType, []).append(ref)
        return result
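
    # Usage sketch (illustrative): ``refs`` is any iterable of DatasetRef.
    #
    #     for dataset_type, refs_for_type in DatasetRef.groupByType(refs).items():
    #         print(dataset_type.name, len(refs_for_type))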

    @staticmethod
    def iter_by_type(
        refs: Iterable[DatasetRef],
    ) -> Iterable[tuple[DatasetType, Iterable[DatasetRef]]]:
        """Group an iterable of `DatasetRef` by `DatasetType` with special
        hooks for custom iterables that can do this efficiently.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` [ `DatasetRef` ]
            `DatasetRef` instances to group. If this satisfies the
            `_DatasetRefGroupedIterable` protocol, its
            `~_DatasetRefGroupedIterable._iter_by_dataset_type` method will
            be called.

        Returns
        -------
        grouped : `~collections.abc.Iterable` [ `tuple` [ `DatasetType`, \
                `Iterable` [ `DatasetRef` ] ] ]
            Grouped `DatasetRef` instances.
        """
        if isinstance(refs, _DatasetRefGroupedIterable):
            return refs._iter_by_dataset_type()
        return DatasetRef.groupByType(refs).items()

    def makeCompositeRef(self) -> DatasetRef:
        """Create a `DatasetRef` of the composite from a component ref.

        Requires that this `DatasetRef` is a component.

        Returns
        -------
        ref : `DatasetRef`
            A `DatasetRef` with a dataset type that corresponds to the
            composite parent of this component, and the same ID and run
            (which may be `None`, if they are `None` in ``self``).
        """
        # Assume that the data ID does not need to be standardized
        # and should match whatever this ref already has.
        return DatasetRef(
            self.datasetType.makeCompositeDatasetType(), self.dataId, id=self.id, run=self.run, conform=False
        )

    def makeComponentRef(self, name: str) -> DatasetRef:
        """Create a `DatasetRef` that corresponds to a component.

        Parameters
        ----------
        name : `str`
            Name of the component.

        Returns
        -------
        ref : `DatasetRef`
            A `DatasetRef` with a dataset type that corresponds to the given
            component, and the same ID and run
            (which may be `None`, if they are `None` in ``self``).
        """
        # Assume that the data ID does not need to be standardized
        # and should match whatever this ref already has.
        return DatasetRef(
            self.datasetType.makeComponentDatasetType(name),
            self.dataId,
            id=self.id,
            run=self.run,
            conform=False,
        )
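
    # Component sketch (illustrative): a component ref shares the parent's
    # data ID, run, and UUID; only the dataset type changes.  ``calexp_ref``
    # is a hypothetical ref to a composite "calexp" dataset.
    #
    #     wcs_ref = calexp_ref.makeComponentRef("wcs")
    #     assert wcs_ref.isComponent()
    #     assert wcs_ref.id == calexp_ref.id and wcs_ref.run == calexp_ref.run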

    def overrideStorageClass(self, storageClass: str | StorageClass) -> DatasetRef:
        """Create a new `DatasetRef` from this one, but with a modified
        `DatasetType` that has a different `StorageClass`.

        Parameters
        ----------
        storageClass : `str` or `StorageClass`
            The new storage class.

        Returns
        -------
        modified : `DatasetRef`
            A new dataset reference that is the same as the current one but
            with a different storage class in the `DatasetType`.
        """
        return self.replace(storage_class=storageClass)

    def replace(
        self,
        *,
        id: DatasetId | None = None,
        run: str | None = None,
        storage_class: str | StorageClass | None = None,
    ) -> DatasetRef:
        """Create a new `DatasetRef` from this one, but with some modified
        attributes.

        Parameters
        ----------
        id : `DatasetId` or `None`
            If not `None` then update dataset ID.
        run : `str` or `None`
            If not `None` then update run collection name. If ``id`` is
            `None` then this will also cause a new dataset ID to be generated.
        storage_class : `str` or `StorageClass` or `None`
            The new storage class. If not `None`, replaces existing storage
            class.

        Returns
        -------
        modified : `DatasetRef`
            A new dataset reference with updated attributes.
        """
        if storage_class is None:
            datasetType = self.datasetType
        else:
            datasetType = self.datasetType.overrideStorageClass(storage_class)
        if run is None:
            run = self.run
            # Do not regenerate dataset ID if run is the same.
            if id is None:
                id = self.id
        return DatasetRef(
            datasetType=datasetType,
            dataId=self.dataId,
            run=run,
            id=id,
            conform=False,
        )
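
    # Replacement sketch (illustrative): ``replace`` and
    # ``overrideStorageClass`` return new refs, since DatasetRef is immutable.
    # Changing the run without supplying ``id`` yields a newly generated UUID;
    # changing only the storage class keeps the existing UUID.
    #
    #     moved = ref.replace(run="u/someone/rerun")
    #     rescoped = ref.overrideStorageClass("ImageF")
    #     assert rescoped.id == ref.id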

    def is_compatible_with(self, ref: DatasetRef) -> bool:
        """Determine if the given `DatasetRef` is compatible with this one.

        Parameters
        ----------
        ref : `DatasetRef`
            Dataset ref to check.

        Returns
        -------
        is_compatible : `bool`
            Returns `True` if the other dataset ref is either the same as this
            one, or if its dataset type is compatible with this one and the
            dataId and dataset ID match.

        Notes
        -----
        Compatibility requires that the dataId and dataset ID match and the
        `DatasetType` is compatible. Compatibility is defined as the storage
        class associated with the dataset type of the other ref being
        convertible to this storage class.

        Specifically this means that if you have done:

        .. code-block:: py

            new_ref = ref.overrideStorageClass(sc)

        and this is successful, then the guarantee is that:

        .. code-block:: py

            assert ref.is_compatible_with(new_ref) is True

        since we know that the python type associated with the new ref can
        be converted to the original python type. The reverse is not
        guaranteed and depends on whether bidirectional converters have been
        registered.
        """
        if self.id != ref.id:
            return False
        if self.dataId != ref.dataId:
            return False
        if self.run != ref.run:
            return False
        return self.datasetType.is_compatible_with(ref.datasetType)

    datasetType: DatasetType
    """The definition of this dataset (`DatasetType`).

    Cannot be changed after a `DatasetRef` is constructed.
    """

    dataId: DataCoordinate
    """A mapping of `Dimension` primary key values that labels the dataset
    within a Collection (`DataCoordinate`).

    Cannot be changed after a `DatasetRef` is constructed.
    """

    run: str
    """The name of the run that produced the dataset.

    Cannot be changed after a `DatasetRef` is constructed.
    """