Coverage for python/lsst/daf/butler/core/datasets/ref.py: 36%
214 statements
coverage.py v7.2.7, created at 2023-07-21 09:55 +0000

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = [
    "AmbiguousDatasetError",
    "DatasetId",
    "DatasetIdFactory",
    "DatasetIdGenEnum",
    "DatasetRef",
    "SerializedDatasetRef",
]

import enum
import sys
import uuid
from collections.abc import Iterable
from typing import TYPE_CHECKING, Any, ClassVar, Protocol, TypeAlias, runtime_checkable

from lsst.daf.butler._compat import _BaseModelCompat
from lsst.utils.classes import immutable
from pydantic import StrictStr, validator

from ..configSupport import LookupKey
from ..dimensions import DataCoordinate, DimensionGraph, DimensionUniverse, SerializedDataCoordinate
from ..json import from_json_pydantic, to_json_pydantic
from ..named import NamedKeyDict
from ..persistenceContext import PersistenceContextVars
from .type import DatasetType, SerializedDatasetType

if TYPE_CHECKING:
    from ...registry import Registry
    from ..storageClass import StorageClass


class AmbiguousDatasetError(Exception):
    """Raised when a `DatasetRef` is not resolved but should be.

    This happens when the `DatasetRef` has no ID or run but the requested
    operation requires one of them.
    """


@runtime_checkable
class _DatasetRefGroupedIterable(Protocol):
    """A package-private interface for iterables of `DatasetRef` that know how
    to efficiently group their contents by `DatasetType`.
    """

    def _iter_by_dataset_type(self) -> Iterable[tuple[DatasetType, Iterable[DatasetRef]]]:
        """Iterate over `DatasetRef` instances, one `DatasetType` at a time.

        Returns
        -------
        grouped : `~collections.abc.Iterable` [ `tuple` [ `DatasetType`, \
                `~collections.abc.Iterable` [ `DatasetRef` ] ] ]
            An iterable of tuples, in which the first element is a dataset
            type and the second is an iterable of `DatasetRef` objects with
            exactly that dataset type.
        """
        ...


class DatasetIdGenEnum(enum.Enum):
    """Enum used to specify dataset ID generation options."""

    UNIQUE = 0
    """Unique mode generates a unique ID for each inserted dataset, e.g.
    auto-generated by the database or a random UUID.
    """

    DATAID_TYPE = 1
    """In this mode the ID is computed deterministically from a combination
    of dataset type and dataId.
    """

    DATAID_TYPE_RUN = 2
    """In this mode the ID is computed deterministically from a combination
    of dataset type, dataId, and run collection name.
    """


class DatasetIdFactory:
    """Factory for dataset IDs (UUIDs).

    For now the logic is hard-coded and is controlled by the user-provided
    value of `DatasetIdGenEnum`. In the future we may implement configurable
    logic that can guess the `DatasetIdGenEnum` value from other parameters.
    """

    NS_UUID = uuid.UUID("840b31d9-05cd-5161-b2c8-00d32b280d0f")
    """Namespace UUID used for UUID5 generation. Do not change. This was
    produced by `uuid.uuid5(uuid.NAMESPACE_DNS, "lsst.org")`.
    """

    def makeDatasetId(
        self,
        run: str,
        datasetType: DatasetType,
        dataId: DataCoordinate,
        idGenerationMode: DatasetIdGenEnum,
    ) -> uuid.UUID:
        """Generate dataset ID for a dataset.

        Parameters
        ----------
        run : `str`
            Name of the RUN collection for the dataset.
        datasetType : `DatasetType`
            Dataset type.
        dataId : `DataCoordinate`
            Expanded data ID for the dataset.
        idGenerationMode : `DatasetIdGenEnum`
            ID generation option. `~DatasetIdGenEnum.UNIQUE` makes a random
            UUID4-type ID. `~DatasetIdGenEnum.DATAID_TYPE` makes a
            deterministic UUID5-type ID based on a dataset type name and
            ``dataId``. `~DatasetIdGenEnum.DATAID_TYPE_RUN` makes a
            deterministic UUID5-type ID based on a dataset type name, run
            collection name, and ``dataId``.

        Returns
        -------
        datasetId : `uuid.UUID`
            Dataset identifier.
        """
        if idGenerationMode is DatasetIdGenEnum.UNIQUE:
            return uuid.uuid4()
        else:
            # WARNING: If you modify this code make sure that the order of
            # items in the `items` list below never changes.
            items: list[tuple[str, str]] = []
            if idGenerationMode is DatasetIdGenEnum.DATAID_TYPE:
                items = [
                    ("dataset_type", datasetType.name),
                ]
            elif idGenerationMode is DatasetIdGenEnum.DATAID_TYPE_RUN:
                items = [
                    ("dataset_type", datasetType.name),
                    ("run", run),
                ]
            else:
                raise ValueError(f"Unexpected ID generation mode: {idGenerationMode}")

            for name, value in sorted(dataId.byName().items()):
                items.append((name, str(value)))
            data = ",".join(f"{key}={value}" for key, value in items)
            return uuid.uuid5(self.NS_UUID, data)
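
    # Illustrative usage sketch (not part of the original module): the
    # deterministic modes reduce to a UUID5 over a sorted "key=value" string,
    # so equal inputs always produce the same ID.  ``dataset_type`` and
    # ``data_id`` below are assumed to be a resolved `DatasetType` and an
    # expanded `DataCoordinate` obtained elsewhere (e.g. from a registry).
    #
    #     factory = DatasetIdFactory()
    #     a = factory.makeDatasetId("HSC/runs/test", dataset_type, data_id,
    #                               DatasetIdGenEnum.DATAID_TYPE_RUN)
    #     b = factory.makeDatasetId("HSC/runs/test", dataset_type, data_id,
    #                               DatasetIdGenEnum.DATAID_TYPE_RUN)
    #     assert a == b and a.version == 5       # deterministic UUID5
    #     c = factory.makeDatasetId("HSC/runs/test", dataset_type, data_id,
    #                               DatasetIdGenEnum.UNIQUE)
    #     assert c.version == 4                  # random UUID4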


# This is constant, so don't recreate a set for each instance
_serializedDatasetRefFieldsSet = {"id", "datasetType", "dataId", "run", "component"}


class SerializedDatasetRef(_BaseModelCompat):
    """Simplified model of a `DatasetRef` suitable for serialization."""

    id: uuid.UUID
    datasetType: SerializedDatasetType | None = None
    dataId: SerializedDataCoordinate | None = None
    run: StrictStr | None = None
    component: StrictStr | None = None

    @validator("dataId")
    def _check_dataId(cls, v: Any, values: dict[str, Any]) -> Any:  # noqa: N805
        if (d := "datasetType") in values and values[d] is None:
            raise ValueError("Can not specify 'dataId' without specifying 'datasetType'")
        return v

    @validator("run")
    def _check_run(cls, v: Any, values: dict[str, Any]) -> Any:  # noqa: N805
        if v and (i := "id") in values and values[i] is None:
            raise ValueError("'run' cannot be provided unless 'id' is.")
        return v

    @validator("component")
    def _check_component(cls, v: Any, values: dict[str, Any]) -> Any:  # noqa: N805
        # Component should not be given if datasetType is given.
        if v and (d := "datasetType") in values and values[d] is not None:
            raise ValueError(f"datasetType ({values[d]}) can not be set if component is given ({v}).")
        return v

    @classmethod
    def direct(
        cls,
        *,
        id: str,
        run: str,
        datasetType: dict[str, Any] | None = None,
        dataId: dict[str, Any] | None = None,
        component: str | None = None,
    ) -> SerializedDatasetRef:
        """Construct a `SerializedDatasetRef` directly without validators.

        Notes
        -----
        This differs from the pydantic "construct" method in that the
        arguments are explicitly what the model requires, and it will recurse
        through members, constructing them from their corresponding ``direct``
        methods.

        The ``id`` parameter is a string representation of the dataset ID; it
        is converted to a UUID by this method.

        This method should only be called when the inputs are trusted.
        """
        serialized_datasetType = (
            SerializedDatasetType.direct(**datasetType) if datasetType is not None else None
        )
        serialized_dataId = SerializedDataCoordinate.direct(**dataId) if dataId is not None else None

        node = cls.model_construct(
            _fields_set=_serializedDatasetRefFieldsSet,
            id=uuid.UUID(id),
            datasetType=serialized_datasetType,
            dataId=serialized_dataId,
            run=sys.intern(run),
            component=component,
        )

        return node
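
    # Illustrative usage sketch (not part of the original module): ``direct``
    # skips pydantic validation and interns the run name, so it is the fast
    # path for deserializing large numbers of refs from trusted JSON.  The
    # UUID string below is a made-up example value.
    #
    #     sref = SerializedDatasetRef.direct(
    #         id="8c2eff56-fc1e-4b64-8c4d-7c23a2b1f3a9",
    #         run="HSC/runs/test",
    #     )
    #     assert isinstance(sref.id, uuid.UUID) and sref.datasetType is None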


DatasetId: TypeAlias = uuid.UUID
"""A type-annotation alias for dataset ID providing typing flexibility.
"""


@immutable
class DatasetRef:
    """Reference to a Dataset in a `Registry`.

    A `DatasetRef` may point to a Dataset that does not yet exist
    (e.g., because it is a predicted input for provenance).

    Parameters
    ----------
    datasetType : `DatasetType`
        The `DatasetType` for this Dataset.
    dataId : `DataCoordinate`
        A mapping of dimensions that labels the Dataset within a Collection.
    run : `str`
        The name of the run this dataset was associated with when it was
        created.
    id : `DatasetId`, optional
        The unique identifier assigned when the dataset is created. If ``id``
        is not specified, a new unique ID will be created.
    conform : `bool`, optional
        If `True` (default), call `DataCoordinate.standardize` to ensure that
        the data ID's dimensions are consistent with the dataset type's.
        `DatasetRef` instances for which those dimensions are not equal should
        not be created in new code, but are still supported for backwards
        compatibility. New code should only pass `False` if it can guarantee
        that the dimensions are already consistent.
    id_generation_mode : `DatasetIdGenEnum`
        ID generation option. `~DatasetIdGenEnum.UNIQUE` makes a random
        UUID4-type ID. `~DatasetIdGenEnum.DATAID_TYPE` makes a
        deterministic UUID5-type ID based on a dataset type name and
        ``dataId``. `~DatasetIdGenEnum.DATAID_TYPE_RUN` makes a
        deterministic UUID5-type ID based on a dataset type name, run
        collection name, and ``dataId``.

    See Also
    --------
    :ref:`daf_butler_organizing_datasets`
    """

    _serializedType = SerializedDatasetRef
    __slots__ = (
        "_id",
        "datasetType",
        "dataId",
        "run",
    )

    def __init__(
        self,
        datasetType: DatasetType,
        dataId: DataCoordinate,
        run: str,
        *,
        id: DatasetId | None = None,
        conform: bool = True,
        id_generation_mode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE,
    ):
        self.datasetType = datasetType
        if conform:
            self.dataId = DataCoordinate.standardize(dataId, graph=datasetType.dimensions)
        else:
            self.dataId = dataId
        self.run = run
        if id is not None:
            self._id = id.int
        else:
            self._id = (
                DatasetIdFactory()
                .makeDatasetId(self.run, self.datasetType, self.dataId, id_generation_mode)
                .int
            )

    @property
    def id(self) -> DatasetId:
        """Primary key of the dataset (`DatasetId`).

        Cannot be changed after a `DatasetRef` is constructed.
        """
        return uuid.UUID(int=self._id)

    def __eq__(self, other: Any) -> bool:
        try:
            return (self.datasetType, self.dataId, self.id) == (other.datasetType, other.dataId, other.id)
        except AttributeError:
            return NotImplemented

    def __hash__(self) -> int:
        return hash((self.datasetType, self.dataId, self.id))
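
    # Illustrative usage sketch (not part of the original module): equality
    # and hashing compare only (datasetType, dataId, id); the run name is not
    # part of the comparison.  ``ref`` is assumed to be an existing resolved
    # DatasetRef.
    #
    #     clone = DatasetRef(ref.datasetType, ref.dataId, run=ref.run, id=ref.id)
    #     assert clone == ref and hash(clone) == hash(ref)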

    @property
    def dimensions(self) -> DimensionGraph:
        """Dimensions associated with the underlying `DatasetType`."""
        return self.datasetType.dimensions

    def __repr__(self) -> str:
        # We delegate to __str__ (i.e. use "!s") for the data ID below because
        # DataCoordinate's __repr__ - while adhering to the guidelines for
        # __repr__ - is much harder for users to read, while its __str__ just
        # produces a dict that can also be passed to DatasetRef's constructor.
        return f"DatasetRef({self.datasetType!r}, {self.dataId!s}, run={self.run!r}, id={self.id})"

    def __str__(self) -> str:
        s = (
            f"{self.datasetType.name}@{self.dataId!s} [sc={self.datasetType.storageClass_name}]"
            f" (run={self.run} id={self.id})"
        )
        return s

    def __lt__(self, other: Any) -> bool:
        # Sort by run, DatasetType name and then by DataCoordinate.
        # The __str__ representation is probably close enough but we
        # need to ensure that sorting a DatasetRef matches what you would
        # get if you sorted DatasetType + DataCoordinate.
        if not isinstance(other, type(self)):
            return NotImplemented

        # Group by run if defined; this takes precedence over DatasetType.
        self_run = "" if self.run is None else self.run
        other_run = "" if other.run is None else other.run

        # Compare tuples in the priority order.
        return (self_run, self.datasetType, self.dataId) < (other_run, other.datasetType, other.dataId)

    def to_simple(self, minimal: bool = False) -> SerializedDatasetRef:
        """Convert this class to a simple python type.

        This makes it suitable for serialization.

        Parameters
        ----------
        minimal : `bool`, optional
            Use minimal serialization. Requires Registry to convert
            back to a full type.

        Returns
        -------
        simple : `SerializedDatasetRef`
            The object converted to its serializable form.
        """
        if minimal:
            # The only thing needed to uniquely define a DatasetRef is its id
            # so that can be used directly if it is not a component DatasetRef.
            # Store it in a dict to allow us to easily add the planned origin
            # information later without having to support an int and dict in
            # simple form.
            simple: dict[str, Any] = {"id": self.id}
            if self.isComponent():
                # We can still be a little minimalist with a component
                # but we will also need to record the datasetType component.
                simple["component"] = self.datasetType.component()
            return SerializedDatasetRef(**simple)

        return SerializedDatasetRef(
            datasetType=self.datasetType.to_simple(minimal=minimal),
            dataId=self.dataId.to_simple(),
            run=self.run,
            id=self.id,
        )
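
    # Illustrative usage sketch (not part of the original module): with
    # ``minimal=True`` only the UUID (and any component name) is serialized,
    # so a Registry is needed later to rebuild the full ref.  ``ref`` is
    # assumed to be an existing resolved, non-component DatasetRef.
    #
    #     small = ref.to_simple(minimal=True)
    #     assert small.id == ref.id and small.datasetType is None
    #     full = ref.to_simple()
    #     assert full.run == ref.run and full.datasetType is not None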

    @classmethod
    def from_simple(
        cls,
        simple: SerializedDatasetRef,
        universe: DimensionUniverse | None = None,
        registry: Registry | None = None,
        datasetType: DatasetType | None = None,
    ) -> DatasetRef:
        """Construct a new object from simplified form.

        Generally this is data returned from the `to_simple` method.

        Parameters
        ----------
        simple : `SerializedDatasetRef`
            The value returned by `to_simple()`.
        universe : `DimensionUniverse`
            The special graph of all known dimensions.
            Can be `None` if a registry is provided.
        registry : `lsst.daf.butler.Registry`, optional
            Registry to use to convert the simple form of a DatasetRef to
            a full `DatasetRef`. Can be `None` if a full description of
            the type is provided along with a universe.
        datasetType : `DatasetType`, optional
            If ``datasetType`` is supplied, this will be used as the dataset
            type object in the resulting `DatasetRef` instead of being read
            from the `SerializedDatasetRef`. This is useful when many refs
            share the same type, since memory can be saved. Defaults to
            `None`.

        Returns
        -------
        ref : `DatasetRef`
            Newly-constructed object.
        """
        cache = PersistenceContextVars.datasetRefs.get()
        localName = sys.intern(
            datasetType.name
            if datasetType is not None
            else (x.name if (x := simple.datasetType) is not None else "")
        )
        key = (simple.id.int, localName)
        if cache is not None and (cachedRef := cache.get(key, None)) is not None:
            return cachedRef
        # Minimalist component will just specify component and id and
        # require registry to reconstruct.
        if not (simple.datasetType is not None or simple.dataId is not None or simple.run is not None):
            if registry is None:
                raise ValueError("Registry is required to construct component DatasetRef from integer id")
            if simple.id is None:
                raise ValueError("For minimal DatasetRef the ID must be defined.")
            ref = registry.getDataset(simple.id)
            if ref is None:
                raise RuntimeError(f"No matching dataset found in registry for id {simple.id}")
            if simple.component:
                ref = ref.makeComponentRef(simple.component)
            if cache is not None:
                cache[key] = ref
            return ref

        if universe is None and registry is None:
            raise ValueError("One of universe or registry must be provided.")

        if universe is None and registry is not None:
            universe = registry.dimensions

        if universe is None:
            # this is for mypy
            raise ValueError("Unable to determine a usable universe")

        if simple.datasetType is None and datasetType is None:
            # mypy
            raise ValueError("The DatasetType must be specified to construct a DatasetRef")
        if datasetType is None:
            if simple.datasetType is None:
                raise ValueError("Cannot determine Dataset type of this serialized class")
            datasetType = DatasetType.from_simple(simple.datasetType, universe=universe, registry=registry)

        if simple.dataId is None:
            # mypy
            raise ValueError("The DataId must be specified to construct a DatasetRef")
        dataId = DataCoordinate.from_simple(simple.dataId, universe=universe)

        # Check that simple ref is resolved.
        if simple.run is None:
            dstr = ""
            if simple.datasetType is None:
                dstr = f" (datasetType={datasetType.name!r})"
            raise ValueError(
                "Run collection name is missing from serialized representation. "
                f"Encountered with {simple!r}{dstr}."
            )

        newRef = cls(datasetType, dataId, id=simple.id, run=simple.run)
        if cache is not None:
            cache[key] = newRef
        return newRef
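
    # Illustrative usage sketch (not part of the original module): a full
    # (non-minimal) serialization round-trips without a Registry as long as a
    # DimensionUniverse is available.  ``ref`` and ``universe`` are assumed
    # to exist already.
    #
    #     restored = DatasetRef.from_simple(ref.to_simple(), universe=universe)
    #     assert restored == ref and restored.run == ref.run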

    to_json = to_json_pydantic
    from_json: ClassVar = classmethod(from_json_pydantic)

    @classmethod
    def _unpickle(
        cls,
        datasetType: DatasetType,
        dataId: DataCoordinate,
        id: DatasetId,
        run: str,
    ) -> DatasetRef:
        """Create new `DatasetRef`.

        A custom factory method for use by `__reduce__` as a workaround for
        its lack of support for keyword arguments.
        """
        return cls(datasetType, dataId, id=id, run=run)

    def __reduce__(self) -> tuple:
        return (self._unpickle, (self.datasetType, self.dataId, self.id, self.run))

    def __deepcopy__(self, memo: dict) -> DatasetRef:
        # DatasetRef is recursively immutable; see note in @immutable
        # decorator.
        return self

    def expanded(self, dataId: DataCoordinate) -> DatasetRef:
        """Return a new `DatasetRef` with the given expanded data ID.

        Parameters
        ----------
        dataId : `DataCoordinate`
            Data ID for the new `DatasetRef`. Must compare equal to the
            original data ID.

        Returns
        -------
        ref : `DatasetRef`
            A new `DatasetRef` with the given data ID.
        """
        assert dataId == self.dataId
        return DatasetRef(
            datasetType=self.datasetType, dataId=dataId, id=self.id, run=self.run, conform=False
        )

    def isComponent(self) -> bool:
        """Indicate whether this `DatasetRef` refers to a component.

        Returns
        -------
        isComponent : `bool`
            `True` if this `DatasetRef` is a component, `False` otherwise.
        """
        return self.datasetType.isComponent()

    def isComposite(self) -> bool:
        """Boolean indicating whether this `DatasetRef` is a composite type.

        Returns
        -------
        isComposite : `bool`
            `True` if this `DatasetRef` is a composite type, `False`
            otherwise.
        """
        return self.datasetType.isComposite()

    def _lookupNames(self) -> tuple[LookupKey, ...]:
        """Name keys to use when looking up this DatasetRef in a configuration.

        The names are returned in order of priority.

        Returns
        -------
        names : `tuple` of `LookupKey`
            Tuple of the `DatasetType` name and the `StorageClass` name.
            If ``instrument`` is defined in the dataId, each of those names
            is added to the start of the tuple with a key derived from the
            value of ``instrument``.
        """
        # Special case the instrument Dimension since we allow configs
        # to include the instrument name in the hierarchy.
        names: tuple[LookupKey, ...] = self.datasetType._lookupNames()

        if "instrument" in self.dataId:
            names = tuple(n.clone(dataId={"instrument": self.dataId["instrument"]}) for n in names) + names

        return names

    @staticmethod
    def groupByType(refs: Iterable[DatasetRef]) -> NamedKeyDict[DatasetType, list[DatasetRef]]:
        """Group an iterable of `DatasetRef` by `DatasetType`.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` [ `DatasetRef` ]
            `DatasetRef` instances to group.

        Returns
        -------
        grouped : `NamedKeyDict` [ `DatasetType`, `list` [ `DatasetRef` ] ]
            Grouped `DatasetRef` instances.

        Notes
        -----
        When lazy item-iterables are acceptable instead of a full mapping,
        `iter_by_type` can in some cases be far more efficient.
        """
        result: NamedKeyDict[DatasetType, list[DatasetRef]] = NamedKeyDict()
        for ref in refs:
            result.setdefault(ref.datasetType, []).append(ref)
        return result

    @staticmethod
    def iter_by_type(
        refs: Iterable[DatasetRef],
    ) -> Iterable[tuple[DatasetType, Iterable[DatasetRef]]]:
        """Group an iterable of `DatasetRef` by `DatasetType` with special
        hooks for custom iterables that can do this efficiently.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` [ `DatasetRef` ]
            `DatasetRef` instances to group. If this satisfies the
            `_DatasetRefGroupedIterable` protocol, its
            `~_DatasetRefGroupedIterable._iter_by_dataset_type` method will
            be called.

        Returns
        -------
        grouped : `~collections.abc.Iterable` [ `tuple` [ `DatasetType`, \
                `~collections.abc.Iterable` [ `DatasetRef` ] ] ]
            Grouped `DatasetRef` instances.
        """
        if isinstance(refs, _DatasetRefGroupedIterable):
            return refs._iter_by_dataset_type()
        return DatasetRef.groupByType(refs).items()
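
    # Illustrative usage sketch (not part of the original module): grouping a
    # plain list of refs falls back to ``groupByType``; an iterable that
    # implements the ``_DatasetRefGroupedIterable`` protocol would instead be
    # asked to group itself.  ``refs`` is assumed to be a list of resolved
    # DatasetRef objects.
    #
    #     for dataset_type, refs_for_type in DatasetRef.iter_by_type(refs):
    #         assert all(r.datasetType == dataset_type for r in refs_for_type)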

    def makeCompositeRef(self) -> DatasetRef:
        """Create a `DatasetRef` of the composite from a component ref.

        Requires that this `DatasetRef` is a component.

        Returns
        -------
        ref : `DatasetRef`
            A `DatasetRef` with a dataset type that corresponds to the
            composite parent of this component, and the same ID and run
            (which may be `None`, if they are `None` in ``self``).
        """
        # Assume that the data ID does not need to be standardized
        # and should match whatever this ref already has.
        return DatasetRef(
            self.datasetType.makeCompositeDatasetType(), self.dataId, id=self.id, run=self.run, conform=False
        )

    def makeComponentRef(self, name: str) -> DatasetRef:
        """Create a `DatasetRef` that corresponds to a component.

        Parameters
        ----------
        name : `str`
            Name of the component.

        Returns
        -------
        ref : `DatasetRef`
            A `DatasetRef` with a dataset type that corresponds to the given
            component, and the same ID and run
            (which may be `None`, if they are `None` in ``self``).
        """
        # Assume that the data ID does not need to be standardized
        # and should match whatever this ref already has.
        return DatasetRef(
            self.datasetType.makeComponentDatasetType(name),
            self.dataId,
            id=self.id,
            run=self.run,
            conform=False,
        )
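
    # Illustrative usage sketch (not part of the original module): component
    # refs share the parent's ID, data ID, and run, and can be turned back
    # into the composite.  ``ref`` is assumed to be a resolved ref to a
    # composite dataset with a hypothetical "image" component.
    #
    #     comp = ref.makeComponentRef("image")
    #     assert comp.isComponent() and comp.id == ref.id
    #     parent = comp.makeCompositeRef()
    #     assert not parent.isComponent() and parent.id == ref.id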

    def overrideStorageClass(self, storageClass: str | StorageClass) -> DatasetRef:
        """Create a new `DatasetRef` from this one, but with a modified
        `DatasetType` that has a different `StorageClass`.

        Parameters
        ----------
        storageClass : `str` or `StorageClass`
            The new storage class.

        Returns
        -------
        modified : `DatasetRef`
            A new dataset reference that is the same as the current one but
            with a different storage class in the `DatasetType`.
        """
        return DatasetRef(
            datasetType=self.datasetType.overrideStorageClass(storageClass),
            dataId=self.dataId,
            id=self.id,
            run=self.run,
            conform=False,
        )

    def is_compatible_with(self, ref: DatasetRef) -> bool:
        """Determine if the given `DatasetRef` is compatible with this one.

        Parameters
        ----------
        ref : `DatasetRef`
            Dataset ref to check.

        Returns
        -------
        is_compatible : `bool`
            Returns `True` if the other dataset ref is either the same as
            this one, or the dataset type associated with the other is
            compatible with this one and the data ID and dataset ID match.

        Notes
        -----
        Compatibility requires that the data ID and dataset ID match and the
        `DatasetType` is compatible. Compatibility means that the storage
        class associated with the dataset type of the other ref can be
        converted to this ref's storage class.

        Specifically this means that if you have done:

        .. code-block:: py

            new_ref = ref.overrideStorageClass(sc)

        and this is successful, then the guarantee is that:

        .. code-block:: py

            assert ref.is_compatible_with(new_ref) is True

        since we know that the python type associated with the new ref can
        be converted to the original python type. The reverse is not
        guaranteed and depends on whether bidirectional converters have been
        registered.
        """
        if self.id != ref.id:
            return False
        if self.dataId != ref.dataId:
            return False
        if self.run != ref.run:
            return False
        return self.datasetType.is_compatible_with(ref.datasetType)

    datasetType: DatasetType
    """The definition of this dataset (`DatasetType`).

    Cannot be changed after a `DatasetRef` is constructed.
    """

    dataId: DataCoordinate
    """A mapping of `Dimension` primary key values that labels the dataset
    within a Collection (`DataCoordinate`).

    Cannot be changed after a `DatasetRef` is constructed.
    """

    run: str
    """The name of the run that produced the dataset.

    Cannot be changed after a `DatasetRef` is constructed.
    """