Coverage for python/lsst/daf/butler/core/datasets/ref.py: 36%
221 statements
coverage.py v7.2.7, created at 2023-07-15 07:56 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = [
24 "AmbiguousDatasetError",
25 "DatasetId",
26 "DatasetIdFactory",
27 "DatasetIdGenEnum",
28 "DatasetRef",
29 "SerializedDatasetRef",
30]
32import enum
33import sys
34import uuid
35from collections.abc import Iterable
36from typing import TYPE_CHECKING, Any, ClassVar, Protocol, runtime_checkable
38from lsst.utils.classes import immutable
40try:
41 from pydantic.v1 import BaseModel, StrictStr, validator
42except ModuleNotFoundError:
43 from pydantic import BaseModel, StrictStr, validator # type: ignore
45from ..configSupport import LookupKey
46from ..dimensions import DataCoordinate, DimensionGraph, DimensionUniverse, SerializedDataCoordinate
47from ..json import from_json_pydantic, to_json_pydantic
48from ..named import NamedKeyDict
49from ..persistenceContext import PersistenceContextVars
50from .type import DatasetType, SerializedDatasetType
52if TYPE_CHECKING:
53 from ...registry import Registry
54 from ..storageClass import StorageClass
57class AmbiguousDatasetError(Exception):
58 """Raised when a `DatasetRef` is not resolved but should be.
60 This happens when the `DatasetRef` has no ID or run but the requested
61 operation requires one of them.
62 """
65@runtime_checkable
66class _DatasetRefGroupedIterable(Protocol):
67 """A package-private interface for iterables of `DatasetRef` that know how
68 to efficiently group their contents by `DatasetType`.
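    Examples
    --------
    A sketch of the shape an implementing container might take (the
    ``GroupedRefs`` class here is hypothetical, not part of this package):

    .. code-block:: py

        class GroupedRefs:
            def __init__(self, by_type: dict[DatasetType, list[DatasetRef]]):
                self._by_type = by_type

            def _iter_by_dataset_type(self):
                # Already grouped, so this is just a view of the mapping.
                return self._by_type.items()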
70 """
72 def _iter_by_dataset_type(self) -> Iterable[tuple[DatasetType, Iterable[DatasetRef]]]:
73 """Iterate over `DatasetRef` instances, one `DatasetType` at a time.
75 Returns
76 -------
77 grouped : `~collections.abc.Iterable` [ `tuple` [ `DatasetType`, \
78 `~collections.abc.Iterable` [ `DatasetRef` ] ] ]
79 An iterable of tuples, in which the first element is a dataset type
80 and the second is an iterable of `DatasetRef` objects with exactly
81 that dataset type.
82 """
83 ...
86class DatasetIdGenEnum(enum.Enum):
87 """Enum used to specify dataset ID generation options."""
89 UNIQUE = 0
90 """Unique mode generates a unique ID for each inserted dataset, e.g.
91 one auto-generated by the database or a random UUID.
92 """
94 DATAID_TYPE = 1
95 """In this mode the ID is computed deterministically from a combination of
96 dataset type and dataId.
97 """
99 DATAID_TYPE_RUN = 2
100 """In this mode the ID is computed deterministically from a combination of
101 dataset type, dataId, and run collection name.
102 """
105class DatasetIdFactory:
106 """Factory for dataset IDs (UUIDs).
108 For now the logic is hard-coded and is controlled by the user-provided
109 value of `DatasetIdGenEnum`. In the future we may implement configurable
110 logic that can guess the `DatasetIdGenEnum` value from other parameters.
111 """
113 NS_UUID = uuid.UUID("840b31d9-05cd-5161-b2c8-00d32b280d0f")
114 """Namespace UUID used for UUID5 generation. Do not change. This was
115 produced by `uuid.uuid5(uuid.NAMESPACE_DNS, "lsst.org")`.
116 """
118 def makeDatasetId(
119 self,
120 run: str,
121 datasetType: DatasetType,
122 dataId: DataCoordinate,
123 idGenerationMode: DatasetIdGenEnum,
124 ) -> uuid.UUID:
125 """Generate dataset ID for a dataset.
127 Parameters
128 ----------
129 run : `str`
130 Name of the RUN collection for the dataset.
131 datasetType : `DatasetType`
132 Dataset type.
133 dataId : `DataCoordinate`
134 Expanded data ID for the dataset.
135 idGenerationMode : `DatasetIdGenEnum`
136 ID generation option. `~DatasetIdGenEnum.UNIQUE` makes a random
137 UUID4-type ID. `~DatasetIdGenEnum.DATAID_TYPE` makes a
138 deterministic UUID5-type ID based on a dataset type name and
139 ``dataId``. `~DatasetIdGenEnum.DATAID_TYPE_RUN` makes a
140 deterministic UUID5-type ID based on a dataset type name, run
141 collection name, and ``dataId``.
143 Returns
144 -------
145 datasetId : `uuid.UUID`
146 Dataset identifier.
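        Examples
        --------
        A minimal sketch of how the modes differ; ``dataset_type`` and the
        expanded ``data_id`` are assumed to exist already and are
        placeholders here:

        .. code-block:: py

            factory = DatasetIdFactory()
            # Random UUID4, different on every call.
            unique_id = factory.makeDatasetId(
                "some/run", dataset_type, data_id, DatasetIdGenEnum.UNIQUE
            )
            # Deterministic UUID5, stable for the same type, data ID, and run.
            stable_id = factory.makeDatasetId(
                "some/run", dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE_RUN
            )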
147 """
148 if idGenerationMode is DatasetIdGenEnum.UNIQUE:
149 return uuid.uuid4()
150 else:
151 # WARNING: If you modify this code make sure that the order of
152 # items in the `items` list below never changes.
153 items: list[tuple[str, str]] = []
154 if idGenerationMode is DatasetIdGenEnum.DATAID_TYPE:
155 items = [
156 ("dataset_type", datasetType.name),
157 ]
158 elif idGenerationMode is DatasetIdGenEnum.DATAID_TYPE_RUN:
159 items = [
160 ("dataset_type", datasetType.name),
161 ("run", run),
162 ]
163 else:
164 raise ValueError(f"Unexpected ID generation mode: {idGenerationMode}")
166 for name, value in sorted(dataId.byName().items()):
167 items.append((name, str(value)))
168 data = ",".join(f"{key}={value}" for key, value in items)
169 return uuid.uuid5(self.NS_UUID, data)
172# This is constant, so don't recreate a set for each instance
173_serializedDatasetRefFieldsSet = {"id", "datasetType", "dataId", "run", "component"}
176class SerializedDatasetRef(BaseModel):
177 """Simplified model of a `DatasetRef` suitable for serialization."""
179 id: uuid.UUID
180 datasetType: SerializedDatasetType | None = None
181 dataId: SerializedDataCoordinate | None = None
182 run: StrictStr | None = None
183 component: StrictStr | None = None
185 @validator("dataId")
186 def _check_dataId(cls, v: Any, values: dict[str, Any]) -> Any: # noqa: N805
187 if (d := "datasetType") in values and values[d] is None:
188 raise ValueError("Can not specify 'dataId' without specifying 'datasetType'")
189 return v
191 @validator("run")
192 def _check_run(cls, v: Any, values: dict[str, Any]) -> Any: # noqa: N805
193 if v and (i := "id") in values and values[i] is None:
194 raise ValueError("'run' cannot be provided unless 'id' is.")
195 return v
197 @validator("component")
198 def _check_component(cls, v: Any, values: dict[str, Any]) -> Any: # noqa: N805
199 # Component should not be given if datasetType is given
200 if v and (d := "datasetType") in values and values[d] is not None:
201 raise ValueError(f"datasetType ({values[d]}) can not be set if component is given ({v}).")
202 return v
204 @classmethod
205 def direct(
206 cls,
207 *,
208 id: str,
209 run: str,
210 datasetType: dict[str, Any] | None = None,
211 dataId: dict[str, Any] | None = None,
212 component: str | None = None,
213 ) -> SerializedDatasetRef:
214 """Construct a `SerializedDatasetRef` directly without validators.
216 Notes
217 -----
218 This differs from the pydantic "construct" method in that the arguments
219 are explicitly what the model requires, and it will recurse through
220 members, constructing them from their corresponding `direct` methods.
222 The ``id`` parameter is a string representation of the dataset ID; it is
223 converted to a UUID by this method.
225 This method should only be called when the inputs are trusted.
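        Examples
        --------
        An illustrative sketch only; the UUID string and run name below are
        placeholders, not values from this package:

        .. code-block:: py

            serialized = SerializedDatasetRef.direct(
                id="5f1bb9a4-5d07-4d26-bfd1-d0b4b0f7c2a1",
                run="some/run",
                component="wcs",
            )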
226 """
227 node = SerializedDatasetRef.__new__(cls)
228 setter = object.__setattr__
229 setter(node, "id", uuid.UUID(id))
230 setter(
231 node,
232 "datasetType",
233 datasetType if datasetType is None else SerializedDatasetType.direct(**datasetType),
234 )
235 setter(node, "dataId", dataId if dataId is None else SerializedDataCoordinate.direct(**dataId))
236 setter(node, "run", sys.intern(run))
237 setter(node, "component", component)
238 setter(node, "__fields_set__", _serializedDatasetRefFieldsSet)
239 return node
242DatasetId = uuid.UUID
243"""A type-annotation alias for dataset ID providing typing flexibility.
244"""
247@immutable
248class DatasetRef:
249 """Reference to a Dataset in a `Registry`.
251 A `DatasetRef` may point to a Dataset that does not yet exist
252 (e.g., because it is a predicted input for provenance).
254 Parameters
255 ----------
256 datasetType : `DatasetType`
257 The `DatasetType` for this Dataset.
258 dataId : `DataCoordinate`
259 A mapping of dimensions that labels the Dataset within a Collection.
260 run : `str`
261 The name of the run this dataset was associated with when it was
262 created.
263 id : `DatasetId`, optional
264 The unique identifier assigned when the dataset is created. If ``id``
265 is not specified, a new unique ID will be created.
266 conform : `bool`, optional
267 If `True` (default), call `DataCoordinate.standardize` to ensure that
268 the data ID's dimensions are consistent with the dataset type's.
269 `DatasetRef` instances for which those dimensions are not equal should
270 not be created in new code, but are still supported for backwards
271 compatibility. New code should only pass `False` if it can guarantee
272 that the dimensions are already consistent.
273 id_generation_mode : `DatasetIdGenEnum`
274 ID generation option. `~DatasetIdGenEnum.UNIQUE` makes a random
275 UUID4-type ID. `~DatasetIdGenEnum.DATAID_TYPE` makes a
276 deterministic UUID5-type ID based on a dataset type name and
277 ``dataId``. `~DatasetIdGenEnum.DATAID_TYPE_RUN` makes a
278 deterministic UUID5-type ID based on a dataset type name, run
279 collection name, and ``dataId``.
281 See Also
282 --------
283 :ref:`daf_butler_organizing_datasets`
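    Examples
    --------
    A minimal sketch of constructing a resolved reference; ``flat_type`` and
    the data-ID values are placeholders assumed to be defined elsewhere:

    .. code-block:: py

        data_id = DataCoordinate.standardize(
            {"instrument": "DummyCam", "detector": 0}, graph=flat_type.dimensions
        )
        ref = DatasetRef(flat_type, data_id, run="some/run")
        # No ``id`` was passed, so a random UUID4 was generated.
        print(ref.id)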
284 """
286 _serializedType = SerializedDatasetRef
287 __slots__ = (
288 "_id",
289 "datasetType",
290 "dataId",
291 "run",
292 )
294 def __init__(
295 self,
296 datasetType: DatasetType,
297 dataId: DataCoordinate,
298 run: str,
299 *,
300 id: DatasetId | None = None,
301 conform: bool = True,
302 id_generation_mode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE,
303 ):
304 self.datasetType = datasetType
305 if conform:
306 self.dataId = DataCoordinate.standardize(dataId, graph=datasetType.dimensions)
307 else:
308 self.dataId = dataId
309 self.run = run
310 if id is not None:
311 self._id = id.int
312 else:
313 self._id = (
314 DatasetIdFactory()
315 .makeDatasetId(self.run, self.datasetType, self.dataId, id_generation_mode)
316 .int
317 )
319 @property
320 def id(self) -> DatasetId:
321 """Primary key of the dataset (`DatasetId`).
323 Cannot be changed after a `DatasetRef` is constructed.
324 """
325 return uuid.UUID(int=self._id)
327 def __eq__(self, other: Any) -> bool:
328 try:
329 return (self.datasetType, self.dataId, self.id) == (other.datasetType, other.dataId, other.id)
330 except AttributeError:
331 return NotImplemented
333 def __hash__(self) -> int:
334 return hash((self.datasetType, self.dataId, self.id))
336 @property
337 def dimensions(self) -> DimensionGraph:
338 """Dimensions associated with the underlying `DatasetType`."""
339 return self.datasetType.dimensions
341 def __repr__(self) -> str:
342 # We delegate to __str__ (i.e. use "!s") for the data ID below because
343 # DataCoordinate's __repr__ - while adhering to the guidelines for
344 # __repr__ - is much harder for users to read, while its __str__ just
345 # produces a dict that can also be passed to DatasetRef's constructor.
346 return f"DatasetRef({self.datasetType!r}, {self.dataId!s}, run={self.run!r}, id={self.id})"
348 def __str__(self) -> str:
349 s = (
350 f"{self.datasetType.name}@{self.dataId!s} [sc={self.datasetType.storageClass_name}]"
351 f" (run={self.run} id={self.id})"
352 )
353 return s
355 def __lt__(self, other: Any) -> bool:
356 # Sort by run, DatasetType name, and then by DataCoordinate.
357 # The __str__ representation is probably close enough but we
358 # need to ensure that sorting a DatasetRef matches what you would
359 # get if you sorted DatasetType+DataCoordinate.
360 if not isinstance(other, type(self)):
361 return NotImplemented
363 # Group by run if defined; this takes precedence over DatasetType
364 self_run = "" if self.run is None else self.run
365 other_run = "" if other.run is None else other.run
367 # Compare tuples in the priority order
368 return (self_run, self.datasetType, self.dataId) < (other_run, other.datasetType, other.dataId)
370 def to_simple(self, minimal: bool = False) -> SerializedDatasetRef:
371 """Convert this class to a simple python type.
373 This makes it suitable for serialization.
375 Parameters
376 ----------
377 minimal : `bool`, optional
378 Use minimal serialization. Requires Registry to convert
379 back to a full type.
381 Returns
382 -------
383 simple : `SerializedDatasetRef`
384 The object converted to simplified form.
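        Examples
        --------
        An illustrative round trip (``ref`` and ``universe`` are assumed to
        exist already):

        .. code-block:: py

            simple = ref.to_simple()
            restored = DatasetRef.from_simple(simple, universe=universe)
            assert restored == ref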
385 """
386 if minimal:
387 # The only thing needed to uniquely define a DatasetRef is its id
388 # so that can be used directly if it is not a component DatasetRef.
389 # Store it in a dict to allow us to easily add the planned origin
390 # information later without having to support an int and dict in
391 # simple form.
392 simple: dict[str, Any] = {"id": self.id}
393 if self.isComponent():
394 # We can still be a little minimalist with a component
395 # but we will also need to record the datasetType component
396 simple["component"] = self.datasetType.component()
397 return SerializedDatasetRef(**simple)
399 return SerializedDatasetRef(
400 datasetType=self.datasetType.to_simple(minimal=minimal),
401 dataId=self.dataId.to_simple(),
402 run=self.run,
403 id=self.id,
404 )
406 @classmethod
407 def from_simple(
408 cls,
409 simple: SerializedDatasetRef,
410 universe: DimensionUniverse | None = None,
411 registry: Registry | None = None,
412 datasetType: DatasetType | None = None,
413 ) -> DatasetRef:
414 """Construct a new object from simplified form.
416 Generally this is data returned from the `to_simple` method.
418 Parameters
419 ----------
420 simple : `SerializedDatasetRef`
421 The value returned by `to_simple()`.
422 universe : `DimensionUniverse`
423 The special graph of all known dimensions.
424 Can be `None` if a registry is provided.
425 registry : `lsst.daf.butler.Registry`, optional
426 Registry to use to convert simple form of a DatasetRef to
427 a full `DatasetRef`. Can be `None` if a full description of
428 the type is provided along with a universe.
429 datasetType : DatasetType, optional
430 If datasetType is supplied, this will be used as the datasetType
431 object in the resulting DatasetRef instead of being read from
432 the `SerializedDatasetRef`. This is useful when many refs share
433 the same type, since memory can be saved. Defaults to `None`.
435 Returns
436 -------
437 ref : `DatasetRef`
438 Newly-constructed object.
439 """
440 cache = PersistenceContextVars.datasetRefs.get()
441 localName = sys.intern(
442 datasetType.name
443 if datasetType is not None
444 else (x.name if (x := simple.datasetType) is not None else "")
445 )
446 key = (simple.id.int, localName)
447 if cache is not None and (cachedRef := cache.get(key, None)) is not None:
448 return cachedRef
449 # Minimalist component will just specify component and id and
450 # require registry to reconstruct
451 if not (simple.datasetType is not None or simple.dataId is not None or simple.run is not None):
452 if registry is None:
453 raise ValueError("Registry is required to construct component DatasetRef from integer id")
454 if simple.id is None:
455 raise ValueError("For minimal DatasetRef the ID must be defined.")
456 ref = registry.getDataset(simple.id)
457 if ref is None:
458 raise RuntimeError(f"No matching dataset found in registry for id {simple.id}")
459 if simple.component:
460 ref = ref.makeComponentRef(simple.component)
461 if cache is not None:
462 cache[key] = ref
463 return ref
465 if universe is None and registry is None:
466 raise ValueError("One of universe or registry must be provided.")
468 if universe is None and registry is not None:
469 universe = registry.dimensions
471 if universe is None:
472 # this is for mypy
473 raise ValueError("Unable to determine a usable universe")
475 if simple.datasetType is None and datasetType is None:
476 # mypy
477 raise ValueError("The DatasetType must be specified to construct a DatasetRef")
478 if datasetType is None:
479 if simple.datasetType is None:
480 raise ValueError("Cannot determine Dataset type of this serialized class")
481 datasetType = DatasetType.from_simple(simple.datasetType, universe=universe, registry=registry)
483 if simple.dataId is None:
484 # mypy
485 raise ValueError("The DataId must be specified to construct a DatasetRef")
486 dataId = DataCoordinate.from_simple(simple.dataId, universe=universe)
488 # Check that simple ref is resolved.
489 if simple.run is None:
490 dstr = ""
491 if simple.datasetType is None:
492 dstr = f" (datasetType={datasetType.name!r})"
493 raise ValueError(
494 "Run collection name is missing from serialized representation. "
495 f"Encountered with {simple!r}{dstr}."
496 )
498 newRef = cls(datasetType, dataId, id=simple.id, run=simple.run)
499 if cache is not None:
500 cache[key] = newRef
501 return newRef
503 to_json = to_json_pydantic
504 from_json: ClassVar = classmethod(from_json_pydantic)
506 @classmethod
507 def _unpickle(
508 cls,
509 datasetType: DatasetType,
510 dataId: DataCoordinate,
511 id: DatasetId,
512 run: str,
513 ) -> DatasetRef:
514 """Create new `DatasetRef`.
516 A custom factory method for use by `__reduce__` as a workaround for
517 its lack of support for keyword arguments.
518 """
519 return cls(datasetType, dataId, id=id, run=run)
521 def __reduce__(self) -> tuple:
522 return (self._unpickle, (self.datasetType, self.dataId, self.id, self.run))
524 def __deepcopy__(self, memo: dict) -> DatasetRef:
525 # DatasetRef is recursively immutable; see note in @immutable
526 # decorator.
527 return self
529 def expanded(self, dataId: DataCoordinate) -> DatasetRef:
530 """Return a new `DatasetRef` with the given expanded data ID.
532 Parameters
533 ----------
534 dataId : `DataCoordinate`
535 Data ID for the new `DatasetRef`. Must compare equal to the
536 original data ID.
538 Returns
539 -------
540 ref : `DatasetRef`
541 A new `DatasetRef` with the given data ID.
542 """
543 assert dataId == self.dataId
544 return DatasetRef(
545 datasetType=self.datasetType, dataId=dataId, id=self.id, run=self.run, conform=False
546 )
548 def isComponent(self) -> bool:
549 """Indicate whether this `DatasetRef` refers to a component.
551 Returns
552 -------
553 isComponent : `bool`
554 `True` if this `DatasetRef` is a component, `False` otherwise.
555 """
556 return self.datasetType.isComponent()
558 def isComposite(self) -> bool:
559 """Boolean indicating whether this `DatasetRef` is a composite type.
561 Returns
562 -------
563 isComposite : `bool`
564 `True` if this `DatasetRef` is a composite type, `False`
565 otherwise.
566 """
567 return self.datasetType.isComposite()
569 def _lookupNames(self) -> tuple[LookupKey, ...]:
570 """Name keys to use when looking up this DatasetRef in a configuration.
572 The names are returned in order of priority.
574 Returns
575 -------
576 names : `tuple` of `LookupKey`
577 Tuple of the `DatasetType` name and the `StorageClass` name.
578 If ``instrument`` is defined in the dataId, each of those names
579 is added to the start of the tuple with a key derived from the
580 value of ``instrument``.
581 """
582 # Special case the instrument Dimension since we allow configs
583 # to include the instrument name in the hierarchy.
584 names: tuple[LookupKey, ...] = self.datasetType._lookupNames()
586 if "instrument" in self.dataId:
587 names = tuple(n.clone(dataId={"instrument": self.dataId["instrument"]}) for n in names) + names
589 return names
591 @staticmethod
592 def groupByType(refs: Iterable[DatasetRef]) -> NamedKeyDict[DatasetType, list[DatasetRef]]:
593 """Group an iterable of `DatasetRef` by `DatasetType`.
595 Parameters
596 ----------
597 refs : `~collections.abc.Iterable` [ `DatasetRef` ]
598 `DatasetRef` instances to group.
600 Returns
601 -------
602 grouped : `NamedKeyDict` [ `DatasetType`, `list` [ `DatasetRef` ] ]
603 Grouped `DatasetRef` instances.
605 Notes
606 -----
607 When lazy item-iterables are acceptable instead of a full mapping,
608 `iter_by_type` can in some cases be far more efficient.
609 """
609 """
610 result: NamedKeyDict[DatasetType, list[DatasetRef]] = NamedKeyDict()
611 for ref in refs:
612 result.setdefault(ref.datasetType, []).append(ref)
613 return result
615 @staticmethod
616 def iter_by_type(
617 refs: Iterable[DatasetRef],
618 ) -> Iterable[tuple[DatasetType, Iterable[DatasetRef]]]:
619 """Group an iterable of `DatasetRef` by `DatasetType` with special
620 hooks for custom iterables that can do this efficiently.
622 Parameters
623 ----------
624 refs : `~collections.abc.Iterable` [ `DatasetRef` ]
625 `DatasetRef` instances to group. If this satisfies the
626 `_DatasetRefGroupedIterable` protocol, its
627 `~_DatasetRefGroupedIterable._iter_by_dataset_type` method will
628 be called.
630 Returns
631 -------
632 grouped : `~collections.abc.Iterable` [ `tuple` [ `DatasetType`, \
633 `Iterable` [ `DatasetRef` ] ]]
634 Grouped `DatasetRef` instances.
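        Examples
        --------
        A small sketch, assuming ``refs`` is any iterable of `DatasetRef`:

        .. code-block:: py

            for dataset_type, refs_for_type in DatasetRef.iter_by_type(refs):
                print(dataset_type.name, len(list(refs_for_type)))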
635 """
636 if isinstance(refs, _DatasetRefGroupedIterable):
637 return refs._iter_by_dataset_type()
638 return DatasetRef.groupByType(refs).items()
640 def makeCompositeRef(self) -> DatasetRef:
641 """Create a `DatasetRef` of the composite from a component ref.
643 Requires that this `DatasetRef` is a component.
645 Returns
646 -------
647 ref : `DatasetRef`
648 A `DatasetRef` with a dataset type that corresponds to the
649 composite parent of this component, and the same ID and run
650 (which may be `None`, if they are `None` in ``self``).
651 """
652 # Assume that the data ID does not need to be standardized
653 # and should match whatever this ref already has.
654 return DatasetRef(
655 self.datasetType.makeCompositeDatasetType(), self.dataId, id=self.id, run=self.run, conform=False
656 )
658 def makeComponentRef(self, name: str) -> DatasetRef:
659 """Create a `DatasetRef` that corresponds to a component.
661 Parameters
662 ----------
663 name : `str`
664 Name of the component.
666 Returns
667 -------
668 ref : `DatasetRef`
669 A `DatasetRef` with a dataset type that corresponds to the given
670 component, and the same ID and run
671 (which may be `None`, if they are `None` in ``self``).
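        Examples
        --------
        For illustration only; ``exposure_ref`` and the ``"wcs"`` component
        name are placeholders for a composite dataset reference and one of
        its components:

        .. code-block:: py

            wcs_ref = exposure_ref.makeComponentRef("wcs")
            assert wcs_ref.isComponent()
            assert wcs_ref.datasetType.component() == "wcs"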
672 """
673 # Assume that the data ID does not need to be standardized
674 # and should match whatever this ref already has.
675 return DatasetRef(
676 self.datasetType.makeComponentDatasetType(name),
677 self.dataId,
678 id=self.id,
679 run=self.run,
680 conform=False,
681 )
683 def overrideStorageClass(self, storageClass: str | StorageClass) -> DatasetRef:
684 """Create a new `DatasetRef` from this one, but with a modified
685 `DatasetType` that has a different `StorageClass`.
687 Parameters
688 ----------
689 storageClass : `str` or `StorageClass`
690 The new storage class.
692 Returns
693 -------
694 modified : `DatasetRef`
695 A new dataset reference that is the same as the current one but
696 with a different storage class in the `DatasetType`.
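        Examples
        --------
        An illustrative sketch; ``"StructuredDataDict"`` is a placeholder and
        must name a storage class that the original one can be converted to:

        .. code-block:: py

            new_ref = ref.overrideStorageClass("StructuredDataDict")
            # Per `is_compatible_with`, the original ref remains compatible
            # with the new one.
            assert ref.is_compatible_with(new_ref)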
697 """
698 return DatasetRef(
699 datasetType=self.datasetType.overrideStorageClass(storageClass),
700 dataId=self.dataId,
701 id=self.id,
702 run=self.run,
703 conform=False,
704 )
706 def is_compatible_with(self, ref: DatasetRef) -> bool:
707 """Determine if the given `DatasetRef` is compatible with this one.
709 Parameters
710 ----------
711 ref : `DatasetRef`
712 Dataset ref to check.
714 Returns
715 -------
716 is_compatible : `bool`
717 Returns `True` if the given dataset ref is the same as this one,
718 or if its dataset type is compatible with this one and the
719 dataId and dataset ID match.
721 Notes
722 -----
723 Compatibility requires that the dataId and dataset ID match and the
724 `DatasetType` is compatible. Compatibility is defined as the storage
725 class associated with the dataset type of the other ref being
726 convertible to this ref's storage class.
728 Specifically this means that if you have done:
730 .. code-block:: py
732 new_ref = ref.overrideStorageClass(sc)
734 and this is successful, then the guarantee is that:
736 .. code-block:: py
738 assert ref.is_compatible_with(new_ref) is True
740 since we know that the python type associated with the new ref can
741 be converted to the original python type. The reverse is not guaranteed
742 and depends on whether bidirectional converters have been registered.
743 """
744 if self.id != ref.id:
745 return False
746 if self.dataId != ref.dataId:
747 return False
748 if self.run != ref.run:
749 return False
750 return self.datasetType.is_compatible_with(ref.datasetType)
752 datasetType: DatasetType
753 """The definition of this dataset (`DatasetType`).
755 Cannot be changed after a `DatasetRef` is constructed.
756 """
758 dataId: DataCoordinate
759 """A mapping of `Dimension` primary key values that labels the dataset
760 within a Collection (`DataCoordinate`).
762 Cannot be changed after a `DatasetRef` is constructed.
763 """
765 run: str
766 """The name of the run that produced the dataset.
768 Cannot be changed after a `DatasetRef` is constructed.
769 """