Coverage for python/lsst/daf/butler/core/datasets/ref.py: 34% (221 statements)
coverage.py v7.2.7, created at 2023-08-12 09:20 +0000
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = [
    "AmbiguousDatasetError",
    "DatasetId",
    "DatasetIdFactory",
    "DatasetIdGenEnum",
    "DatasetRef",
    "SerializedDatasetRef",
]

import enum
import sys
import uuid
from collections.abc import Iterable
from typing import TYPE_CHECKING, Any, ClassVar, Protocol, TypeAlias, runtime_checkable

import pydantic
from lsst.daf.butler._compat import PYDANTIC_V2, _BaseModelCompat
from lsst.utils.classes import immutable
from pydantic import StrictStr

from ..configSupport import LookupKey
from ..dimensions import DataCoordinate, DimensionGraph, DimensionUniverse, SerializedDataCoordinate
from ..json import from_json_pydantic, to_json_pydantic
from ..named import NamedKeyDict
from ..persistenceContext import PersistenceContextVars
from .type import DatasetType, SerializedDatasetType

if TYPE_CHECKING:
    from ...registry import Registry
    from ..storageClass import StorageClass


class AmbiguousDatasetError(Exception):
    """Raised when a `DatasetRef` is not resolved but should be.

    This happens when the `DatasetRef` has no ID or run but the requested
    operation requires one of them.
    """


@runtime_checkable
class _DatasetRefGroupedIterable(Protocol):
    """A package-private interface for iterables of `DatasetRef` that know
    how to efficiently group their contents by `DatasetType`.
    """

    def _iter_by_dataset_type(self) -> Iterable[tuple[DatasetType, Iterable[DatasetRef]]]:
        """Iterate over `DatasetRef` instances, one `DatasetType` at a time.

        Returns
        -------
        grouped : `~collections.abc.Iterable` [ `tuple` [ `DatasetType`, \
                `~collections.abc.Iterable` [ `DatasetRef` ] ] ]
            An iterable of tuples, in which the first element is a dataset
            type and the second is an iterable of `DatasetRef` objects with
            exactly that dataset type.
        """
        ...


class DatasetIdGenEnum(enum.Enum):
    """Enum used to specify dataset ID generation options."""

    UNIQUE = 0
    """Unique mode generates a unique ID for each inserted dataset, e.g.
    auto-generated by the database or a random UUID.
    """

    DATAID_TYPE = 1
    """In this mode the ID is computed deterministically from a combination
    of dataset type and data ID.
    """

    DATAID_TYPE_RUN = 2
    """In this mode the ID is computed deterministically from a combination
    of dataset type, data ID, and run collection name.
    """


class DatasetIdFactory:
    """Factory for dataset IDs (UUIDs).

    For now the logic is hard-coded and is controlled by the user-provided
    value of `DatasetIdGenEnum`. In the future we may implement configurable
    logic that can guess the `DatasetIdGenEnum` value from other parameters.
    """

    NS_UUID = uuid.UUID("840b31d9-05cd-5161-b2c8-00d32b280d0f")
    """Namespace UUID used for UUID5 generation. Do not change. This was
    produced by `uuid.uuid5(uuid.NAMESPACE_DNS, "lsst.org")`.
    """
    def makeDatasetId(
        self,
        run: str,
        datasetType: DatasetType,
        dataId: DataCoordinate,
        idGenerationMode: DatasetIdGenEnum,
    ) -> uuid.UUID:
        """Generate a dataset ID for a dataset.

        Parameters
        ----------
        run : `str`
            Name of the RUN collection for the dataset.
        datasetType : `DatasetType`
            Dataset type.
        dataId : `DataCoordinate`
            Expanded data ID for the dataset.
        idGenerationMode : `DatasetIdGenEnum`
            ID generation option. `~DatasetIdGenEnum.UNIQUE` makes a random
            UUID4-type ID. `~DatasetIdGenEnum.DATAID_TYPE` makes a
            deterministic UUID5-type ID based on a dataset type name and
            ``dataId``. `~DatasetIdGenEnum.DATAID_TYPE_RUN` makes a
            deterministic UUID5-type ID based on a dataset type name, run
            collection name, and ``dataId``.

        Returns
        -------
        datasetId : `uuid.UUID`
            Dataset identifier.
        """
        if idGenerationMode is DatasetIdGenEnum.UNIQUE:
            return uuid.uuid4()
        else:
            # WARNING: If you modify this code make sure that the order of
            # items in the `items` list below never changes.
            items: list[tuple[str, str]] = []
            if idGenerationMode is DatasetIdGenEnum.DATAID_TYPE:
                items = [
                    ("dataset_type", datasetType.name),
                ]
            elif idGenerationMode is DatasetIdGenEnum.DATAID_TYPE_RUN:
                items = [
                    ("dataset_type", datasetType.name),
                    ("run", run),
                ]
            else:
                raise ValueError(f"Unexpected ID generation mode: {idGenerationMode}")

            for name, value in sorted(dataId.byName().items()):
                items.append((name, str(value)))
            data = ",".join(f"{key}={value}" for key, value in items)
            return uuid.uuid5(self.NS_UUID, data)
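
# Illustrative sketch (not part of the original module): the deterministic
# modes always map the same inputs to the same UUID5, while UNIQUE draws a
# fresh UUID4 each time.  The dataset type and data ID names below are purely
# hypothetical.
#
#     factory = DatasetIdFactory()
#     id_a = factory.makeDatasetId("HSC/runs/example", calexp_type, data_id,
#                                  DatasetIdGenEnum.DATAID_TYPE_RUN)
#     id_b = factory.makeDatasetId("HSC/runs/example", calexp_type, data_id,
#                                  DatasetIdGenEnum.DATAID_TYPE_RUN)
#     assert id_a == id_b
#     assert factory.makeDatasetId("HSC/runs/example", calexp_type, data_id,
#                                  DatasetIdGenEnum.UNIQUE) != id_a
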
# This is constant, so don't recreate a set for each instance.
_serializedDatasetRefFieldsSet = {"id", "datasetType", "dataId", "run", "component"}


class SerializedDatasetRef(_BaseModelCompat):
    """Simplified model of a `DatasetRef` suitable for serialization."""

    id: uuid.UUID
    datasetType: SerializedDatasetType | None = None
    dataId: SerializedDataCoordinate | None = None
    run: StrictStr | None = None
    component: StrictStr | None = None

    if PYDANTIC_V2:
        # Cannot use an "after" validator since in some cases the validator
        # seems to trigger with the datasetType field not yet set.
        @pydantic.model_validator(mode="before")  # type: ignore[attr-defined]
        @classmethod
        def check_consistent_parameters(cls, data: dict[str, Any]) -> dict[str, Any]:
            has_datasetType = data.get("datasetType") is not None
            has_dataId = data.get("dataId") is not None
            if has_datasetType is not has_dataId:
                raise ValueError("If specifying datasetType or dataId, must specify both.")

            if data.get("component") is not None and has_datasetType:
                raise ValueError("datasetType can not be set if component is given.")
            return data

    else:

        @pydantic.validator("dataId")
        def _check_dataId(cls, v: Any, values: dict[str, Any]) -> Any:  # noqa: N805
            if v and (d := "datasetType") in values and values[d] is None:
                raise ValueError("Can not specify 'dataId' without specifying 'datasetType'")
            return v

        @pydantic.validator("component")
        def _check_component(cls, v: Any, values: dict[str, Any]) -> Any:  # noqa: N805
            # Component should not be given if datasetType is given.
            if v and (d := "datasetType") in values and values[d] is not None:
                raise ValueError(f"datasetType ({values[d]}) can not be set if component is given ({v}).")
            return v

    @classmethod
    def direct(
        cls,
        *,
        id: str,
        run: str,
        datasetType: dict[str, Any] | None = None,
        dataId: dict[str, Any] | None = None,
        component: str | None = None,
    ) -> SerializedDatasetRef:
        """Construct a `SerializedDatasetRef` directly without validators.

        Notes
        -----
        This differs from the pydantic "construct" method in that the
        arguments are explicitly what the model requires, and it will recurse
        through members, constructing them from their corresponding `direct`
        methods.

        The ``id`` parameter is a string representation of the dataset ID; it
        is converted to a UUID by this method.

        This method should only be called when the inputs are trusted.
        """
        serialized_datasetType = (
            SerializedDatasetType.direct(**datasetType) if datasetType is not None else None
        )
        serialized_dataId = SerializedDataCoordinate.direct(**dataId) if dataId is not None else None

        node = cls.model_construct(
            _fields_set=_serializedDatasetRefFieldsSet,
            id=uuid.UUID(id),
            datasetType=serialized_datasetType,
            dataId=serialized_dataId,
            run=sys.intern(run),
            component=component,
        )

        return node
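
# Illustrative sketch (the UUID string and run name below are made up):
# `direct` skips pydantic validation for trusted, already-simplified inputs,
# which is cheaper than the normal constructor when deserializing many refs.
#
#     sref = SerializedDatasetRef.direct(
#         id="ad51dba5-911b-4f56-a0de-5d1a7c06c124",
#         run="HSC/runs/example",
#     )
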
DatasetId: TypeAlias = uuid.UUID
"""A type-annotation alias for dataset ID providing typing flexibility."""


@immutable
class DatasetRef:
    """Reference to a Dataset in a `Registry`.

    A `DatasetRef` may point to a Dataset that does not yet exist
    (e.g., because it is a predicted input for provenance).

    Parameters
    ----------
    datasetType : `DatasetType`
        The `DatasetType` for this Dataset.
    dataId : `DataCoordinate`
        A mapping of dimensions that labels the Dataset within a Collection.
    run : `str`
        The name of the run this dataset was associated with when it was
        created.
    id : `DatasetId`, optional
        The unique identifier assigned when the dataset is created. If ``id``
        is not specified, a new unique ID will be created.
    conform : `bool`, optional
        If `True` (default), call `DataCoordinate.standardize` to ensure that
        the data ID's dimensions are consistent with the dataset type's.
        `DatasetRef` instances for which those dimensions are not equal should
        not be created in new code, but are still supported for backwards
        compatibility. New code should only pass `False` if it can guarantee
        that the dimensions are already consistent.
    id_generation_mode : `DatasetIdGenEnum`
        ID generation option. `~DatasetIdGenEnum.UNIQUE` makes a random
        UUID4-type ID. `~DatasetIdGenEnum.DATAID_TYPE` makes a
        deterministic UUID5-type ID based on a dataset type name and
        ``dataId``. `~DatasetIdGenEnum.DATAID_TYPE_RUN` makes a
        deterministic UUID5-type ID based on a dataset type name, run
        collection name, and ``dataId``.

    See Also
    --------
    :ref:`daf_butler_organizing_datasets`
    """

    _serializedType = SerializedDatasetRef
    __slots__ = (
        "_id",
        "datasetType",
        "dataId",
        "run",
    )

    def __init__(
        self,
        datasetType: DatasetType,
        dataId: DataCoordinate,
        run: str,
        *,
        id: DatasetId | None = None,
        conform: bool = True,
        id_generation_mode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE,
    ):
        self.datasetType = datasetType
        if conform:
            self.dataId = DataCoordinate.standardize(dataId, graph=datasetType.dimensions)
        else:
            self.dataId = dataId
        self.run = run
        if id is not None:
            self._id = id.int
        else:
            self._id = (
                DatasetIdFactory()
                .makeDatasetId(self.run, self.datasetType, self.dataId, id_generation_mode)
                .int
            )
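
    # Illustrative sketch (hypothetical dataset type and data ID; in real code
    # these come from the Registry and its dimension universe):
    #
    #     ref = DatasetRef(calexp_type, data_id, run="HSC/runs/example")
    #     ref.run   # "HSC/runs/example"
    #     ref.id    # a UUID, random or deterministic depending on the mode
    #
    # Note that the UUID is stored internally in its integer form (``_id``)
    # and re-wrapped as a `uuid.UUID` by the ``id`` property below.
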
    @property
    def id(self) -> DatasetId:
        """Primary key of the dataset (`DatasetId`).

        Cannot be changed after a `DatasetRef` is constructed.
        """
        return uuid.UUID(int=self._id)

    def __eq__(self, other: Any) -> bool:
        try:
            return (self.datasetType, self.dataId, self.id) == (other.datasetType, other.dataId, other.id)
        except AttributeError:
            return NotImplemented

    def __hash__(self) -> int:
        return hash((self.datasetType, self.dataId, self.id))

    @property
    def dimensions(self) -> DimensionGraph:
        """Dimensions associated with the underlying `DatasetType`."""
        return self.datasetType.dimensions

    def __repr__(self) -> str:
        # We delegate to __str__ (i.e. use "!s") for the data ID below because
        # DataCoordinate's __repr__ - while adhering to the guidelines for
        # __repr__ - is much harder for users to read, while its __str__ just
        # produces a dict that can also be passed to DatasetRef's constructor.
        return f"DatasetRef({self.datasetType!r}, {self.dataId!s}, run={self.run!r}, id={self.id})"

    def __str__(self) -> str:
        s = (
            f"{self.datasetType.name}@{self.dataId!s} [sc={self.datasetType.storageClass_name}]"
            f" (run={self.run} id={self.id})"
        )
        return s

    def __lt__(self, other: Any) -> bool:
        # Sort by run, DatasetType name and then by DataCoordinate.
        # The __str__ representation is probably close enough, but we need to
        # ensure that sorting a DatasetRef matches what you would get if you
        # sorted DatasetType+DataCoordinate.
        if not isinstance(other, type(self)):
            return NotImplemented

        # Group by run if defined; this takes precedence over DatasetType.
        self_run = "" if self.run is None else self.run
        other_run = "" if other.run is None else other.run

        # Compare tuples in the priority order.
        return (self_run, self.datasetType, self.dataId) < (other_run, other.datasetType, other.dataId)

    def to_simple(self, minimal: bool = False) -> SerializedDatasetRef:
        """Convert this class to a simple python type.

        This makes it suitable for serialization.

        Parameters
        ----------
        minimal : `bool`, optional
            Use minimal serialization. Requires Registry to convert
            back to a full type.

        Returns
        -------
        simple : `SerializedDatasetRef`
            The object converted to simplified form.
        """
        if minimal:
            # The only thing needed to uniquely define a DatasetRef is its id,
            # so that can be used directly if it is not a component DatasetRef.
            # Store it in a dict to allow us to easily add the planned origin
            # information later without having to support an int and dict in
            # simple form.
            simple: dict[str, Any] = {"id": self.id}
            if self.isComponent():
                # We can still be a little minimalist with a component,
                # but we will also need to record the datasetType component.
                simple["component"] = self.datasetType.component()
            return SerializedDatasetRef(**simple)

        return SerializedDatasetRef(
            datasetType=self.datasetType.to_simple(minimal=minimal),
            dataId=self.dataId.to_simple(),
            run=self.run,
            id=self.id,
        )
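
    # Illustrative sketch of the two serialization shapes (``ref`` is a
    # hypothetical resolved DatasetRef):
    #
    #     full = ref.to_simple()              # datasetType, dataId, run, id
    #     tiny = ref.to_simple(minimal=True)  # only id (plus the component
    #                                         # name, if ref is a component);
    #                                         # needs a Registry to expand
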
    @classmethod
    def from_simple(
        cls,
        simple: SerializedDatasetRef,
        universe: DimensionUniverse | None = None,
        registry: Registry | None = None,
        datasetType: DatasetType | None = None,
    ) -> DatasetRef:
        """Construct a new object from simplified form.

        Generally this is data returned from the `to_simple` method.

        Parameters
        ----------
        simple : `SerializedDatasetRef`
            The value returned by `to_simple()`.
        universe : `DimensionUniverse`
            The special graph of all known dimensions.
            Can be `None` if a registry is provided.
        registry : `lsst.daf.butler.Registry`, optional
            Registry to use to convert the simple form of a DatasetRef to
            a full `DatasetRef`. Can be `None` if a full description of
            the type is provided along with a universe.
        datasetType : `DatasetType`, optional
            If supplied, this will be used as the dataset type of the
            resulting DatasetRef instead of being read from the
            `SerializedDatasetRef`. This is useful when many refs share the
            same type, since memory can be saved. Defaults to `None`.

        Returns
        -------
        ref : `DatasetRef`
            Newly-constructed object.
        """
        cache = PersistenceContextVars.datasetRefs.get()
        localName = sys.intern(
            datasetType.name
            if datasetType is not None
            else (x.name if (x := simple.datasetType) is not None else "")
        )
        key = (simple.id.int, localName)
        if cache is not None and (cachedRef := cache.get(key, None)) is not None:
            return cachedRef
        # A minimalist component will just specify component and id and
        # require the registry to reconstruct it.
        if not (simple.datasetType is not None or simple.dataId is not None or simple.run is not None):
            if registry is None:
                raise ValueError("Registry is required to construct component DatasetRef from integer id")
            if simple.id is None:
                raise ValueError("For minimal DatasetRef the ID must be defined.")
            ref = registry.getDataset(simple.id)
            if ref is None:
                raise RuntimeError(f"No matching dataset found in registry for id {simple.id}")
            if simple.component:
                ref = ref.makeComponentRef(simple.component)
            if cache is not None:
                cache[key] = ref
            return ref

        if universe is None and registry is None:
            raise ValueError("One of universe or registry must be provided.")

        if universe is None and registry is not None:
            universe = registry.dimensions

        if universe is None:
            # This is for mypy.
            raise ValueError("Unable to determine a usable universe")

        if simple.datasetType is None and datasetType is None:
            # mypy
            raise ValueError("The DatasetType must be specified to construct a DatasetRef")
        if datasetType is None:
            if simple.datasetType is None:
                raise ValueError("Cannot determine Dataset type of this serialized class")
            datasetType = DatasetType.from_simple(simple.datasetType, universe=universe, registry=registry)

        if simple.dataId is None:
            # mypy
            raise ValueError("The DataId must be specified to construct a DatasetRef")
        dataId = DataCoordinate.from_simple(simple.dataId, universe=universe)

        # Check that the simple ref is resolved.
        if simple.run is None:
            dstr = ""
            if simple.datasetType is None:
                dstr = f" (datasetType={datasetType.name!r})"
            raise ValueError(
                "Run collection name is missing from serialized representation. "
                f"Encountered with {simple!r}{dstr}."
            )

        newRef = cls(datasetType, dataId, id=simple.id, run=simple.run)
        if cache is not None:
            cache[key] = newRef
        return newRef
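
    # Illustrative round trip (``butler`` is a hypothetical Butler instance
    # supplying the dimension universe):
    #
    #     simple = ref.to_simple()
    #     same_ref = DatasetRef.from_simple(simple, universe=butler.dimensions)
    #     assert same_ref == ref
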
    to_json = to_json_pydantic
    from_json: ClassVar = classmethod(from_json_pydantic)

    @classmethod
    def _unpickle(
        cls,
        datasetType: DatasetType,
        dataId: DataCoordinate,
        id: DatasetId,
        run: str,
    ) -> DatasetRef:
        """Create new `DatasetRef`.

        A custom factory method for use by `__reduce__` as a workaround for
        its lack of support for keyword arguments.
        """
        return cls(datasetType, dataId, id=id, run=run)

    def __reduce__(self) -> tuple:
        return (self._unpickle, (self.datasetType, self.dataId, self.id, self.run))

    def __deepcopy__(self, memo: dict) -> DatasetRef:
        # DatasetRef is recursively immutable; see note in @immutable
        # decorator.
        return self

    def expanded(self, dataId: DataCoordinate) -> DatasetRef:
        """Return a new `DatasetRef` with the given expanded data ID.

        Parameters
        ----------
        dataId : `DataCoordinate`
            Data ID for the new `DatasetRef`. Must compare equal to the
            original data ID.

        Returns
        -------
        ref : `DatasetRef`
            A new `DatasetRef` with the given data ID.
        """
        assert dataId == self.dataId
        return DatasetRef(
            datasetType=self.datasetType, dataId=dataId, id=self.id, run=self.run, conform=False
        )

    def isComponent(self) -> bool:
        """Indicate whether this `DatasetRef` refers to a component.

        Returns
        -------
        isComponent : `bool`
            `True` if this `DatasetRef` is a component, `False` otherwise.
        """
        return self.datasetType.isComponent()

    def isComposite(self) -> bool:
        """Boolean indicating whether this `DatasetRef` is a composite type.

        Returns
        -------
        isComposite : `bool`
            `True` if this `DatasetRef` is a composite type, `False`
            otherwise.
        """
        return self.datasetType.isComposite()

    def _lookupNames(self) -> tuple[LookupKey, ...]:
        """Name keys to use when looking up this DatasetRef in a
        configuration.

        The names are returned in order of priority.

        Returns
        -------
        names : `tuple` of `LookupKey`
            Tuple of the `DatasetType` name and the `StorageClass` name.
            If ``instrument`` is defined in the dataId, each of those names
            is added to the start of the tuple with a key derived from the
            value of ``instrument``.
        """
        # Special case the instrument Dimension since we allow configs
        # to include the instrument name in the hierarchy.
        names: tuple[LookupKey, ...] = self.datasetType._lookupNames()

        if "instrument" in self.dataId:
            names = tuple(n.clone(dataId={"instrument": self.dataId["instrument"]}) for n in names) + names

        return names

    @staticmethod
    def groupByType(refs: Iterable[DatasetRef]) -> NamedKeyDict[DatasetType, list[DatasetRef]]:
        """Group an iterable of `DatasetRef` by `DatasetType`.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` [ `DatasetRef` ]
            `DatasetRef` instances to group.

        Returns
        -------
        grouped : `NamedKeyDict` [ `DatasetType`, `list` [ `DatasetRef` ] ]
            Grouped `DatasetRef` instances.

        Notes
        -----
        When lazy item-iterables are acceptable instead of a full mapping,
        `iter_by_type` can in some cases be far more efficient.
        """
        result: NamedKeyDict[DatasetType, list[DatasetRef]] = NamedKeyDict()
        for ref in refs:
            result.setdefault(ref.datasetType, []).append(ref)
        return result
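
    # Illustrative sketch: grouping a mixed collection of refs by their
    # dataset type (``refs`` is a hypothetical iterable of DatasetRef):
    #
    #     for dataset_type, refs_of_type in DatasetRef.groupByType(refs).items():
    #         print(dataset_type.name, len(refs_of_type))
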
    @staticmethod
    def iter_by_type(
        refs: Iterable[DatasetRef],
    ) -> Iterable[tuple[DatasetType, Iterable[DatasetRef]]]:
        """Group an iterable of `DatasetRef` by `DatasetType` with special
        hooks for custom iterables that can do this efficiently.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` [ `DatasetRef` ]
            `DatasetRef` instances to group. If this satisfies the
            `_DatasetRefGroupedIterable` protocol, its
            `~_DatasetRefGroupedIterable._iter_by_dataset_type` method will
            be called.

        Returns
        -------
        grouped : `~collections.abc.Iterable` [ `tuple` [ `DatasetType`, \
                `Iterable` [ `DatasetRef` ] ] ]
            Grouped `DatasetRef` instances.
        """
        if isinstance(refs, _DatasetRefGroupedIterable):
            return refs._iter_by_dataset_type()
        return DatasetRef.groupByType(refs).items()

    def makeCompositeRef(self) -> DatasetRef:
        """Create a `DatasetRef` of the composite from a component ref.

        Requires that this `DatasetRef` is a component.

        Returns
        -------
        ref : `DatasetRef`
            A `DatasetRef` with a dataset type that corresponds to the
            composite parent of this component, and the same ID and run
            (which may be `None`, if they are `None` in ``self``).
        """
        # Assume that the data ID does not need to be standardized
        # and should match whatever this ref already has.
        return DatasetRef(
            self.datasetType.makeCompositeDatasetType(), self.dataId, id=self.id, run=self.run, conform=False
        )

    def makeComponentRef(self, name: str) -> DatasetRef:
        """Create a `DatasetRef` that corresponds to a component.

        Parameters
        ----------
        name : `str`
            Name of the component.

        Returns
        -------
        ref : `DatasetRef`
            A `DatasetRef` with a dataset type that corresponds to the given
            component, and the same ID and run
            (which may be `None`, if they are `None` in ``self``).
        """
        # Assume that the data ID does not need to be standardized
        # and should match whatever this ref already has.
        return DatasetRef(
            self.datasetType.makeComponentDatasetType(name),
            self.dataId,
            id=self.id,
            run=self.run,
            conform=False,
        )
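
    # Illustrative sketch (``exposure_ref`` is a hypothetical ref to a
    # composite dataset whose storage class defines a "wcs" component):
    #
    #     wcs_ref = exposure_ref.makeComponentRef("wcs")
    #     wcs_ref.isComponent()                # True
    #     parent = wcs_ref.makeCompositeRef()  # back to the composite type,
    #                                          # keeping the same ID and run
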
    def overrideStorageClass(self, storageClass: str | StorageClass) -> DatasetRef:
        """Create a new `DatasetRef` from this one, but with a modified
        `DatasetType` that has a different `StorageClass`.

        Parameters
        ----------
        storageClass : `str` or `StorageClass`
            The new storage class.

        Returns
        -------
        modified : `DatasetRef`
            A new dataset reference that is the same as the current one but
            with a different storage class in the `DatasetType`.
        """
        return DatasetRef(
            datasetType=self.datasetType.overrideStorageClass(storageClass),
            dataId=self.dataId,
            id=self.id,
            run=self.run,
            conform=False,
        )
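
    # Illustrative sketch (assumes a storage class named "ArrowAstropy" is
    # defined and convertible from this ref's storage class):
    #
    #     table_ref = ref.overrideStorageClass("ArrowAstropy")
    #     table_ref.id == ref.id   # True: same dataset, new in-memory type
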
    def is_compatible_with(self, ref: DatasetRef) -> bool:
        """Determine if the given `DatasetRef` is compatible with this one.

        Parameters
        ----------
        ref : `DatasetRef`
            Dataset ref to check.

        Returns
        -------
        is_compatible : `bool`
            Returns `True` if the other dataset ref is either the same as this
            one, or the dataset type associated with the other is compatible
            with this one and the dataId and dataset ID match.

        Notes
        -----
        Compatibility requires that the dataId, run, and dataset ID match and
        that the `DatasetType` is compatible. Compatibility is defined as the
        storage class associated with the dataset type of the other ref being
        convertible to this storage class.

        Specifically this means that if you have done:

        .. code-block:: py

            new_ref = ref.overrideStorageClass(sc)

        and this is successful, then the guarantee is that:

        .. code-block:: py

            assert ref.is_compatible_with(new_ref) is True

        since we know that the python type associated with the new ref can be
        converted to the original python type. The reverse is not guaranteed
        and depends on whether bidirectional converters have been registered.
        """
        if self.id != ref.id:
            return False
        if self.dataId != ref.dataId:
            return False
        if self.run != ref.run:
            return False
        return self.datasetType.is_compatible_with(ref.datasetType)

    datasetType: DatasetType
    """The definition of this dataset (`DatasetType`).

    Cannot be changed after a `DatasetRef` is constructed.
    """

    dataId: DataCoordinate
    """A mapping of `Dimension` primary key values that labels the dataset
    within a Collection (`DataCoordinate`).

    Cannot be changed after a `DatasetRef` is constructed.
    """

    run: str
    """The name of the run that produced the dataset.

    Cannot be changed after a `DatasetRef` is constructed.
    """