Coverage for python/lsst/daf/butler/_dataset_ref.py: 32%
242 statements · coverage.py v7.3.2, created at 2023-12-05 11:07 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
27from __future__ import annotations
29__all__ = [
30 "AmbiguousDatasetError",
31 "DatasetDatastoreRecords",
32 "DatasetId",
33 "DatasetIdFactory",
34 "DatasetIdGenEnum",
35 "DatasetRef",
36 "SerializedDatasetRef",
37]
39import enum
40import sys
41import uuid
42from collections.abc import Iterable, Mapping
43from typing import TYPE_CHECKING, Any, ClassVar, Literal, Protocol, TypeAlias, runtime_checkable
45import pydantic
46from lsst.daf.butler._compat import PYDANTIC_V2, _BaseModelCompat
47from lsst.utils.classes import immutable
48from pydantic import StrictStr
50from ._config_support import LookupKey
51from ._dataset_type import DatasetType, SerializedDatasetType
52from ._named import NamedKeyDict
53from .datastore.stored_file_info import StoredDatastoreItemInfo
54from .dimensions import DataCoordinate, DimensionGraph, DimensionUniverse, SerializedDataCoordinate
55from .json import from_json_pydantic, to_json_pydantic
56from .persistence_context import PersistenceContextVars
58if TYPE_CHECKING:
59 from ._storage_class import StorageClass
60 from .registry import Registry
62# Per-dataset records grouped by opaque table name; usually there is just one
63# opaque table.
64DatasetDatastoreRecords: TypeAlias = Mapping[str, Iterable[StoredDatastoreItemInfo]]
67class AmbiguousDatasetError(Exception):
68 """Raised when a `DatasetRef` is not resolved but should be.
70 This happens when the `DatasetRef` has no ID or run but the requested
71 operation requires one of them.
72 """
75@runtime_checkable
76class _DatasetRefGroupedIterable(Protocol):
77 """A package-private interface for iterables of `DatasetRef` that know how
78 to efficiently group their contents by `DatasetType`.
80 """
82 def _iter_by_dataset_type(self) -> Iterable[tuple[DatasetType, Iterable[DatasetRef]]]:
83 """Iterate over `DatasetRef` instances, one `DatasetType` at a time.
85 Returns
86 -------
87 grouped : `~collections.abc.Iterable` [ `tuple` [ `DatasetType`, \
88 `~collections.abc.Iterable` [ `DatasetRef` ] ] ]
89 An iterable of tuples, in which the first element is a dataset type
90 and the second is an iterable of `DatasetRef` objects with exactly
91 that dataset type.
92 """
93 ...
96class DatasetIdGenEnum(enum.Enum):
97 """Enum used to specify dataset ID generation options."""
99 UNIQUE = 0
100 """Unique mode generates unique ID for each inserted dataset, e.g.
101 auto-generated by database or random UUID.
102 """
104 DATAID_TYPE = 1
105 """In this mode ID is computed deterministically from a combination of
106 dataset type and dataId.
107 """
109 DATAID_TYPE_RUN = 2
110 """In this mode ID is computed deterministically from a combination of
111 dataset type, dataId, and run collection name.
112 """
115class DatasetIdFactory:
116 """Factory for dataset IDs (UUIDs).
118 For now the logic is hard-coded and is controlled by the user-provided
119 value of `DatasetIdGenEnum`. In the future we may implement configurable
120 logic that can guess the `DatasetIdGenEnum` value from other parameters.
121 """
123 NS_UUID = uuid.UUID("840b31d9-05cd-5161-b2c8-00d32b280d0f")
124 """Namespace UUID used for UUID5 generation. Do not change. This was
125 produced by `uuid.uuid5(uuid.NAMESPACE_DNS, "lsst.org")`.
126 """
128 def makeDatasetId(
129 self,
130 run: str,
131 datasetType: DatasetType,
132 dataId: DataCoordinate,
133 idGenerationMode: DatasetIdGenEnum,
134 ) -> uuid.UUID:
135 """Generate dataset ID for a dataset.
137 Parameters
138 ----------
139 run : `str`
140 Name of the RUN collection for the dataset.
141 datasetType : `DatasetType`
142 Dataset type.
143 dataId : `DataCoordinate`
144 Expanded data ID for the dataset.
145 idGenerationMode : `DatasetIdGenEnum`
146 ID generation option. `~DatasetIdGenEnum.UNIQUE` makes a random
147 UUID4-type ID. `~DatasetIdGenEnum.DATAID_TYPE` makes a
148 deterministic UUID5-type ID based on a dataset type name and
149 ``dataId``. `~DatasetIdGenEnum.DATAID_TYPE_RUN` makes a
150 deterministic UUID5-type ID based on a dataset type name, run
151 collection name, and ``dataId``.
153 Returns
154 -------
155 datasetId : `uuid.UUID`
156 Dataset identifier.
157 """
158 if idGenerationMode is DatasetIdGenEnum.UNIQUE:
159 return uuid.uuid4()
160 else:
161 # WARNING: If you modify this code make sure that the order of
162 # items in the `items` list below never changes.
163 items: list[tuple[str, str]] = []
164 if idGenerationMode is DatasetIdGenEnum.DATAID_TYPE:
165 items = [
166 ("dataset_type", datasetType.name),
167 ]
168 elif idGenerationMode is DatasetIdGenEnum.DATAID_TYPE_RUN:
169 items = [
170 ("dataset_type", datasetType.name),
171 ("run", run),
172 ]
173 else:
174 raise ValueError(f"Unexpected ID generation mode: {idGenerationMode}")
176 for name, value in sorted(dataId.required.items()):
177 items.append((name, str(value)))
178 data = ",".join(f"{key}={value}" for key, value in items)
179 return uuid.uuid5(self.NS_UUID, data)
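# A minimal usage sketch of the deterministic modes; the dataset type, data ID,
# and run below are hypothetical, for illustration only. DATAID_TYPE_RUN hashes
# "dataset_type=<name>,run=<run>,<dim>=<value>,..." (required dimensions sorted
# by name) with uuid5 under NS_UUID, so identical inputs always reproduce the
# same ID, while UNIQUE returns a fresh uuid4 each call.
#
#     factory = DatasetIdFactory()
#     id1 = factory.makeDatasetId("HSC/runs/test", flat_type, data_id,
#                                 DatasetIdGenEnum.DATAID_TYPE_RUN)
#     id2 = factory.makeDatasetId("HSC/runs/test", flat_type, data_id,
#                                 DatasetIdGenEnum.DATAID_TYPE_RUN)
#     assert id1 == id2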
182# This is constant, so don't recreate a set for each instance
183_serializedDatasetRefFieldsSet = {"id", "datasetType", "dataId", "run", "component"}
186class SerializedDatasetRef(_BaseModelCompat):
187 """Simplified model of a `DatasetRef` suitable for serialization."""
189 id: uuid.UUID
190 datasetType: SerializedDatasetType | None = None
191 dataId: SerializedDataCoordinate | None = None
192 run: StrictStr | None = None
193 component: StrictStr | None = None
195 if PYDANTIC_V2:
196 # Can not use "after" validator since in some cases the validator
197 # seems to trigger with the datasetType field not yet set.
198 @pydantic.model_validator(mode="before") # type: ignore[attr-defined]
199 @classmethod
200 def check_consistent_parameters(cls, data: dict[str, Any]) -> dict[str, Any]:
201 has_datasetType = data.get("datasetType") is not None
202 has_dataId = data.get("dataId") is not None
203 if has_datasetType is not has_dataId:
204 raise ValueError("If specifying datasetType or dataId, must specify both.")
206 if data.get("component") is not None and has_datasetType:
207 raise ValueError("datasetType can not be set if component is given.")
208 return data
210 else:
212 @pydantic.validator("dataId")
213 def _check_dataId(cls, v: Any, values: dict[str, Any]) -> Any: # noqa: N805
214 if v and (d := "datasetType") in values and values[d] is None:
215 raise ValueError("Can not specify 'dataId' without specifying 'datasetType'")
216 return v
218 @pydantic.validator("component")
219 def _check_component(cls, v: Any, values: dict[str, Any]) -> Any: # noqa: N805
220 # Component should not be given if datasetType is given
221 if v and (d := "datasetType") in values and values[d] is not None:
222 raise ValueError(f"datasetType ({values[d]}) can not be set if component is given ({v}).")
223 return v
225 @classmethod
226 def direct(
227 cls,
228 *,
229 id: str,
230 run: str,
231 datasetType: dict[str, Any] | None = None,
232 dataId: dict[str, Any] | None = None,
233 component: str | None = None,
234 ) -> SerializedDatasetRef:
235 """Construct a `SerializedDatasetRef` directly without validators.
237 Notes
238 -----
239 This differs from the pydantic "construct" method in that the arguments
240 are explicitly what the model requires, and it will recurse through
241 members, constructing them from their corresponding `direct` methods.
243 The ``id`` parameter is a string representation of the dataset ID; it is
244 converted to a UUID by this method.
246 This method should only be called when the inputs are trusted.
247 """
248 serialized_datasetType = (
249 SerializedDatasetType.direct(**datasetType) if datasetType is not None else None
250 )
251 serialized_dataId = SerializedDataCoordinate.direct(**dataId) if dataId is not None else None
253 node = cls.model_construct(
254 _fields_set=_serializedDatasetRefFieldsSet,
255 id=uuid.UUID(id),
256 datasetType=serialized_datasetType,
257 dataId=serialized_dataId,
258 run=sys.intern(run),
259 component=component,
260 )
262 return node
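# A hedged sketch of how ``direct`` might be used with trusted, previously
# serialized content; the dictionary values here are hypothetical placeholders.
#
#     serialized = SerializedDatasetRef.direct(
#         id="5b94a3a2-7c9a-4f3e-9f9a-1d2e3f4a5b6c",  # string form of the UUID
#         run="HSC/runs/test",
#         datasetType=simple_type_dict,    # dict form of a SerializedDatasetType
#         dataId=simple_data_id_dict,      # dict form of a SerializedDataCoordinate
#     )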
265DatasetId: TypeAlias = uuid.UUID
266"""A type-annotation alias for dataset ID providing typing flexibility.
267"""
270@immutable
271class DatasetRef:
272 """Reference to a Dataset in a `Registry`.
274 A `DatasetRef` may point to a Dataset that does not yet exist
275 (e.g., because it is a predicted input for provenance).
277 Parameters
278 ----------
279 datasetType : `DatasetType`
280 The `DatasetType` for this Dataset.
281 dataId : `DataCoordinate`
282 A mapping of dimensions that labels the Dataset within a Collection.
283 run : `str`
284 The name of the run this dataset was associated with when it was
285 created.
286 id : `DatasetId`, optional
287 The unique identifier assigned when the dataset is created. If ``id``
288 is not specified, a new unique ID will be created.
289 conform : `bool`, optional
290 If `True` (default), call `DataCoordinate.standardize` to ensure that
291 the data ID's dimensions are consistent with the dataset type's.
292 `DatasetRef` instances for which those dimensions are not equal should
293 not be created in new code, but are still supported for backwards
294 compatibility. New code should only pass `False` if it can guarantee
295 that the dimensions are already consistent.
296 id_generation_mode : `DatasetIdGenEnum`
297 ID generation option. `~DatasetIdGenEnum.UNIQUE` makes a random
298 UUID4-type ID. `~DatasetIdGenEnum.DATAID_TYPE` makes a
299 deterministic UUID5-type ID based on a dataset type name and
300 ``dataId``. `~DatasetIdGenEnum.DATAID_TYPE_RUN` makes a
301 deterministic UUID5-type ID based on a dataset type name, run
302 collection name, and ``dataId``.
304 See Also
305 --------
306 :ref:`daf_butler_organizing_datasets`
307 """
309 _serializedType = SerializedDatasetRef
310 __slots__ = (
311 "_id",
312 "datasetType",
313 "dataId",
314 "run",
315 "_datastore_records",
316 )
318 def __init__(
319 self,
320 datasetType: DatasetType,
321 dataId: DataCoordinate,
322 run: str,
323 *,
324 id: DatasetId | None = None,
325 conform: bool = True,
326 id_generation_mode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE,
327 datastore_records: DatasetDatastoreRecords | None = None,
328 ):
329 self.datasetType = datasetType
330 if conform:
331 self.dataId = DataCoordinate.standardize(dataId, dimensions=datasetType.dimensions)
332 else:
333 self.dataId = dataId
334 self.run = run
335 if id is not None:
336 self._id = id.int
337 else:
338 self._id = (
339 DatasetIdFactory()
340 .makeDatasetId(self.run, self.datasetType, self.dataId, id_generation_mode)
341 .int
342 )
343 self._datastore_records = datastore_records
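# A minimal construction sketch; ``flat_type`` and ``data_id`` are assumed to
# be a valid DatasetType and DataCoordinate. With the default conform=True the
# data ID is standardized against the dataset type's dimensions, and omitting
# ``id`` lets DatasetIdFactory generate one in the requested mode.
#
#     ref = DatasetRef(flat_type, data_id, run="HSC/runs/test")   # random UUID4
#     det = DatasetRef(flat_type, data_id, run="HSC/runs/test",
#                      id_generation_mode=DatasetIdGenEnum.DATAID_TYPE_RUN)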
345 @property
346 def id(self) -> DatasetId:
347 """Primary key of the dataset (`DatasetId`).
349 Cannot be changed after a `DatasetRef` is constructed.
350 """
351 return uuid.UUID(int=self._id)
353 def __eq__(self, other: Any) -> bool:
354 try:
355 return (self.datasetType, self.dataId, self.id) == (other.datasetType, other.dataId, other.id)
356 except AttributeError:
357 return NotImplemented
359 def __hash__(self) -> int:
360 return hash((self.datasetType, self.dataId, self.id))
362 @property
363 def dimensions(self) -> DimensionGraph:
364 """Dimensions associated with the underlying `DatasetType`."""
365 return self.datasetType.dimensions
367 def __repr__(self) -> str:
368 # We delegate to __str__ (i.e. use "!s") for the data ID below because
369 # DataCoordinate's __repr__ - while adhering to the guidelines for
370 # __repr__ - is much harder for users to read, while its __str__ just
371 # produces a dict that can also be passed to DatasetRef's constructor.
372 return f"DatasetRef({self.datasetType!r}, {self.dataId!s}, run={self.run!r}, id={self.id})"
374 def __str__(self) -> str:
375 s = (
376 f"{self.datasetType.name}@{self.dataId!s} [sc={self.datasetType.storageClass_name}]"
377 f" (run={self.run} id={self.id})"
378 )
379 return s
381 def __lt__(self, other: Any) -> bool:
382 # Sort by run, DatasetType name and then by DataCoordinate
383 # The __str__ representation is probably close enough but we
384 # need to ensure that sorting a DatasetRef matches what you would
385 # get if you sorted DatasetType+DataCoordinate
386 if not isinstance(other, type(self)):
387 return NotImplemented
389 # Group by run if defined, takes precedence over DatasetType
390 self_run = "" if self.run is None else self.run
391 other_run = "" if other.run is None else other.run
393 # Compare tuples in the priority order
394 return (self_run, self.datasetType, self.dataId) < (other_run, other.datasetType, other.dataId)
396 def to_simple(self, minimal: bool = False) -> SerializedDatasetRef:
397 """Convert this class to a simple python type.
399 This makes it suitable for serialization.
401 Parameters
402 ----------
403 minimal : `bool`, optional
404 Use minimal serialization. Requires Registry to convert
405 back to a full type.
407 Returns
408 -------
409 simple : `SerializedDatasetRef`
410 The object converted to simple form.
411 """
412 if minimal:
413 # The only thing needed to uniquely define a DatasetRef is its id
414 # so that can be used directly if it is not a component DatasetRef.
415 # Store it in a dict to allow us to easily add the planned origin
416 # information later without having to support an int and dict in
417 # simple form.
418 simple: dict[str, Any] = {"id": self.id}
419 if self.isComponent():
420 # We can still be a little minimalist with a component
421 # but we will also need to record the datasetType component
422 simple["component"] = self.datasetType.component()
423 return SerializedDatasetRef(**simple)
425 return SerializedDatasetRef(
426 datasetType=self.datasetType.to_simple(minimal=minimal),
427 dataId=self.dataId.to_simple(),
428 run=self.run,
429 id=self.id,
430 )
432 @classmethod
433 def from_simple(
434 cls,
435 simple: SerializedDatasetRef,
436 universe: DimensionUniverse | None = None,
437 registry: Registry | None = None,
438 datasetType: DatasetType | None = None,
439 ) -> DatasetRef:
440 """Construct a new object from simplified form.
442 Generally this is data returned from the `to_simple` method.
444 Parameters
445 ----------
446 simple : `SerializedDatasetRef`
447 The value returned by `to_simple()`.
448 universe : `DimensionUniverse`
449 The special graph of all known dimensions.
450 Can be `None` if a registry is provided.
451 registry : `lsst.daf.butler.Registry`, optional
452 Registry to use to convert simple form of a DatasetRef to
453 a full `DatasetRef`. Can be `None` if a full description of
454 the type is provided along with a universe.
455 datasetType : `DatasetType`, optional
456 If datasetType is supplied, this will be used as the datasetType
457 object in the resulting DatasetRef instead of being read from
458 the `SerializedDatasetRef`. This is useful when many refs share
459 the same type, since memory can be saved. Defaults to None.
461 Returns
462 -------
463 ref : `DatasetRef`
464 Newly-constructed object.
465 """
466 cache = PersistenceContextVars.datasetRefs.get()
467 key = simple.id.int
468 if cache is not None and (ref := cache.get(key, None)) is not None:
469 if datasetType is not None:
470 if (component := datasetType.component()) is not None:
471 ref = ref.makeComponentRef(component)
472 ref = ref.overrideStorageClass(datasetType.storageClass_name)
473 return ref
474 if simple.datasetType is not None:
475 _, component = DatasetType.splitDatasetTypeName(simple.datasetType.name)
476 if component is not None:
477 ref = ref.makeComponentRef(component)
478 if simple.datasetType.storageClass is not None:
479 ref = ref.overrideStorageClass(simple.datasetType.storageClass)
480 return ref
481 # If dataset type is not given ignore the cache, because we can't
482 # reliably return the right storage class.
483 # Minimalist component will just specify component and id and
484 # require registry to reconstruct
485 if simple.datasetType is None and simple.dataId is None and simple.run is None:
486 if registry is None:
487 raise ValueError("Registry is required to construct component DatasetRef from integer id")
488 if simple.id is None:
489 raise ValueError("For minimal DatasetRef the ID must be defined.")
490 ref = registry.getDataset(simple.id)
491 if ref is None:
492 raise RuntimeError(f"No matching dataset found in registry for id {simple.id}")
493 if simple.component:
494 ref = ref.makeComponentRef(simple.component)
495 else:
496 if universe is None:
497 if registry is None:
498 raise ValueError("One of universe or registry must be provided.")
499 universe = registry.dimensions
500 if datasetType is None:
501 if simple.datasetType is None:
502 raise ValueError("Cannot determine Dataset type of this serialized class")
503 datasetType = DatasetType.from_simple(
504 simple.datasetType, universe=universe, registry=registry
505 )
506 if simple.dataId is None:
507 # mypy
508 raise ValueError("The DataId must be specified to construct a DatasetRef")
509 dataId = DataCoordinate.from_simple(simple.dataId, universe=universe)
510 # Check that simple ref is resolved.
511 if simple.run is None:
512 dstr = ""
513 if simple.datasetType is None:
514 dstr = f" (datasetType={datasetType.name!r})"
515 raise ValueError(
516 "Run collection name is missing from serialized representation. "
517 f"Encountered with {simple!r}{dstr}."
518 )
519 ref = cls(
520 datasetType,
521 dataId,
522 id=simple.id,
523 run=simple.run,
524 )
525 if cache is not None:
526 if ref.datasetType.component() is not None:
527 cache[key] = ref.makeCompositeRef()
528 else:
529 cache[key] = ref
530 return ref
532 to_json = to_json_pydantic
533 from_json: ClassVar = classmethod(from_json_pydantic)
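# A hedged round-trip sketch; ``ref`` and ``butler`` are assumed to exist.
# Full serialization carries the dataset type, data ID, and run, while
# ``minimal=True`` keeps only the ID (plus component) and needs a Registry
# to rehydrate.
#
#     simple = ref.to_simple()
#     same = DatasetRef.from_simple(simple, universe=butler.dimensions)
#     json_str = ref.to_json(minimal=True)
#     same = DatasetRef.from_json(json_str, registry=butler.registry)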
535 @classmethod
536 def _unpickle(
537 cls,
538 datasetType: DatasetType,
539 dataId: DataCoordinate,
540 id: DatasetId,
541 run: str,
542 datastore_records: DatasetDatastoreRecords | None,
543 ) -> DatasetRef:
544 """Create new `DatasetRef`.
546 A custom factory method for use by `__reduce__` as a workaround for
547 its lack of support for keyword arguments.
548 """
549 return cls(datasetType, dataId, id=id, run=run, datastore_records=datastore_records)
551 def __reduce__(self) -> tuple:
552 return (
553 self._unpickle,
554 (self.datasetType, self.dataId, self.id, self.run, self._datastore_records),
555 )
557 def __deepcopy__(self, memo: dict) -> DatasetRef:
558 # DatasetRef is recursively immutable; see note in @immutable
559 # decorator.
560 return self
562 def expanded(self, dataId: DataCoordinate) -> DatasetRef:
563 """Return a new `DatasetRef` with the given expanded data ID.
565 Parameters
566 ----------
567 dataId : `DataCoordinate`
568 Data ID for the new `DatasetRef`. Must compare equal to the
569 original data ID.
571 Returns
572 -------
573 ref : `DatasetRef`
574 A new `DatasetRef` with the given data ID.
575 """
576 assert dataId == self.dataId
577 return DatasetRef(
578 datasetType=self.datasetType,
579 dataId=dataId,
580 id=self.id,
581 run=self.run,
582 conform=False,
583 datastore_records=self._datastore_records,
584 )
586 def isComponent(self) -> bool:
587 """Indicate whether this `DatasetRef` refers to a component.
589 Returns
590 -------
591 isComponent : `bool`
592 `True` if this `DatasetRef` is a component, `False` otherwise.
593 """
594 return self.datasetType.isComponent()
596 def isComposite(self) -> bool:
597 """Boolean indicating whether this `DatasetRef` is a composite type.
599 Returns
600 -------
601 isComposite : `bool`
602 `True` if this `DatasetRef` is a composite type, `False`
603 otherwise.
604 """
605 return self.datasetType.isComposite()
607 def _lookupNames(self) -> tuple[LookupKey, ...]:
608 """Name keys to use when looking up this DatasetRef in a configuration.
610 The names are returned in order of priority.
612 Returns
613 -------
614 names : `tuple` of `LookupKey`
615 Tuple of the `DatasetType` name and the `StorageClass` name.
616 If ``instrument`` is defined in the dataId, each of those names
617 is added to the start of the tuple with a key derived from the
618 value of ``instrument``.
619 """
620 # Special case the instrument Dimension since we allow configs
621 # to include the instrument name in the hierarchy.
622 names: tuple[LookupKey, ...] = self.datasetType._lookupNames()
624 if "instrument" in self.dataId:
625 names = tuple(n.clone(dataId={"instrument": self.dataId["instrument"]}) for n in names) + names
627 return names
629 @staticmethod
630 def groupByType(refs: Iterable[DatasetRef]) -> NamedKeyDict[DatasetType, list[DatasetRef]]:
631 """Group an iterable of `DatasetRef` by `DatasetType`.
633 Parameters
634 ----------
635 refs : `~collections.abc.Iterable` [ `DatasetRef` ]
636 `DatasetRef` instances to group.
638 Returns
639 -------
640 grouped : `NamedKeyDict` [ `DatasetType`, `list` [ `DatasetRef` ] ]
641 Grouped `DatasetRef` instances.
643 Notes
644 -----
645 When lazy item-iterables are acceptable instead of a full mapping,
646 `iter_by_type` can in some cases be far more efficient.
647 """
648 result: NamedKeyDict[DatasetType, list[DatasetRef]] = NamedKeyDict()
649 for ref in refs:
650 result.setdefault(ref.datasetType, []).append(ref)
651 return result
653 @staticmethod
654 def iter_by_type(
655 refs: Iterable[DatasetRef],
656 ) -> Iterable[tuple[DatasetType, Iterable[DatasetRef]]]:
657 """Group an iterable of `DatasetRef` by `DatasetType` with special
658 hooks for custom iterables that can do this efficiently.
660 Parameters
661 ----------
662 refs : `~collections.abc.Iterable` [ `DatasetRef` ]
663 `DatasetRef` instances to group. If this satisfies the
664 `_DatasetRefGroupedIterable` protocol, its
665 `~_DatasetRefGroupedIterable._iter_by_dataset_type` method will
666 be called.
668 Returns
669 -------
670 grouped : `~collections.abc.Iterable` [ `tuple` [ `DatasetType`, \
671 `Iterable` [ `DatasetRef` ] ]]
672 Grouped `DatasetRef` instances.
673 """
674 if isinstance(refs, _DatasetRefGroupedIterable):
675 return refs._iter_by_dataset_type()
676 return DatasetRef.groupByType(refs).items()
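# A small usage sketch; ``refs`` is any iterable of DatasetRef and ``process``
# is a hypothetical callable. ``iter_by_type`` defers to the iterable's own
# grouping when available, otherwise it falls back to ``groupByType``, which
# builds an eager NamedKeyDict.
#
#     for dataset_type, refs_for_type in DatasetRef.iter_by_type(refs):
#         process(dataset_type, list(refs_for_type))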
678 def makeCompositeRef(self) -> DatasetRef:
679 """Create a `DatasetRef` of the composite from a component ref.
681 Requires that this `DatasetRef` is a component.
683 Returns
684 -------
685 ref : `DatasetRef`
686 A `DatasetRef` with a dataset type that corresponds to the
687 composite parent of this component, and the same ID and run
688 (which may be `None`, if they are `None` in ``self``).
689 """
690 # Assume that the data ID does not need to be standardized
691 # and should match whatever this ref already has.
692 return DatasetRef(
693 self.datasetType.makeCompositeDatasetType(),
694 self.dataId,
695 id=self.id,
696 run=self.run,
697 conform=False,
698 datastore_records=self._datastore_records,
699 )
701 def makeComponentRef(self, name: str) -> DatasetRef:
702 """Create a `DatasetRef` that corresponds to a component.
704 Parameters
705 ----------
706 name : `str`
707 Name of the component.
709 Returns
710 -------
711 ref : `DatasetRef`
712 A `DatasetRef` with a dataset type that corresponds to the given
713 component, and the same ID and run
714 (which may be `None`, if they are `None` in ``self``).
715 """
716 # Assume that the data ID does not need to be standardized
717 # and should match whatever this ref already has.
718 return DatasetRef(
719 self.datasetType.makeComponentDatasetType(name),
720 self.dataId,
721 id=self.id,
722 run=self.run,
723 conform=False,
724 datastore_records=self._datastore_records,
725 )
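# A hedged sketch of the component/composite round trip; ``exposure_ref`` is a
# hypothetical ref to a composite dataset with a "wcs" component.
#
#     wcs_ref = exposure_ref.makeComponentRef("wcs")
#     assert wcs_ref.isComponent()
#     assert wcs_ref.id == exposure_ref.id      # same dataset, narrower view
#     parent = wcs_ref.makeCompositeRef()       # back to the composite type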
727 def overrideStorageClass(self, storageClass: str | StorageClass) -> DatasetRef:
728 """Create a new `DatasetRef` from this one, but with a modified
729 `DatasetType` that has a different `StorageClass`.
731 Parameters
732 ----------
733 storageClass : `str` or `StorageClass`
734 The new storage class.
736 Returns
737 -------
738 modified : `DatasetRef`
739 A new dataset reference that is the same as the current one but
740 with a different storage class in the `DatasetType`.
741 """
742 return self.replace(storage_class=storageClass)
744 def replace(
745 self,
746 *,
747 id: DatasetId | None = None,
748 run: str | None = None,
749 storage_class: str | StorageClass | None = None,
750 datastore_records: DatasetDatastoreRecords | None | Literal[False] = False,
751 ) -> DatasetRef:
752 """Create a new `DatasetRef` from this one, but with some modified
753 attributes.
755 Parameters
756 ----------
757 id : `DatasetId` or `None`
758 If not `None` then update dataset ID.
759 run : `str` or `None`
760 If not `None` then update run collection name. If ``id`` is
761 `None` then this will also cause a new dataset ID to be generated.
762 storage_class : `str` or `StorageClass` or `None`
763 The new storage class. If not `None`, replaces existing storage
764 class.
765 datastore_records : `DatasetDatastoreRecords` or `None`
766 New datastore records. If `None` remove all records. By default
767 datastore records are preserved.
769 Returns
770 -------
771 modified : `DatasetRef`
772 A new dataset reference with updated attributes.
773 """
774 if datastore_records is False:
775 datastore_records = self._datastore_records
776 if storage_class is None:
777 datasetType = self.datasetType
778 else:
779 datasetType = self.datasetType.overrideStorageClass(storage_class)
780 if run is None:
781 run = self.run
782 # Do not regenerate dataset ID if run is the same.
783 if id is None:
784 id = self.id
785 return DatasetRef(
786 datasetType=datasetType,
787 dataId=self.dataId,
788 run=run,
789 id=id,
790 conform=False,
791 datastore_records=datastore_records,
792 )
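# A hedged usage sketch of ``replace``; the run and storage class names are
# hypothetical. Changing the run without supplying an ID generates a fresh
# dataset ID, while the other variants keep the original ID.
#
#     moved = ref.replace(run="HSC/runs/other")        # new run, new UUID
#     converted = ref.replace(storage_class="ImageF")  # same ID, new storage class
#     bare = ref.replace(datastore_records=None)       # drop attached records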
794 def is_compatible_with(self, ref: DatasetRef) -> bool:
795 """Determine if the given `DatasetRef` is compatible with this one.
797 Parameters
798 ----------
799 ref : `DatasetRef`
800 Dataset ref to check.
802 Returns
803 -------
804 is_compatible : `bool`
805 Returns `True` if the other dataset ref is the same as this one,
806 or if its dataset type is compatible with this one and the dataId
807 and dataset ID match.
809 Notes
810 -----
811 Compatibility requires that the dataId and dataset ID match and the
812 `DatasetType` is compatible. Compatibility is defined as: the storage
813 class associated with the dataset type of the other ref can be
814 converted to this ref's storage class.
816 Specifically this means that if you have done:
818 .. code-block:: py
820 new_ref = ref.overrideStorageClass(sc)
822 and this is successful, then the guarantee is that:
824 .. code-block:: py
826 assert ref.is_compatible_with(new_ref) is True
828 since we know that the python type associated with the new ref can
829 be converted to the original python type. The reverse is not guaranteed
830 and depends on whether bidirectional converters have been registered.
831 """
832 if self.id != ref.id:
833 return False
834 if self.dataId != ref.dataId:
835 return False
836 if self.run != ref.run:
837 return False
838 return self.datasetType.is_compatible_with(ref.datasetType)
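# A small illustration of the guarantee described above; ``sc`` is assumed to
# be a storage class whose python type the original storage class converts to.
#
#     new_ref = ref.overrideStorageClass(sc)
#     assert ref.is_compatible_with(new_ref)
#     # The reverse direction holds only if converters are registered both ways:
#     # new_ref.is_compatible_with(ref)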
840 datasetType: DatasetType
841 """The definition of this dataset (`DatasetType`).
843 Cannot be changed after a `DatasetRef` is constructed.
844 """
846 dataId: DataCoordinate
847 """A mapping of `Dimension` primary key values that labels the dataset
848 within a Collection (`DataCoordinate`).
850 Cannot be changed after a `DatasetRef` is constructed.
851 """
853 run: str
854 """The name of the run that produced the dataset.
856 Cannot be changed after a `DatasetRef` is constructed.
857 """
859 datastore_records: DatasetDatastoreRecords | None
860 """Optional datastore records (`DatasetDatastoreRecords`).
862 Cannot be changed after a `DatasetRef` is constructed.
863 """