Coverage for python/lsst/daf/butler/_dataset_ref.py: 34%
230 statements
coverage.py v7.5.1, created at 2024-05-07 02:46 -0700
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = [
    "AmbiguousDatasetError",
    "DatasetDatastoreRecords",
    "DatasetId",
    "DatasetIdFactory",
    "DatasetIdGenEnum",
    "DatasetRef",
    "SerializedDatasetRef",
]

import enum
import sys
import uuid
from collections.abc import Iterable, Mapping
from typing import TYPE_CHECKING, Any, ClassVar, Literal, Protocol, TypeAlias, runtime_checkable

import pydantic
from lsst.utils.classes import immutable
from pydantic import StrictStr

from ._config_support import LookupKey
from ._dataset_type import DatasetType, SerializedDatasetType
from ._named import NamedKeyDict
from .datastore.stored_file_info import StoredDatastoreItemInfo
from .dimensions import DataCoordinate, DimensionGraph, DimensionUniverse, SerializedDataCoordinate
from .json import from_json_pydantic, to_json_pydantic
from .persistence_context import PersistenceContextVars

if TYPE_CHECKING:
    from ._storage_class import StorageClass
    from .registry import Registry

# Per-dataset records grouped by opaque table name; usually there is just one
# opaque table.
DatasetDatastoreRecords: TypeAlias = Mapping[str, list[StoredDatastoreItemInfo]]


class AmbiguousDatasetError(Exception):
    """Raised when a `DatasetRef` is not resolved but should be.

    This happens when the `DatasetRef` has no ID or run but the requested
    operation requires one of them.
    """


@runtime_checkable
class _DatasetRefGroupedIterable(Protocol):
    """A package-private interface for iterables of `DatasetRef` that know how
    to efficiently group their contents by `DatasetType`.
    """

    def _iter_by_dataset_type(self) -> Iterable[tuple[DatasetType, Iterable[DatasetRef]]]:
        """Iterate over `DatasetRef` instances, one `DatasetType` at a time.

        Returns
        -------
        grouped : `~collections.abc.Iterable` [ `tuple` [ `DatasetType`, \
                `~collections.abc.Iterable` [ `DatasetRef` ] ] ]
            An iterable of tuples, in which the first element is a dataset
            type and the second is an iterable of `DatasetRef` objects with
            exactly that dataset type.
        """
        ...


class DatasetIdGenEnum(enum.Enum):
    """Enum used to specify dataset ID generation options."""

    UNIQUE = 0
    """Unique mode generates a unique ID for each inserted dataset, e.g.
    auto-generated by the database or a random UUID.
    """

    DATAID_TYPE = 1
    """In this mode the ID is computed deterministically from a combination of
    dataset type and dataId.
    """

    DATAID_TYPE_RUN = 2
    """In this mode the ID is computed deterministically from a combination of
    dataset type, dataId, and run collection name.
    """


class DatasetIdFactory:
    """Factory for dataset IDs (UUIDs).

    For now the logic is hard-coded and is controlled by the user-provided
    value of `DatasetIdGenEnum`. In the future we may implement configurable
    logic that can guess the `DatasetIdGenEnum` value from other parameters.
    """

    NS_UUID = uuid.UUID("840b31d9-05cd-5161-b2c8-00d32b280d0f")
    """Namespace UUID used for UUID5 generation. Do not change. This was
    produced by `uuid.uuid5(uuid.NAMESPACE_DNS, "lsst.org")`.
    """

    def makeDatasetId(
        self,
        run: str,
        datasetType: DatasetType,
        dataId: DataCoordinate,
        idGenerationMode: DatasetIdGenEnum,
    ) -> uuid.UUID:
        """Generate a dataset ID for a dataset.

        Parameters
        ----------
        run : `str`
            Name of the RUN collection for the dataset.
        datasetType : `DatasetType`
            Dataset type.
        dataId : `DataCoordinate`
            Expanded data ID for the dataset.
        idGenerationMode : `DatasetIdGenEnum`
            ID generation option. `~DatasetIdGenEnum.UNIQUE` makes a random
            UUID4-type ID. `~DatasetIdGenEnum.DATAID_TYPE` makes a
            deterministic UUID5-type ID based on a dataset type name and
            ``dataId``. `~DatasetIdGenEnum.DATAID_TYPE_RUN` makes a
            deterministic UUID5-type ID based on a dataset type name, run
            collection name, and ``dataId``.

        Returns
        -------
        datasetId : `uuid.UUID`
            Dataset identifier.
        """
        if idGenerationMode is DatasetIdGenEnum.UNIQUE:
            return uuid.uuid4()
        else:
            # WARNING: If you modify this code make sure that the order of
            # items in the `items` list below never changes.
            items: list[tuple[str, str]] = []
            if idGenerationMode is DatasetIdGenEnum.DATAID_TYPE:
                items = [
                    ("dataset_type", datasetType.name),
                ]
            elif idGenerationMode is DatasetIdGenEnum.DATAID_TYPE_RUN:
                items = [
                    ("dataset_type", datasetType.name),
                    ("run", run),
                ]
            else:
                raise ValueError(f"Unexpected ID generation mode: {idGenerationMode}")

            for name, value in sorted(dataId.required.items()):
                items.append((name, str(value)))
            data = ",".join(f"{key}={value}" for key, value in items)
            return uuid.uuid5(self.NS_UUID, data)
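
# Illustrative sketch (not part of the module): behaviour of the factory for
# the different generation modes. ``calexp_type`` and ``data_id`` are
# hypothetical placeholders for an existing DatasetType and expanded
# DataCoordinate.
#
#     factory = DatasetIdFactory()
#     # UNIQUE ignores the inputs and returns a random UUID4.
#     factory.makeDatasetId("example/run", calexp_type, data_id, DatasetIdGenEnum.UNIQUE)
#     # DATAID_TYPE_RUN hashes "dataset_type=...,run=...,<sorted data ID>" with
#     # uuid5 and NS_UUID, so the same inputs always yield the same UUID.
#     id1 = factory.makeDatasetId("example/run", calexp_type, data_id, DatasetIdGenEnum.DATAID_TYPE_RUN)
#     id2 = factory.makeDatasetId("example/run", calexp_type, data_id, DatasetIdGenEnum.DATAID_TYPE_RUN)
#     assert id1 == id2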


# This is constant, so don't recreate a set for each instance
_serializedDatasetRefFieldsSet = {"id", "datasetType", "dataId", "run", "component"}


class SerializedDatasetRef(pydantic.BaseModel):
    """Simplified model of a `DatasetRef` suitable for serialization."""

    id: uuid.UUID
    datasetType: SerializedDatasetType | None = None
    dataId: SerializedDataCoordinate | None = None
    run: StrictStr | None = None
    component: StrictStr | None = None

    # Can not use "after" validator since in some cases the validator
    # seems to trigger with the datasetType field not yet set.
    @pydantic.model_validator(mode="before")  # type: ignore[attr-defined]
    @classmethod
    def check_consistent_parameters(cls, data: dict[str, Any]) -> dict[str, Any]:
        has_datasetType = data.get("datasetType") is not None
        has_dataId = data.get("dataId") is not None
        if has_datasetType is not has_dataId:
            raise ValueError("If specifying datasetType or dataId, must specify both.")

        if data.get("component") is not None and has_datasetType:
            raise ValueError("datasetType can not be set if component is given.")
        return data

    @classmethod
    def direct(
        cls,
        *,
        id: str,
        run: str,
        datasetType: dict[str, Any] | None = None,
        dataId: dict[str, Any] | None = None,
        component: str | None = None,
    ) -> SerializedDatasetRef:
        """Construct a `SerializedDatasetRef` directly without validators.

        Parameters
        ----------
        id : `str`
            The UUID in string form.
        run : `str`
            The run for this dataset.
        datasetType : `dict` [`str`, `typing.Any`]
            A representation of the dataset type.
        dataId : `dict` [`str`, `typing.Any`]
            A representation of the data ID.
        component : `str` or `None`
            Any component associated with this ref.

        Returns
        -------
        serialized : `SerializedDatasetRef`
            A Pydantic model representing the given parameters.

        Notes
        -----
        This differs from the pydantic "construct" method in that the
        arguments are explicitly what the model requires, and it will recurse
        through members, constructing them from their corresponding `direct`
        methods.

        The ``id`` parameter is a string representation of the dataset ID; it
        is converted to a UUID by this method.

        This method should only be called when the inputs are trusted.
        """
        serialized_datasetType = (
            SerializedDatasetType.direct(**datasetType) if datasetType is not None else None
        )
        serialized_dataId = SerializedDataCoordinate.direct(**dataId) if dataId is not None else None

        node = cls.model_construct(
            _fields_set=_serializedDatasetRefFieldsSet,
            id=uuid.UUID(id),
            datasetType=serialized_datasetType,
            dataId=serialized_dataId,
            run=sys.intern(run),
            component=component,
        )

        return node
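
# Illustrative sketch (not part of the module): ``direct()`` bypasses pydantic
# validation, so it is meant for trusted, already-validated inputs such as
# rows coming back from a registry database. The values below are placeholders
# (``dataset_type_dict`` and ``data_id_dict`` are hypothetical dicts produced
# by the corresponding serialization step).
#
#     simple = SerializedDatasetRef.direct(
#         id="826c45fa-9779-5be8-925d-f4d22b7d5cfe",
#         run="example/run",
#         datasetType=dataset_type_dict,
#         dataId=data_id_dict,
#     )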


DatasetId: TypeAlias = uuid.UUID
"""A type-annotation alias for dataset ID providing typing flexibility.
"""


@immutable
class DatasetRef:
    """Reference to a Dataset in a `Registry`.

    A `DatasetRef` may point to a Dataset that does not yet exist
    (e.g., because it is a predicted input for provenance).

    Parameters
    ----------
    datasetType : `DatasetType`
        The `DatasetType` for this Dataset.
    dataId : `DataCoordinate`
        A mapping of dimensions that labels the Dataset within a Collection.
    run : `str`
        The name of the run this dataset was associated with when it was
        created.
    id : `DatasetId`, optional
        The unique identifier assigned when the dataset is created. If ``id``
        is not specified, a new unique ID will be created.
    conform : `bool`, optional
        If `True` (default), call `DataCoordinate.standardize` to ensure that
        the data ID's dimensions are consistent with the dataset type's.
        `DatasetRef` instances for which those dimensions are not equal should
        not be created in new code, but are still supported for backwards
        compatibility. New code should only pass `False` if it can guarantee
        that the dimensions are already consistent.
    id_generation_mode : `DatasetIdGenEnum`
        ID generation option. `~DatasetIdGenEnum.UNIQUE` makes a random
        UUID4-type ID. `~DatasetIdGenEnum.DATAID_TYPE` makes a
        deterministic UUID5-type ID based on a dataset type name and
        ``dataId``. `~DatasetIdGenEnum.DATAID_TYPE_RUN` makes a
        deterministic UUID5-type ID based on a dataset type name, run
        collection name, and ``dataId``.
    datastore_records : `DatasetDatastoreRecords` or `None`
        Datastore records to attach.

    Notes
    -----
    See also :ref:`daf_butler_organizing_datasets`.
    """

    _serializedType: ClassVar[type[pydantic.BaseModel]] = SerializedDatasetRef
    __slots__ = (
        "_id",
        "datasetType",
        "dataId",
        "run",
        "_datastore_records",
    )

    def __init__(
        self,
        datasetType: DatasetType,
        dataId: DataCoordinate,
        run: str,
        *,
        id: DatasetId | None = None,
        conform: bool = True,
        id_generation_mode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE,
        datastore_records: DatasetDatastoreRecords | None = None,
    ):
        self.datasetType = datasetType
        if conform:
            self.dataId = DataCoordinate.standardize(dataId, dimensions=datasetType.dimensions)
        else:
            self.dataId = dataId
        self.run = run
        if id is not None:
            self._id = id.int
        else:
            self._id = (
                DatasetIdFactory()
                .makeDatasetId(self.run, self.datasetType, self.dataId, id_generation_mode)
                .int
            )
        self._datastore_records = datastore_records
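
    # Illustrative sketch (not part of the class): constructing a ref. The
    # names ``calexp_type`` and ``data_id`` are hypothetical placeholders for
    # an existing DatasetType and DataCoordinate, and the run string is made
    # up.
    #
    #     ref = DatasetRef(calexp_type, data_id, run="example/run")
    #     # With no explicit ``id`` and the default UNIQUE generation mode, a
    #     # random UUID4 is assigned; ``ref.id`` is immutable afterwards.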

    @property
    def id(self) -> DatasetId:
        """Primary key of the dataset (`DatasetId`).

        Cannot be changed after a `DatasetRef` is constructed.
        """
        return uuid.UUID(int=self._id)

    def __eq__(self, other: Any) -> bool:
        try:
            return (self.datasetType, self.dataId, self.id) == (other.datasetType, other.dataId, other.id)
        except AttributeError:
            return NotImplemented

    def __hash__(self) -> int:
        return hash((self.datasetType, self.dataId, self.id))

    @property
    def dimensions(self) -> DimensionGraph:
        """Dimensions associated with the underlying `DatasetType`."""
        return self.datasetType.dimensions

    def __repr__(self) -> str:
        # We delegate to __str__ (i.e. use "!s") for the data ID below because
        # DataCoordinate's __repr__ - while adhering to the guidelines for
        # __repr__ - is much harder for users to read, while its __str__ just
        # produces a dict that can also be passed to DatasetRef's constructor.
        return f"DatasetRef({self.datasetType!r}, {self.dataId!s}, run={self.run!r}, id={self.id})"

    def __str__(self) -> str:
        s = (
            f"{self.datasetType.name}@{self.dataId!s} [sc={self.datasetType.storageClass_name}]"
            f" (run={self.run} id={self.id})"
        )
        return s

    def __lt__(self, other: Any) -> bool:
        # Sort by run, DatasetType name and then by DataCoordinate.
        # The __str__ representation is probably close enough but we
        # need to ensure that sorting a DatasetRef matches what you would
        # get if you sorted DatasetType+DataCoordinate.
        if not isinstance(other, type(self)):
            return NotImplemented

        # Group by run if defined; this takes precedence over DatasetType.
        self_run = "" if self.run is None else self.run
        other_run = "" if other.run is None else other.run

        # Compare tuples in the priority order.
        return (self_run, self.datasetType, self.dataId) < (other_run, other.datasetType, other.dataId)
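
    # Illustrative sketch (not part of the class): because ``__lt__`` compares
    # (run, dataset type, data ID) tuples, sorting a sequence of refs groups
    # them by run first, then by dataset type, then by data ID.
    #
    #     for ref in sorted(refs):  # ``refs`` is any sequence of DatasetRef
    #         ...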

    def to_simple(self, minimal: bool = False) -> SerializedDatasetRef:
        """Convert this class to a simple python type.

        This makes it suitable for serialization.

        Parameters
        ----------
        minimal : `bool`, optional
            Use minimal serialization. Requires Registry to convert
            back to a full type.

        Returns
        -------
        simple : `SerializedDatasetRef`
            The object converted to simplified form.
        """
        if minimal:
            # The only thing needed to uniquely define a DatasetRef is its id
            # so that can be used directly if it is not a component
            # DatasetRef. Store it in a dict to allow us to easily add the
            # planned origin information later without having to support an
            # int and dict in simple form.
            simple: dict[str, Any] = {"id": self.id}
            if self.isComponent():
                # We can still be a little minimalist with a component
                # but we will also need to record the datasetType component.
                simple["component"] = self.datasetType.component()
            return SerializedDatasetRef(**simple)

        return SerializedDatasetRef(
            datasetType=self.datasetType.to_simple(minimal=minimal),
            dataId=self.dataId.to_simple(),
            run=self.run,
            id=self.id,
        )

    @classmethod
    def from_simple(
        cls,
        simple: SerializedDatasetRef,
        universe: DimensionUniverse | None = None,
        registry: Registry | None = None,
        datasetType: DatasetType | None = None,
    ) -> DatasetRef:
        """Construct a new object from simplified form.

        Generally this is data returned from the `to_simple` method.

        Parameters
        ----------
        simple : `SerializedDatasetRef`
            The value returned by `to_simple()`.
        universe : `DimensionUniverse`, optional
            The special graph of all known dimensions.
            Can be `None` if a registry is provided.
        registry : `lsst.daf.butler.Registry`, optional
            Registry to use to convert the simple form of a DatasetRef to
            a full `DatasetRef`. Can be `None` if a full description of
            the type is provided along with a universe.
        datasetType : `DatasetType`, optional
            If supplied, this will be used as the datasetType object in the
            resulting DatasetRef instead of being read from the
            `SerializedDatasetRef`. This is useful when many refs share the
            same type, as memory can be saved. Defaults to `None`.

        Returns
        -------
        ref : `DatasetRef`
            Newly-constructed object.
        """
        cache = PersistenceContextVars.datasetRefs.get()
        key = simple.id.int
        if cache is not None and (ref := cache.get(key, None)) is not None:
            if datasetType is not None:
                if (component := datasetType.component()) is not None:
                    ref = ref.makeComponentRef(component)
                ref = ref.overrideStorageClass(datasetType.storageClass_name)
                return ref
            if simple.datasetType is not None:
                _, component = DatasetType.splitDatasetTypeName(simple.datasetType.name)
                if component is not None:
                    ref = ref.makeComponentRef(component)
                if simple.datasetType.storageClass is not None:
                    ref = ref.overrideStorageClass(simple.datasetType.storageClass)
                return ref
            # If the dataset type is not given, ignore the cache, because we
            # can't reliably return the right storage class.

        # A minimalist component will just specify component and id and
        # require registry to reconstruct.
        if simple.datasetType is None and simple.dataId is None and simple.run is None:
            if registry is None:
                raise ValueError("Registry is required to construct component DatasetRef from integer id")
            if simple.id is None:
                raise ValueError("For minimal DatasetRef the ID must be defined.")
            ref = registry.getDataset(simple.id)
            if ref is None:
                raise RuntimeError(f"No matching dataset found in registry for id {simple.id}")
            if simple.component:
                ref = ref.makeComponentRef(simple.component)
        else:
            if universe is None:
                if registry is None:
                    raise ValueError("One of universe or registry must be provided.")
                universe = registry.dimensions
            if datasetType is None:
                if simple.datasetType is None:
                    raise ValueError("Cannot determine Dataset type of this serialized class")
                datasetType = DatasetType.from_simple(
                    simple.datasetType, universe=universe, registry=registry
                )
            if simple.dataId is None:
                # mypy
                raise ValueError("The DataId must be specified to construct a DatasetRef")
            dataId = DataCoordinate.from_simple(simple.dataId, universe=universe)

            # Check that the simple ref is resolved.
            if simple.run is None:
                dstr = ""
                if simple.datasetType is None:
                    dstr = f" (datasetType={datasetType.name!r})"
                raise ValueError(
                    "Run collection name is missing from serialized representation. "
                    f"Encountered with {simple!r}{dstr}."
                )

            ref = cls(
                datasetType,
                dataId,
                id=simple.id,
                run=simple.run,
            )
        if cache is not None:
            if ref.datasetType.component() is not None:
                cache[key] = ref.makeCompositeRef()
            else:
                cache[key] = ref
        return ref
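
    # Illustrative sketch (not part of the class): a serialization round trip.
    # ``ref``, ``universe``, and ``butler`` are hypothetical pre-existing
    # objects.
    #
    #     simple = ref.to_simple()
    #     assert DatasetRef.from_simple(simple, universe=universe) == ref
    #     # The minimal form keeps only the ID (plus component, if any), so a
    #     # registry is needed to reconstruct the full ref.
    #     minimal = ref.to_simple(minimal=True)
    #     same_ref = DatasetRef.from_simple(minimal, registry=butler.registry)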

    to_json = to_json_pydantic
    from_json: ClassVar = classmethod(from_json_pydantic)

    @classmethod
    def _unpickle(
        cls,
        datasetType: DatasetType,
        dataId: DataCoordinate,
        id: DatasetId,
        run: str,
        datastore_records: DatasetDatastoreRecords | None,
    ) -> DatasetRef:
        """Create new `DatasetRef`.

        A custom factory method for use by `__reduce__` as a workaround for
        its lack of support for keyword arguments.
        """
        return cls(datasetType, dataId, id=id, run=run, datastore_records=datastore_records)

    def __reduce__(self) -> tuple:
        return (
            self._unpickle,
            (self.datasetType, self.dataId, self.id, self.run, self._datastore_records),
        )

    def __deepcopy__(self, memo: dict) -> DatasetRef:
        # DatasetRef is recursively immutable; see note in @immutable
        # decorator.
        return self

    def expanded(self, dataId: DataCoordinate) -> DatasetRef:
        """Return a new `DatasetRef` with the given expanded data ID.

        Parameters
        ----------
        dataId : `DataCoordinate`
            Data ID for the new `DatasetRef`. Must compare equal to the
            original data ID.

        Returns
        -------
        ref : `DatasetRef`
            A new `DatasetRef` with the given data ID.
        """
        assert dataId == self.dataId
        return DatasetRef(
            datasetType=self.datasetType,
            dataId=dataId,
            id=self.id,
            run=self.run,
            conform=False,
            datastore_records=self._datastore_records,
        )

    def isComponent(self) -> bool:
        """Indicate whether this `DatasetRef` refers to a component.

        Returns
        -------
        isComponent : `bool`
            `True` if this `DatasetRef` is a component, `False` otherwise.
        """
        return self.datasetType.isComponent()

    def isComposite(self) -> bool:
        """Boolean indicating whether this `DatasetRef` is a composite type.

        Returns
        -------
        isComposite : `bool`
            `True` if this `DatasetRef` is a composite type, `False`
            otherwise.
        """
        return self.datasetType.isComposite()

    def _lookupNames(self) -> tuple[LookupKey, ...]:
        """Name keys to use when looking up this DatasetRef in a configuration.

        The names are returned in order of priority.

        Returns
        -------
        names : `tuple` of `LookupKey`
            Tuple of the `DatasetType` name and the `StorageClass` name.
            If ``instrument`` is defined in the dataId, each of those names
            is added to the start of the tuple with a key derived from the
            value of ``instrument``.
        """
        # Special case the instrument Dimension since we allow configs
        # to include the instrument name in the hierarchy.
        names: tuple[LookupKey, ...] = self.datasetType._lookupNames()

        if "instrument" in self.dataId:
            names = tuple(n.clone(dataId={"instrument": self.dataId["instrument"]}) for n in names) + names

        return names

    @staticmethod
    def groupByType(refs: Iterable[DatasetRef]) -> NamedKeyDict[DatasetType, list[DatasetRef]]:
        """Group an iterable of `DatasetRef` by `DatasetType`.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` [ `DatasetRef` ]
            `DatasetRef` instances to group.

        Returns
        -------
        grouped : `NamedKeyDict` [ `DatasetType`, `list` [ `DatasetRef` ] ]
            Grouped `DatasetRef` instances.

        Notes
        -----
        When lazy item-iterables are acceptable instead of a full mapping,
        `iter_by_type` can in some cases be far more efficient.
        """
        result: NamedKeyDict[DatasetType, list[DatasetRef]] = NamedKeyDict()
        for ref in refs:
            result.setdefault(ref.datasetType, []).append(ref)
        return result

    @staticmethod
    def iter_by_type(
        refs: Iterable[DatasetRef],
    ) -> Iterable[tuple[DatasetType, Iterable[DatasetRef]]]:
        """Group an iterable of `DatasetRef` by `DatasetType` with special
        hooks for custom iterables that can do this efficiently.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` [ `DatasetRef` ]
            `DatasetRef` instances to group. If this satisfies the
            `_DatasetRefGroupedIterable` protocol, its
            `~_DatasetRefGroupedIterable._iter_by_dataset_type` method will
            be called.

        Returns
        -------
        grouped : `~collections.abc.Iterable` [ `tuple` [ `DatasetType`, \
                `Iterable` [ `DatasetRef` ] ] ]
            Grouped `DatasetRef` instances.
        """
        if isinstance(refs, _DatasetRefGroupedIterable):
            return refs._iter_by_dataset_type()
        return DatasetRef.groupByType(refs).items()
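
    # Illustrative sketch (not part of the class): both grouping helpers yield
    # (dataset type, refs) pairs; ``refs`` stands in for any iterable of
    # DatasetRef and ``process`` is a hypothetical callable.
    #
    #     for dataset_type, refs_for_type in DatasetRef.iter_by_type(refs):
    #         process(dataset_type, refs_for_type)
    #     # groupByType materializes a full NamedKeyDict instead:
    #     grouped = DatasetRef.groupByType(refs)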

    def makeCompositeRef(self) -> DatasetRef:
        """Create a `DatasetRef` of the composite from a component ref.

        Requires that this `DatasetRef` is a component.

        Returns
        -------
        ref : `DatasetRef`
            A `DatasetRef` with a dataset type that corresponds to the
            composite parent of this component, and the same ID and run
            (which may be `None`, if they are `None` in ``self``).
        """
        # Assume that the data ID does not need to be standardized
        # and should match whatever this ref already has.
        return DatasetRef(
            self.datasetType.makeCompositeDatasetType(),
            self.dataId,
            id=self.id,
            run=self.run,
            conform=False,
            datastore_records=self._datastore_records,
        )

    def makeComponentRef(self, name: str) -> DatasetRef:
        """Create a `DatasetRef` that corresponds to a component.

        Parameters
        ----------
        name : `str`
            Name of the component.

        Returns
        -------
        ref : `DatasetRef`
            A `DatasetRef` with a dataset type that corresponds to the given
            component, and the same ID and run
            (which may be `None`, if they are `None` in ``self``).
        """
        # Assume that the data ID does not need to be standardized
        # and should match whatever this ref already has.
        return DatasetRef(
            self.datasetType.makeComponentDatasetType(name),
            self.dataId,
            id=self.id,
            run=self.run,
            conform=False,
            datastore_records=self._datastore_records,
        )
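
    # Illustrative sketch (not part of the class): moving between a composite
    # ref and one of its component refs. The "wcs" component name is a
    # hypothetical example; both refs share the same ID, run, and data ID.
    #
    #     wcs_ref = ref.makeComponentRef("wcs")
    #     assert wcs_ref.isComponent()
    #     parent_ref = wcs_ref.makeCompositeRef()
    #     assert parent_ref.id == ref.id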

    def overrideStorageClass(self, storageClass: str | StorageClass) -> DatasetRef:
        """Create a new `DatasetRef` from this one, but with a modified
        `DatasetType` that has a different `StorageClass`.

        Parameters
        ----------
        storageClass : `str` or `StorageClass`
            The new storage class.

        Returns
        -------
        modified : `DatasetRef`
            A new dataset reference that is the same as the current one but
            with a different storage class in the `DatasetType`.
        """
        return self.replace(storage_class=storageClass)

    def replace(
        self,
        *,
        id: DatasetId | None = None,
        run: str | None = None,
        storage_class: str | StorageClass | None = None,
        datastore_records: DatasetDatastoreRecords | None | Literal[False] = False,
    ) -> DatasetRef:
        """Create a new `DatasetRef` from this one, but with some modified
        attributes.

        Parameters
        ----------
        id : `DatasetId` or `None`
            If not `None` then update the dataset ID.
        run : `str` or `None`
            If not `None` then update the run collection name. If ``id`` is
            `None` then this will also cause a new dataset ID to be generated.
        storage_class : `str` or `StorageClass` or `None`
            The new storage class. If not `None`, replaces the existing
            storage class.
        datastore_records : `DatasetDatastoreRecords` or `None`
            New datastore records. If `None` remove all records. By default
            datastore records are preserved.

        Returns
        -------
        modified : `DatasetRef`
            A new dataset reference with updated attributes.
        """
        if datastore_records is False:
            datastore_records = self._datastore_records
        if storage_class is None:
            datasetType = self.datasetType
        else:
            datasetType = self.datasetType.overrideStorageClass(storage_class)
        if run is None:
            run = self.run
            # Do not regenerate dataset ID if run is the same.
            if id is None:
                id = self.id
        return DatasetRef(
            datasetType=datasetType,
            dataId=self.dataId,
            run=run,
            id=id,
            conform=False,
            datastore_records=datastore_records,
        )
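
    # Illustrative sketch (not part of the class): ``replace`` copies the ref
    # with selected attributes changed. The storage class and run names are
    # hypothetical.
    #
    #     # Same run: the existing dataset ID is preserved unless ``id`` is given.
    #     converted = ref.replace(storage_class="ArrowAstropy")
    #     assert converted.id == ref.id
    #     # New run without an explicit ``id``: a new unique ID is generated.
    #     moved = ref.replace(run="example/other_run")
    #     assert moved.id != ref.id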

    def is_compatible_with(self, other: DatasetRef) -> bool:
        """Determine if the given `DatasetRef` is compatible with this one.

        Parameters
        ----------
        other : `DatasetRef`
            Dataset ref to check.

        Returns
        -------
        is_compatible : `bool`
            Returns `True` if the other dataset ref is either the same as this
            one, or if the dataset type associated with the other is
            compatible with this one and the dataId and dataset ID match.

        Notes
        -----
        Compatibility requires that the dataId and dataset ID match and that
        the `DatasetType` is compatible. Compatibility is defined as the
        storage class associated with the dataset type of the other ref being
        convertible to this ref's storage class.

        Specifically this means that if you have done:

        .. code-block:: py

            new_ref = ref.overrideStorageClass(sc)

        and this is successful, then the guarantee is that:

        .. code-block:: py

            assert ref.is_compatible_with(new_ref) is True

        since we know that the python type associated with the new ref can
        be converted to the original python type. The reverse is not
        guaranteed and depends on whether bidirectional converters have been
        registered.
        """
        if self.id != other.id:
            return False
        if self.dataId != other.dataId:
            return False
        if self.run != other.run:
            return False
        return self.datasetType.is_compatible_with(other.datasetType)

    datasetType: DatasetType
    """The definition of this dataset (`DatasetType`).

    Cannot be changed after a `DatasetRef` is constructed.
    """

    dataId: DataCoordinate
    """A mapping of `Dimension` primary key values that labels the dataset
    within a Collection (`DataCoordinate`).

    Cannot be changed after a `DatasetRef` is constructed.
    """

    run: str
    """The name of the run that produced the dataset.

    Cannot be changed after a `DatasetRef` is constructed.
    """

    datastore_records: DatasetDatastoreRecords | None
    """Optional datastore records (`DatasetDatastoreRecords`).

    Cannot be changed after a `DatasetRef` is constructed.
    """