Coverage for python/lsst/daf/butler/_dataset_ref.py: 33%
237 statements
coverage.py v7.3.2, created at 2023-10-27 09:44 +0000
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = [
    "AmbiguousDatasetError",
    "DatasetDatastoreRecords",
    "DatasetId",
    "DatasetIdFactory",
    "DatasetIdGenEnum",
    "DatasetRef",
    "SerializedDatasetRef",
]

import enum
import sys
import uuid
from collections.abc import Iterable, Mapping
from typing import TYPE_CHECKING, Any, ClassVar, Literal, Protocol, TypeAlias, runtime_checkable

import pydantic
from lsst.daf.butler._compat import PYDANTIC_V2, _BaseModelCompat
from lsst.utils.classes import immutable
from pydantic import StrictStr

from ._config_support import LookupKey
from ._dataset_type import DatasetType, SerializedDatasetType
from ._named import NamedKeyDict
from .datastore.stored_file_info import StoredDatastoreItemInfo
from .dimensions import DataCoordinate, DimensionGraph, DimensionUniverse, SerializedDataCoordinate
from .json import from_json_pydantic, to_json_pydantic
from .persistence_context import PersistenceContextVars

if TYPE_CHECKING:
    from ._storage_class import StorageClass
    from .registry import Registry

# Per-dataset records grouped by opaque table name; usually there is just one
# opaque table.
DatasetDatastoreRecords: TypeAlias = Mapping[str, Iterable[StoredDatastoreItemInfo]]


class AmbiguousDatasetError(Exception):
    """Raised when a `DatasetRef` is not resolved but should be.

    This happens when the `DatasetRef` has no ID or run but the requested
    operation requires one of them.
    """


@runtime_checkable
class _DatasetRefGroupedIterable(Protocol):
    """A package-private interface for iterables of `DatasetRef` that know how
    to efficiently group their contents by `DatasetType`.
    """

    def _iter_by_dataset_type(self) -> Iterable[tuple[DatasetType, Iterable[DatasetRef]]]:
        """Iterate over `DatasetRef` instances, one `DatasetType` at a time.

        Returns
        -------
        grouped : `~collections.abc.Iterable` [ `tuple` [ `DatasetType`, \
                `~collections.abc.Iterable` [ `DatasetRef` ] ] ]
            An iterable of tuples, in which the first element is a dataset type
            and the second is an iterable of `DatasetRef` objects with exactly
            that dataset type.
        """
        ...


class DatasetIdGenEnum(enum.Enum):
    """Enum used to specify dataset ID generation options."""

    UNIQUE = 0
    """Unique mode generates a unique ID for each inserted dataset, e.g.
    auto-generated by the database or a random UUID.
    """

    DATAID_TYPE = 1
    """In this mode the ID is computed deterministically from a combination of
    dataset type and dataId.
    """

    DATAID_TYPE_RUN = 2
    """In this mode the ID is computed deterministically from a combination of
    dataset type, dataId, and run collection name.
    """


class DatasetIdFactory:
    """Factory for dataset IDs (UUIDs).

    For now the logic is hard-coded and is controlled by the user-provided
    value of `DatasetIdGenEnum`. In the future we may implement configurable
    logic that can infer the `DatasetIdGenEnum` value from other parameters.
    """

    NS_UUID = uuid.UUID("840b31d9-05cd-5161-b2c8-00d32b280d0f")
    """Namespace UUID used for UUID5 generation. Do not change. This was
    produced by `uuid.uuid5(uuid.NAMESPACE_DNS, "lsst.org")`.
    """

    def makeDatasetId(
        self,
        run: str,
        datasetType: DatasetType,
        dataId: DataCoordinate,
        idGenerationMode: DatasetIdGenEnum,
    ) -> uuid.UUID:
        """Generate dataset ID for a dataset.

        Parameters
        ----------
        run : `str`
            Name of the RUN collection for the dataset.
        datasetType : `DatasetType`
            Dataset type.
        dataId : `DataCoordinate`
            Expanded data ID for the dataset.
        idGenerationMode : `DatasetIdGenEnum`
            ID generation option. `~DatasetIdGenEnum.UNIQUE` makes a random
            UUID4-type ID. `~DatasetIdGenEnum.DATAID_TYPE` makes a
            deterministic UUID5-type ID based on a dataset type name and
            ``dataId``. `~DatasetIdGenEnum.DATAID_TYPE_RUN` makes a
            deterministic UUID5-type ID based on a dataset type name, run
            collection name, and ``dataId``.

        Returns
        -------
        datasetId : `uuid.UUID`
            Dataset identifier.
        """
        if idGenerationMode is DatasetIdGenEnum.UNIQUE:
            return uuid.uuid4()
        else:
            # WARNING: If you modify this code make sure that the order of
            # items in the `items` list below never changes.
            items: list[tuple[str, str]] = []
            if idGenerationMode is DatasetIdGenEnum.DATAID_TYPE:
                items = [
                    ("dataset_type", datasetType.name),
                ]
            elif idGenerationMode is DatasetIdGenEnum.DATAID_TYPE_RUN:
                items = [
                    ("dataset_type", datasetType.name),
                    ("run", run),
                ]
            else:
                raise ValueError(f"Unexpected ID generation mode: {idGenerationMode}")

            for name, value in sorted(dataId.byName().items()):
                items.append((name, str(value)))
            data = ",".join(f"{key}={value}" for key, value in items)
            return uuid.uuid5(self.NS_UUID, data)
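
    # Illustrative sketch (not part of the original source): the name string
    # hashed by uuid5 above consists of the fixed items followed by the data
    # ID entries in alphabetical key order. With hypothetical values (dataset
    # type "raw", run "HSC/runs/test", and data ID {"instrument": "HSC",
    # "detector": 42}), DATAID_TYPE_RUN mode would hash the string
    #
    #     "dataset_type=raw,run=HSC/runs/test,detector=42,instrument=HSC"
    #
    # so repeated generation for the same dataset always yields the same UUID.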


# This is constant, so don't recreate a set for each instance
_serializedDatasetRefFieldsSet = {"id", "datasetType", "dataId", "run", "component"}


class SerializedDatasetRef(_BaseModelCompat):
    """Simplified model of a `DatasetRef` suitable for serialization."""

    id: uuid.UUID
    datasetType: SerializedDatasetType | None = None
    dataId: SerializedDataCoordinate | None = None
    run: StrictStr | None = None
    component: StrictStr | None = None

    if PYDANTIC_V2:
        # Can not use "after" validator since in some cases the validator
        # seems to trigger with the datasetType field not yet set.
        @pydantic.model_validator(mode="before")  # type: ignore[attr-defined]
        @classmethod
        def check_consistent_parameters(cls, data: dict[str, Any]) -> dict[str, Any]:
            has_datasetType = data.get("datasetType") is not None
            has_dataId = data.get("dataId") is not None
            if has_datasetType is not has_dataId:
                raise ValueError("If specifying datasetType or dataId, must specify both.")

            if data.get("component") is not None and has_datasetType:
                raise ValueError("datasetType can not be set if component is given.")
            return data

    else:

        @pydantic.validator("dataId")
        def _check_dataId(cls, v: Any, values: dict[str, Any]) -> Any:  # noqa: N805
            if v and (d := "datasetType") in values and values[d] is None:
                raise ValueError("Can not specify 'dataId' without specifying 'datasetType'")
            return v

        @pydantic.validator("component")
        def _check_component(cls, v: Any, values: dict[str, Any]) -> Any:  # noqa: N805
            # Component should not be given if datasetType is given
            if v and (d := "datasetType") in values and values[d] is not None:
                raise ValueError(f"datasetType ({values[d]}) can not be set if component is given ({v}).")
            return v

    @classmethod
    def direct(
        cls,
        *,
        id: str,
        run: str,
        datasetType: dict[str, Any] | None = None,
        dataId: dict[str, Any] | None = None,
        component: str | None = None,
    ) -> SerializedDatasetRef:
        """Construct a `SerializedDatasetRef` directly without validators.

        Notes
        -----
        This differs from the pydantic "construct" method in that the arguments
        are explicitly what the model requires, and it will recurse through
        members, constructing them from their corresponding `direct` methods.

        The ``id`` parameter is a string representation of the dataset ID; it
        is converted to a UUID by this method.

        This method should only be called when the inputs are trusted.
        """
        serialized_datasetType = (
            SerializedDatasetType.direct(**datasetType) if datasetType is not None else None
        )
        serialized_dataId = SerializedDataCoordinate.direct(**dataId) if dataId is not None else None

        node = cls.model_construct(
            _fields_set=_serializedDatasetRefFieldsSet,
            id=uuid.UUID(id),
            datasetType=serialized_datasetType,
            dataId=serialized_dataId,
            run=sys.intern(run),
            component=component,
        )

        return node
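
    # Hedged usage sketch (values are hypothetical, not from the original
    # source): deserializing a trusted record skips validation entirely.
    #
    #     serialized = SerializedDatasetRef.direct(
    #         id="00000000-0000-0000-0000-000000000000",
    #         run="some/run",
    #     )
    #
    # Because no validators run, inconsistent inputs (e.g. a dataId without a
    # datasetType) would not be caught here, hence the trust requirement.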


DatasetId: TypeAlias = uuid.UUID
"""A type-annotation alias for dataset ID providing typing flexibility."""


@immutable
class DatasetRef:
    """Reference to a Dataset in a `Registry`.

    A `DatasetRef` may point to a Dataset that does not yet exist
    (e.g., because it is a predicted input for provenance).

    Parameters
    ----------
    datasetType : `DatasetType`
        The `DatasetType` for this Dataset.
    dataId : `DataCoordinate`
        A mapping of dimensions that labels the Dataset within a Collection.
    run : `str`
        The name of the run this dataset was associated with when it was
        created.
    id : `DatasetId`, optional
        The unique identifier assigned when the dataset is created. If ``id``
        is not specified, a new unique ID will be created.
    conform : `bool`, optional
        If `True` (default), call `DataCoordinate.standardize` to ensure that
        the data ID's dimensions are consistent with the dataset type's.
        `DatasetRef` instances for which those dimensions are not equal should
        not be created in new code, but are still supported for backwards
        compatibility. New code should only pass `False` if it can guarantee
        that the dimensions are already consistent.
    id_generation_mode : `DatasetIdGenEnum`
        ID generation option. `~DatasetIdGenEnum.UNIQUE` makes a random
        UUID4-type ID. `~DatasetIdGenEnum.DATAID_TYPE` makes a
        deterministic UUID5-type ID based on a dataset type name and
        ``dataId``. `~DatasetIdGenEnum.DATAID_TYPE_RUN` makes a
        deterministic UUID5-type ID based on a dataset type name, run
        collection name, and ``dataId``.
    datastore_records : `DatasetDatastoreRecords`, optional
        Datastore records to attach to this ref.

    See Also
    --------
    :ref:`daf_butler_organizing_datasets`
    """

    _serializedType = SerializedDatasetRef
    __slots__ = (
        "_id",
        "datasetType",
        "dataId",
        "run",
        "_datastore_records",
    )

    def __init__(
        self,
        datasetType: DatasetType,
        dataId: DataCoordinate,
        run: str,
        *,
        id: DatasetId | None = None,
        conform: bool = True,
        id_generation_mode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE,
        datastore_records: DatasetDatastoreRecords | None = None,
    ):
        self.datasetType = datasetType
        if conform:
            self.dataId = DataCoordinate.standardize(dataId, graph=datasetType.dimensions)
        else:
            self.dataId = dataId
        self.run = run
        if id is not None:
            self._id = id.int
        else:
            self._id = (
                DatasetIdFactory()
                .makeDatasetId(self.run, self.datasetType, self.dataId, id_generation_mode)
                .int
            )
        self._datastore_records = datastore_records

    @property
    def id(self) -> DatasetId:
        """Primary key of the dataset (`DatasetId`).

        Cannot be changed after a `DatasetRef` is constructed.
        """
        return uuid.UUID(int=self._id)

    def __eq__(self, other: Any) -> bool:
        try:
            return (self.datasetType, self.dataId, self.id) == (other.datasetType, other.dataId, other.id)
        except AttributeError:
            return NotImplemented

    def __hash__(self) -> int:
        return hash((self.datasetType, self.dataId, self.id))

    @property
    def dimensions(self) -> DimensionGraph:
        """Dimensions associated with the underlying `DatasetType`."""
        return self.datasetType.dimensions

    def __repr__(self) -> str:
        # We delegate to __str__ (i.e. use "!s") for the data ID below because
        # DataCoordinate's __repr__ - while adhering to the guidelines for
        # __repr__ - is much harder for users to read, while its __str__ just
        # produces a dict that can also be passed to DatasetRef's constructor.
        return f"DatasetRef({self.datasetType!r}, {self.dataId!s}, run={self.run!r}, id={self.id})"

    def __str__(self) -> str:
        s = (
            f"{self.datasetType.name}@{self.dataId!s} [sc={self.datasetType.storageClass_name}]"
            f" (run={self.run} id={self.id})"
        )
        return s

    def __lt__(self, other: Any) -> bool:
        # Sort by run, DatasetType name, and then by DataCoordinate.
        # The __str__ representation is probably close enough, but we
        # need to ensure that sorting a DatasetRef matches what you would
        # get if you sorted DatasetType+DataCoordinate.
        if not isinstance(other, type(self)):
            return NotImplemented

        # Group by run if defined; it takes precedence over DatasetType.
        self_run = "" if self.run is None else self.run
        other_run = "" if other.run is None else other.run

        # Compare tuples in the priority order.
        return (self_run, self.datasetType, self.dataId) < (other_run, other.datasetType, other.dataId)

    def to_simple(self, minimal: bool = False) -> SerializedDatasetRef:
        """Convert this class to a simple python type.

        This makes it suitable for serialization.

        Parameters
        ----------
        minimal : `bool`, optional
            Use minimal serialization. Requires Registry to convert
            back to a full type.

        Returns
        -------
        simple : `SerializedDatasetRef`
            The object converted to simplified form.
        """
        if minimal:
            # The only thing needed to uniquely define a DatasetRef is its id
            # so that can be used directly if it is not a component DatasetRef.
            # Store it in a dict to allow us to easily add the planned origin
            # information later without having to support an int and dict in
            # simple form.
            simple: dict[str, Any] = {"id": self.id}
            if self.isComponent():
                # We can still be a little minimalist with a component
                # but we will also need to record the datasetType component
                simple["component"] = self.datasetType.component()
            return SerializedDatasetRef(**simple)

        return SerializedDatasetRef(
            datasetType=self.datasetType.to_simple(minimal=minimal),
            dataId=self.dataId.to_simple(),
            run=self.run,
            id=self.id,
        )
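
    # Illustrative round trip (a sketch, not from the original source): the
    # full form is self-contained, while the minimal form keeps only the id
    # (plus component) and needs a Registry to expand again.
    #
    #     simple = ref.to_simple()
    #     same_ref = DatasetRef.from_simple(simple, universe=universe)
    #
    #     minimal = ref.to_simple(minimal=True)
    #     same_ref = DatasetRef.from_simple(minimal, registry=registry)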

    @classmethod
    def from_simple(
        cls,
        simple: SerializedDatasetRef,
        universe: DimensionUniverse | None = None,
        registry: Registry | None = None,
        datasetType: DatasetType | None = None,
    ) -> DatasetRef:
        """Construct a new object from simplified form.

        Generally this is data returned from the `to_simple` method.

        Parameters
        ----------
        simple : `SerializedDatasetRef`
            The value returned by `to_simple()`.
        universe : `DimensionUniverse`, optional
            The special graph of all known dimensions.
            Can be `None` if a registry is provided.
        registry : `lsst.daf.butler.Registry`, optional
            Registry to use to convert the simple form of a DatasetRef to
            a full `DatasetRef`. Can be `None` if a full description of
            the type is provided along with a universe.
        datasetType : `DatasetType`, optional
            If supplied, this will be used as the datasetType
            object in the resulting DatasetRef instead of being read from
            the `SerializedDatasetRef`. This is useful when many refs share
            the same type, as memory can be saved. Defaults to `None`.

        Returns
        -------
        ref : `DatasetRef`
            Newly-constructed object.
        """
        cache = PersistenceContextVars.datasetRefs.get()
        localName = sys.intern(
            datasetType.name
            if datasetType is not None
            else (x.name if (x := simple.datasetType) is not None else "")
        )
        key = (simple.id.int, localName)
        if cache is not None and (cachedRef := cache.get(key, None)) is not None:
            return cachedRef
        # Minimalist component will just specify component and id and
        # require registry to reconstruct
        if not (simple.datasetType is not None or simple.dataId is not None or simple.run is not None):
            if registry is None:
                raise ValueError("Registry is required to construct component DatasetRef from integer id")
            if simple.id is None:
                raise ValueError("For minimal DatasetRef the ID must be defined.")
            ref = registry.getDataset(simple.id)
            if ref is None:
                raise RuntimeError(f"No matching dataset found in registry for id {simple.id}")
            if simple.component:
                ref = ref.makeComponentRef(simple.component)
            if cache is not None:
                cache[key] = ref
            return ref

        if universe is None and registry is None:
            raise ValueError("One of universe or registry must be provided.")

        if universe is None and registry is not None:
            universe = registry.dimensions

        if universe is None:
            # this is for mypy
            raise ValueError("Unable to determine a usable universe")

        if simple.datasetType is None and datasetType is None:
            # mypy
            raise ValueError("The DatasetType must be specified to construct a DatasetRef")
        if datasetType is None:
            if simple.datasetType is None:
                raise ValueError("Cannot determine Dataset type of this serialized class")
            datasetType = DatasetType.from_simple(simple.datasetType, universe=universe, registry=registry)

        if simple.dataId is None:
            # mypy
            raise ValueError("The DataId must be specified to construct a DatasetRef")
        dataId = DataCoordinate.from_simple(simple.dataId, universe=universe)

        # Check that simple ref is resolved.
        if simple.run is None:
            dstr = ""
            if simple.datasetType is None:
                dstr = f" (datasetType={datasetType.name!r})"
            raise ValueError(
                "Run collection name is missing from serialized representation. "
                f"Encountered with {simple!r}{dstr}."
            )

        newRef = cls(
            datasetType,
            dataId,
            id=simple.id,
            run=simple.run,
        )
        if cache is not None:
            cache[key] = newRef
        return newRef

    to_json = to_json_pydantic
    from_json: ClassVar = classmethod(from_json_pydantic)

    @classmethod
    def _unpickle(
        cls,
        datasetType: DatasetType,
        dataId: DataCoordinate,
        id: DatasetId,
        run: str,
        datastore_records: DatasetDatastoreRecords | None,
    ) -> DatasetRef:
        """Create new `DatasetRef`.

        A custom factory method for use by `__reduce__` as a workaround for
        its lack of support for keyword arguments.
        """
        return cls(datasetType, dataId, id=id, run=run, datastore_records=datastore_records)

    def __reduce__(self) -> tuple:
        return (
            self._unpickle,
            (self.datasetType, self.dataId, self.id, self.run, self._datastore_records),
        )

    def __deepcopy__(self, memo: dict) -> DatasetRef:
        # DatasetRef is recursively immutable; see note in @immutable
        # decorator.
        return self

    def expanded(self, dataId: DataCoordinate) -> DatasetRef:
        """Return a new `DatasetRef` with the given expanded data ID.

        Parameters
        ----------
        dataId : `DataCoordinate`
            Data ID for the new `DatasetRef`. Must compare equal to the
            original data ID.

        Returns
        -------
        ref : `DatasetRef`
            A new `DatasetRef` with the given data ID.
        """
        assert dataId == self.dataId
        return DatasetRef(
            datasetType=self.datasetType,
            dataId=dataId,
            id=self.id,
            run=self.run,
            conform=False,
            datastore_records=self._datastore_records,
        )

    def isComponent(self) -> bool:
        """Indicate whether this `DatasetRef` refers to a component.

        Returns
        -------
        isComponent : `bool`
            `True` if this `DatasetRef` is a component, `False` otherwise.
        """
        return self.datasetType.isComponent()

    def isComposite(self) -> bool:
        """Indicate whether this `DatasetRef` is a composite type.

        Returns
        -------
        isComposite : `bool`
            `True` if this `DatasetRef` is a composite type, `False`
            otherwise.
        """
        return self.datasetType.isComposite()

    def _lookupNames(self) -> tuple[LookupKey, ...]:
        """Name keys to use when looking up this DatasetRef in a configuration.

        The names are returned in order of priority.

        Returns
        -------
        names : `tuple` of `LookupKey`
            Tuple of the `DatasetType` name and the `StorageClass` name.
            If ``instrument`` is defined in the dataId, each of those names
            is added to the start of the tuple with a key derived from the
            value of ``instrument``.
        """
        # Special case the instrument Dimension since we allow configs
        # to include the instrument name in the hierarchy.
        names: tuple[LookupKey, ...] = self.datasetType._lookupNames()

        if "instrument" in self.dataId:
            names = tuple(n.clone(dataId={"instrument": self.dataId["instrument"]}) for n in names) + names

        return names

    @staticmethod
    def groupByType(refs: Iterable[DatasetRef]) -> NamedKeyDict[DatasetType, list[DatasetRef]]:
        """Group an iterable of `DatasetRef` by `DatasetType`.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` [ `DatasetRef` ]
            `DatasetRef` instances to group.

        Returns
        -------
        grouped : `NamedKeyDict` [ `DatasetType`, `list` [ `DatasetRef` ] ]
            Grouped `DatasetRef` instances.

        Notes
        -----
        When lazy item-iterables are acceptable instead of a full mapping,
        `iter_by_type` can in some cases be far more efficient.
        """
        result: NamedKeyDict[DatasetType, list[DatasetRef]] = NamedKeyDict()
        for ref in refs:
            result.setdefault(ref.datasetType, []).append(ref)
        return result

    @staticmethod
    def iter_by_type(
        refs: Iterable[DatasetRef],
    ) -> Iterable[tuple[DatasetType, Iterable[DatasetRef]]]:
        """Group an iterable of `DatasetRef` by `DatasetType` with special
        hooks for custom iterables that can do this efficiently.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` [ `DatasetRef` ]
            `DatasetRef` instances to group. If this satisfies the
            `_DatasetRefGroupedIterable` protocol, its
            `~_DatasetRefGroupedIterable._iter_by_dataset_type` method will
            be called.

        Returns
        -------
        grouped : `~collections.abc.Iterable` [ `tuple` [ `DatasetType`, \
                `~collections.abc.Iterable` [ `DatasetRef` ] ] ]
            Grouped `DatasetRef` instances.
        """
        if isinstance(refs, _DatasetRefGroupedIterable):
            return refs._iter_by_dataset_type()
        return DatasetRef.groupByType(refs).items()
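
    # Usage sketch (hypothetical names, not from the original source): process
    # refs one dataset type at a time without materializing the full mapping
    # when the input iterable can group itself.
    #
    #     for dataset_type, type_refs in DatasetRef.iter_by_type(refs):
    #         print(dataset_type.name, len(list(type_refs)))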

    def makeCompositeRef(self) -> DatasetRef:
        """Create a `DatasetRef` of the composite from a component ref.

        Requires that this `DatasetRef` is a component.

        Returns
        -------
        ref : `DatasetRef`
            A `DatasetRef` with a dataset type that corresponds to the
            composite parent of this component, and the same ID and run
            (which may be `None`, if they are `None` in ``self``).
        """
        # Assume that the data ID does not need to be standardized
        # and should match whatever this ref already has.
        return DatasetRef(
            self.datasetType.makeCompositeDatasetType(),
            self.dataId,
            id=self.id,
            run=self.run,
            conform=False,
            datastore_records=self._datastore_records,
        )

    def makeComponentRef(self, name: str) -> DatasetRef:
        """Create a `DatasetRef` that corresponds to a component.

        Parameters
        ----------
        name : `str`
            Name of the component.

        Returns
        -------
        ref : `DatasetRef`
            A `DatasetRef` with a dataset type that corresponds to the given
            component, and the same ID and run
            (which may be `None`, if they are `None` in ``self``).
        """
        # Assume that the data ID does not need to be standardized
        # and should match whatever this ref already has.
        return DatasetRef(
            self.datasetType.makeComponentDatasetType(name),
            self.dataId,
            id=self.id,
            run=self.run,
            conform=False,
            datastore_records=self._datastore_records,
        )
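
    # Illustrative sketch (the component name "wcs" is hypothetical): a
    # component ref shares the dataset ID, run, and data ID of its composite
    # parent, and the two helpers above convert between the two views.
    #
    #     component_ref = ref.makeComponentRef("wcs")
    #     parent_ref = component_ref.makeCompositeRef()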

    def overrideStorageClass(self, storageClass: str | StorageClass) -> DatasetRef:
        """Create a new `DatasetRef` from this one, but with a modified
        `DatasetType` that has a different `StorageClass`.

        Parameters
        ----------
        storageClass : `str` or `StorageClass`
            The new storage class.

        Returns
        -------
        modified : `DatasetRef`
            A new dataset reference that is the same as the current one but
            with a different storage class in the `DatasetType`.
        """
        return self.replace(storage_class=storageClass)

    def replace(
        self,
        *,
        id: DatasetId | None = None,
        run: str | None = None,
        storage_class: str | StorageClass | None = None,
        datastore_records: DatasetDatastoreRecords | None | Literal[False] = False,
    ) -> DatasetRef:
        """Create a new `DatasetRef` from this one, but with some modified
        attributes.

        Parameters
        ----------
        id : `DatasetId` or `None`
            If not `None` then update dataset ID.
        run : `str` or `None`
            If not `None` then update run collection name. If ``id`` is
            `None` then this will also cause a new dataset ID to be generated.
        storage_class : `str` or `StorageClass` or `None`
            The new storage class. If not `None`, replaces existing storage
            class.
        datastore_records : `DatasetDatastoreRecords` or `None`
            New datastore records. If `None` remove all records. By default
            datastore records are preserved.

        Returns
        -------
        modified : `DatasetRef`
            A new dataset reference with updated attributes.
        """
        if datastore_records is False:
            datastore_records = self._datastore_records
        if storage_class is None:
            datasetType = self.datasetType
        else:
            datasetType = self.datasetType.overrideStorageClass(storage_class)
        if run is None:
            run = self.run
            # Do not regenerate dataset ID if run is the same.
            if id is None:
                id = self.id
        return DatasetRef(
            datasetType=datasetType,
            dataId=self.dataId,
            run=run,
            id=id,
            conform=False,
            datastore_records=datastore_records,
        )
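
    # Usage sketch (the collection name is hypothetical): moving a ref to a
    # new run without passing an explicit ``id`` generates a fresh dataset ID,
    # while the data ID and dataset type are carried over unchanged.
    #
    #     moved = ref.replace(run="other/run")
    #     assert moved.id != ref.id and moved.dataId == ref.dataId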

    def is_compatible_with(self, ref: DatasetRef) -> bool:
        """Determine if the given `DatasetRef` is compatible with this one.

        Parameters
        ----------
        ref : `DatasetRef`
            Dataset ref to check.

        Returns
        -------
        is_compatible : `bool`
            Returns `True` if the other dataset ref is either the same as this
            or the dataset type associated with the other is compatible with
            this one and the dataId and dataset ID match.

        Notes
        -----
        Compatibility requires that the dataId, run, and dataset ID match and
        the `DatasetType` is compatible. Dataset types are compatible when the
        storage class associated with the other ref's dataset type can be
        converted to this ref's storage class.

        Specifically this means that if you have done:

        .. code-block:: py

            new_ref = ref.overrideStorageClass(sc)

        and this is successful, then the guarantee is that:

        .. code-block:: py

            assert ref.is_compatible_with(new_ref) is True

        since we know that the python type associated with the new ref can
        be converted to the original python type. The reverse is not guaranteed
        and depends on whether bidirectional converters have been registered.
        """
        if self.id != ref.id:
            return False
        if self.dataId != ref.dataId:
            return False
        if self.run != ref.run:
            return False
        return self.datasetType.is_compatible_with(ref.datasetType)

    datasetType: DatasetType
    """The definition of this dataset (`DatasetType`).

    Cannot be changed after a `DatasetRef` is constructed.
    """

    dataId: DataCoordinate
    """A mapping of `Dimension` primary key values that labels the dataset
    within a Collection (`DataCoordinate`).

    Cannot be changed after a `DatasetRef` is constructed.
    """

    run: str
    """The name of the run that produced the dataset.

    Cannot be changed after a `DatasetRef` is constructed.
    """

    datastore_records: DatasetDatastoreRecords | None
    """Optional datastore records (`DatasetDatastoreRecords`).

    Cannot be changed after a `DatasetRef` is constructed.
    """