# python/lsst/daf/butler/core/datasets/ref.py

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = [
    "AmbiguousDatasetError",
    "DatasetId",
    "DatasetIdFactory",
    "DatasetIdGenEnum",
    "DatasetRef",
    "SerializedDatasetRef",
]

import enum
import sys
import uuid
from collections.abc import Iterable
from typing import TYPE_CHECKING, Any, ClassVar, Protocol, TypeAlias, runtime_checkable

from lsst.daf.butler._compat import _BaseModelCompat
from lsst.utils.classes import immutable
from pydantic import StrictStr, validator

from ..configSupport import LookupKey
from ..dimensions import DataCoordinate, DimensionGraph, DimensionUniverse, SerializedDataCoordinate
from ..json import from_json_pydantic, to_json_pydantic
from ..named import NamedKeyDict
from ..persistenceContext import PersistenceContextVars
from .type import DatasetType, SerializedDatasetType

if TYPE_CHECKING:
    from ...registry import Registry
    from ..storageClass import StorageClass


class AmbiguousDatasetError(Exception):
    """Raised when a `DatasetRef` is not resolved but should be.

    This happens when the `DatasetRef` has no ID or run but the requested
    operation requires one of them.
    """


@runtime_checkable
class _DatasetRefGroupedIterable(Protocol):
    """A package-private interface for iterables of `DatasetRef` that know
    how to efficiently group their contents by `DatasetType`.
    """

    def _iter_by_dataset_type(self) -> Iterable[tuple[DatasetType, Iterable[DatasetRef]]]:
        """Iterate over `DatasetRef` instances, one `DatasetType` at a time.

        Returns
        -------
        grouped : `~collections.abc.Iterable` [ `tuple` [ `DatasetType`, \
                `~collections.abc.Iterable` [ `DatasetRef` ] ] ]
            An iterable of tuples, in which the first element is a dataset
            type and the second is an iterable of `DatasetRef` objects with
            exactly that dataset type.
        """
        ...
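
# A minimal sketch (not part of the real module) of an iterable satisfying
# the protocol above; because the protocol is ``@runtime_checkable``,
# ``isinstance`` only checks that the method exists, so a class like this
# would be accepted by `DatasetRef.iter_by_type`:
#
#     class _GroupedRefs:
#         def __init__(self, groups: dict[DatasetType, list[DatasetRef]]) -> None:
#             self._groups = groups
#
#         def _iter_by_dataset_type(self):
#             return self._groups.items()
#
#     assert isinstance(_GroupedRefs({}), _DatasetRefGroupedIterable)
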

class DatasetIdGenEnum(enum.Enum):
    """Enum used to specify dataset ID generation options."""

    UNIQUE = 0
    """This mode generates a unique ID for each inserted dataset, e.g.
    one auto-generated by the database or a random UUID.
    """

    DATAID_TYPE = 1
    """In this mode the ID is computed deterministically from a combination
    of dataset type and dataId.
    """

    DATAID_TYPE_RUN = 2
    """In this mode the ID is computed deterministically from a combination
    of dataset type, dataId, and run collection name.
    """


class DatasetIdFactory:
    """Factory for dataset IDs (UUIDs).

    For now the logic is hard-coded and is controlled by the user-provided
    value of `DatasetIdGenEnum`. In the future we may implement configurable
    logic that can guess the `DatasetIdGenEnum` value from other parameters.
    """

    NS_UUID = uuid.UUID("840b31d9-05cd-5161-b2c8-00d32b280d0f")
    """Namespace UUID used for UUID5 generation. Do not change. This was
    produced by `uuid.uuid5(uuid.NAMESPACE_DNS, "lsst.org")`.
    """

    def makeDatasetId(
        self,
        run: str,
        datasetType: DatasetType,
        dataId: DataCoordinate,
        idGenerationMode: DatasetIdGenEnum,
    ) -> uuid.UUID:
        """Generate a dataset ID for a dataset.

        Parameters
        ----------
        run : `str`
            Name of the RUN collection for the dataset.
        datasetType : `DatasetType`
            Dataset type.
        dataId : `DataCoordinate`
            Expanded data ID for the dataset.
        idGenerationMode : `DatasetIdGenEnum`
            ID generation option. `~DatasetIdGenEnum.UNIQUE` makes a random
            UUID4-type ID. `~DatasetIdGenEnum.DATAID_TYPE` makes a
            deterministic UUID5-type ID based on a dataset type name and
            ``dataId``. `~DatasetIdGenEnum.DATAID_TYPE_RUN` makes a
            deterministic UUID5-type ID based on a dataset type name, run
            collection name, and ``dataId``.

        Returns
        -------
        datasetId : `uuid.UUID`
            Dataset identifier.
        """
        if idGenerationMode is DatasetIdGenEnum.UNIQUE:
            return uuid.uuid4()
        else:
            # WARNING: If you modify this code make sure that the order of
            # items in the `items` list below never changes.
            items: list[tuple[str, str]] = []
            if idGenerationMode is DatasetIdGenEnum.DATAID_TYPE:
                items = [
                    ("dataset_type", datasetType.name),
                ]
            elif idGenerationMode is DatasetIdGenEnum.DATAID_TYPE_RUN:
                items = [
                    ("dataset_type", datasetType.name),
                    ("run", run),
                ]
            else:
                raise ValueError(f"Unexpected ID generation mode: {idGenerationMode}")

            for name, value in sorted(dataId.byName().items()):
                items.append((name, str(value)))
            data = ",".join(f"{key}={value}" for key, value in items)
            return uuid.uuid5(self.NS_UUID, data)
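
# Sketch of the deterministic modes above: the returned ID is the UUID5
# (under NS_UUID) of a canonical "key=value,..." string built from the
# dataset type name, optionally the run, and the sorted data ID values, so
# equal inputs always yield equal IDs. The literal string here is
# illustrative only:
#
#     data = "dataset_type=calexp,run=some_run,detector=10,visit=42"
#     assert uuid.uuid5(DatasetIdFactory.NS_UUID, data) == uuid.uuid5(
#         DatasetIdFactory.NS_UUID, data
#     )
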


# This is constant, so don't recreate a set for each instance.
_serializedDatasetRefFieldsSet = {"id", "datasetType", "dataId", "run", "component"}


class SerializedDatasetRef(_BaseModelCompat):
    """Simplified model of a `DatasetRef` suitable for serialization."""

    id: uuid.UUID
    datasetType: SerializedDatasetType | None = None
    dataId: SerializedDataCoordinate | None = None
    run: StrictStr | None = None
    component: StrictStr | None = None

    @validator("dataId")
    def _check_dataId(cls, v: Any, values: dict[str, Any]) -> Any:  # noqa: N805
        if (d := "datasetType") in values and values[d] is None:
            raise ValueError("Can not specify 'dataId' without specifying 'datasetType'")
        return v

    @validator("run")
    def _check_run(cls, v: Any, values: dict[str, Any]) -> Any:  # noqa: N805
        if v and (i := "id") in values and values[i] is None:
            raise ValueError("'run' cannot be provided unless 'id' is.")
        return v

    @validator("component")
    def _check_component(cls, v: Any, values: dict[str, Any]) -> Any:  # noqa: N805
        # Component should not be given if datasetType is given.
        if v and (d := "datasetType") in values and values[d] is not None:
            raise ValueError(f"datasetType ({values[d]}) can not be set if component is given ({v}).")
        return v
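
    # Sketch of the validators above (values illustrative): a ``dataId``
    # cannot be supplied without a ``datasetType``, so something like
    #
    #     SerializedDatasetRef(id=uuid.uuid4(), dataId=some_data_id)
    #
    # raises a pydantic ValidationError, while a bare id with a run
    #
    #     SerializedDatasetRef(id=uuid.uuid4(), run="some_run")
    #
    # validates fine. Here ``some_data_id`` stands for an assumed
    # `SerializedDataCoordinate`.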


    @classmethod
    def direct(
        cls,
        *,
        id: str,
        run: str,
        datasetType: dict[str, Any] | None = None,
        dataId: dict[str, Any] | None = None,
        component: str | None = None,
    ) -> SerializedDatasetRef:
        """Construct a `SerializedDatasetRef` directly without validators.

        Notes
        -----
        This differs from the pydantic "construct" method in that the
        arguments are explicitly what the model requires, and it will recurse
        through members, constructing them from their corresponding `direct`
        methods.

        The ``id`` parameter is a string representation of the dataset ID;
        it is converted to a UUID by this method.

        This method should only be called when the inputs are trusted.
        """
        serialized_datasetType = (
            SerializedDatasetType.direct(**datasetType) if datasetType is not None else None
        )
        serialized_dataId = SerializedDataCoordinate.direct(**dataId) if dataId is not None else None

        node = cls.model_construct(
            _fields_set=_serializedDatasetRefFieldsSet,
            id=uuid.UUID(id),
            datasetType=serialized_datasetType,
            dataId=serialized_dataId,
            run=sys.intern(run),
            component=component,
        )

        return node
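
# Trusted-input sketch for ``direct`` above (values illustrative): the
# validators are skipped and the string ID is converted to a `uuid.UUID`:
#
#     sref = SerializedDatasetRef.direct(
#         id="ad544563-94e3-54b4-b409-2f6a6e3888b5",
#         run="some_run",
#     )
#     assert isinstance(sref.id, uuid.UUID)
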


DatasetId: TypeAlias = uuid.UUID
"""A type-annotation alias for dataset ID providing typing flexibility."""



@immutable
class DatasetRef:
    """Reference to a Dataset in a `Registry`.

    A `DatasetRef` may point to a Dataset that does not yet exist
    (e.g., because it is a predicted input for provenance).

    Parameters
    ----------
    datasetType : `DatasetType`
        The `DatasetType` for this Dataset.
    dataId : `DataCoordinate`
        A mapping of dimensions that labels the Dataset within a Collection.
    run : `str`
        The name of the run this dataset was associated with when it was
        created.
    id : `DatasetId`, optional
        The unique identifier assigned when the dataset is created. If ``id``
        is not specified, a new unique ID will be created.
    conform : `bool`, optional
        If `True` (default), call `DataCoordinate.standardize` to ensure that
        the data ID's dimensions are consistent with the dataset type's.
        `DatasetRef` instances for which those dimensions are not equal should
        not be created in new code, but are still supported for backwards
        compatibility. New code should only pass `False` if it can guarantee
        that the dimensions are already consistent.
    id_generation_mode : `DatasetIdGenEnum`
        ID generation option. `~DatasetIdGenEnum.UNIQUE` makes a random
        UUID4-type ID. `~DatasetIdGenEnum.DATAID_TYPE` makes a
        deterministic UUID5-type ID based on a dataset type name and
        ``dataId``. `~DatasetIdGenEnum.DATAID_TYPE_RUN` makes a
        deterministic UUID5-type ID based on a dataset type name, run
        collection name, and ``dataId``.

    See Also
    --------
    :ref:`daf_butler_organizing_datasets`
    """

    _serializedType = SerializedDatasetRef
    __slots__ = (
        "_id",
        "datasetType",
        "dataId",
        "run",
    )

    def __init__(
        self,
        datasetType: DatasetType,
        dataId: DataCoordinate,
        run: str,
        *,
        id: DatasetId | None = None,
        conform: bool = True,
        id_generation_mode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE,
    ):
        self.datasetType = datasetType
        if conform:
            self.dataId = DataCoordinate.standardize(dataId, graph=datasetType.dimensions)
        else:
            self.dataId = dataId
        self.run = run
        if id is not None:
            self._id = id.int
        else:
            self._id = (
                DatasetIdFactory()
                .makeDatasetId(self.run, self.datasetType, self.dataId, id_generation_mode)
                .int
            )

    @property
    def id(self) -> DatasetId:
        """Primary key of the dataset (`DatasetId`).

        Cannot be changed after a `DatasetRef` is constructed.
        """
        return uuid.UUID(int=self._id)

    def __eq__(self, other: Any) -> bool:
        try:
            return (self.datasetType, self.dataId, self.id) == (other.datasetType, other.dataId, other.id)
        except AttributeError:
            return NotImplemented

    def __hash__(self) -> int:
        return hash((self.datasetType, self.dataId, self.id))


    @property
    def dimensions(self) -> DimensionGraph:
        """Dimensions associated with the underlying `DatasetType`."""
        return self.datasetType.dimensions


    def __repr__(self) -> str:
        # We delegate to __str__ (i.e. use "!s") for the data ID below
        # because DataCoordinate's __repr__ - while adhering to the
        # guidelines for __repr__ - is much harder for users to read, while
        # its __str__ just produces a dict that can also be passed to
        # DatasetRef's constructor.
        return f"DatasetRef({self.datasetType!r}, {self.dataId!s}, run={self.run!r}, id={self.id})"

    def __str__(self) -> str:
        s = (
            f"{self.datasetType.name}@{self.dataId!s} [sc={self.datasetType.storageClass_name}]"
            f" (run={self.run} id={self.id})"
        )
        return s


    def __lt__(self, other: Any) -> bool:
        # Sort by run, DatasetType name, and then by DataCoordinate.
        # The __str__ representation is probably close enough, but we need to
        # ensure that sorting a DatasetRef matches what you would get if you
        # sorted DatasetType+DataCoordinate.
        if not isinstance(other, type(self)):
            return NotImplemented

        # Group by run if defined; it takes precedence over DatasetType.
        self_run = "" if self.run is None else self.run
        other_run = "" if other.run is None else other.run

        # Compare tuples in the priority order.
        return (self_run, self.datasetType, self.dataId) < (other_run, other.datasetType, other.dataId)
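
    # Sorting sketch: given ``refs``, an assumed list of DatasetRef,
    # ``sorted(refs)`` orders first by run, then by dataset type, then by
    # data ID, so refs from the same run and dataset type end up adjacent:
    #
    #     for ref in sorted(refs):
    #         ...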


    def to_simple(self, minimal: bool = False) -> SerializedDatasetRef:
        """Convert this class to a simple python type.

        This makes it suitable for serialization.

        Parameters
        ----------
        minimal : `bool`, optional
            Use minimal serialization. Requires Registry to convert
            back to a full type.

        Returns
        -------
        simple : `SerializedDatasetRef`
            The object converted to simplified form.
        """
        if minimal:
            # The only thing needed to uniquely define a DatasetRef is its
            # id, so that can be used directly if it is not a component
            # DatasetRef. Store it in a dict to allow us to easily add the
            # planned origin information later without having to support an
            # int and dict in simple form.
            simple: dict[str, Any] = {"id": self.id}
            if self.isComponent():
                # We can still be a little minimalist with a component,
                # but we will also need to record the datasetType component.
                simple["component"] = self.datasetType.component()
            return SerializedDatasetRef(**simple)

        return SerializedDatasetRef(
            datasetType=self.datasetType.to_simple(minimal=minimal),
            dataId=self.dataId.to_simple(),
            run=self.run,
            id=self.id,
        )


    @classmethod
    def from_simple(
        cls,
        simple: SerializedDatasetRef,
        universe: DimensionUniverse | None = None,
        registry: Registry | None = None,
        datasetType: DatasetType | None = None,
    ) -> DatasetRef:
        """Construct a new object from simplified form.

        Generally this is data returned from the `to_simple` method.

        Parameters
        ----------
        simple : `SerializedDatasetRef`
            The value returned by `to_simple()`.
        universe : `DimensionUniverse`
            The special graph of all known dimensions.
            Can be `None` if a registry is provided.
        registry : `lsst.daf.butler.Registry`, optional
            Registry to use to convert the simple form of a DatasetRef to
            a full `DatasetRef`. Can be `None` if a full description of
            the type is provided along with a universe.
        datasetType : `DatasetType`, optional
            If supplied, this will be used as the datasetType object in the
            resulting DatasetRef instead of being read from the
            `SerializedDatasetRef`. This is useful when many refs share the
            same type, as memory can be saved. Defaults to `None`.

        Returns
        -------
        ref : `DatasetRef`
            Newly-constructed object.
        """
        cache = PersistenceContextVars.datasetRefs.get()
        localName = sys.intern(
            datasetType.name
            if datasetType is not None
            else (x.name if (x := simple.datasetType) is not None else "")
        )
        key = (simple.id.int, localName)
        if cache is not None and (cachedRef := cache.get(key, None)) is not None:
            return cachedRef
        # A minimalist component will just specify the component and id and
        # require the registry to reconstruct the rest.
        if not (simple.datasetType is not None or simple.dataId is not None or simple.run is not None):
            if registry is None:
                raise ValueError("Registry is required to construct component DatasetRef from integer id")
            if simple.id is None:
                raise ValueError("For minimal DatasetRef the ID must be defined.")
            ref = registry.getDataset(simple.id)
            if ref is None:
                raise RuntimeError(f"No matching dataset found in registry for id {simple.id}")
            if simple.component:
                ref = ref.makeComponentRef(simple.component)
            if cache is not None:
                cache[key] = ref
            return ref

        if universe is None and registry is None:
            raise ValueError("One of universe or registry must be provided.")

        if universe is None and registry is not None:
            universe = registry.dimensions

        if universe is None:
            # This is for mypy.
            raise ValueError("Unable to determine a usable universe")

        if simple.datasetType is None and datasetType is None:
            # mypy
            raise ValueError("The DatasetType must be specified to construct a DatasetRef")
        if datasetType is None:
            if simple.datasetType is None:
                raise ValueError("Cannot determine Dataset type of this serialized class")
            datasetType = DatasetType.from_simple(simple.datasetType, universe=universe, registry=registry)

        if simple.dataId is None:
            # mypy
            raise ValueError("The DataId must be specified to construct a DatasetRef")
        dataId = DataCoordinate.from_simple(simple.dataId, universe=universe)

        # Check that the simple ref is resolved.
        if simple.run is None:
            dstr = ""
            if simple.datasetType is None:
                dstr = f" (datasetType={datasetType.name!r})"
            raise ValueError(
                "Run collection name is missing from serialized representation. "
                f"Encountered with {simple!r}{dstr}."
            )

        newRef = cls(datasetType, dataId, id=simple.id, run=simple.run)
        if cache is not None:
            cache[key] = newRef
        return newRef


    to_json = to_json_pydantic
    from_json: ClassVar = classmethod(from_json_pydantic)
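
    # Round-trip sketch, assuming an existing resolved ``ref`` and a
    # ``universe`` (`DimensionUniverse`) compatible with it:
    #
    #     restored = DatasetRef.from_json(ref.to_json(), universe=universe)
    #     assert restored == ref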


    @classmethod
    def _unpickle(
        cls,
        datasetType: DatasetType,
        dataId: DataCoordinate,
        id: DatasetId,
        run: str,
    ) -> DatasetRef:
        """Create a new `DatasetRef`.

        A custom factory method for use by `__reduce__` as a workaround for
        its lack of support for keyword arguments.
        """
        return cls(datasetType, dataId, id=id, run=run)

    def __reduce__(self) -> tuple:
        return (self._unpickle, (self.datasetType, self.dataId, self.id, self.run))

    def __deepcopy__(self, memo: dict) -> DatasetRef:
        # DatasetRef is recursively immutable; see note in @immutable
        # decorator.
        return self


    def expanded(self, dataId: DataCoordinate) -> DatasetRef:
        """Return a new `DatasetRef` with the given expanded data ID.

        Parameters
        ----------
        dataId : `DataCoordinate`
            Data ID for the new `DatasetRef`. Must compare equal to the
            original data ID.

        Returns
        -------
        ref : `DatasetRef`
            A new `DatasetRef` with the given data ID.
        """
        assert dataId == self.dataId
        return DatasetRef(
            datasetType=self.datasetType, dataId=dataId, id=self.id, run=self.run, conform=False
        )


    def isComponent(self) -> bool:
        """Indicate whether this `DatasetRef` refers to a component.

        Returns
        -------
        isComponent : `bool`
            `True` if this `DatasetRef` is a component, `False` otherwise.
        """
        return self.datasetType.isComponent()

    def isComposite(self) -> bool:
        """Indicate whether this `DatasetRef` is a composite type.

        Returns
        -------
        isComposite : `bool`
            `True` if this `DatasetRef` is a composite type, `False`
            otherwise.
        """
        return self.datasetType.isComposite()


    def _lookupNames(self) -> tuple[LookupKey, ...]:
        """Name keys to use when looking up this DatasetRef in a
        configuration.

        The names are returned in order of priority.

        Returns
        -------
        names : `tuple` of `LookupKey`
            Tuple of the `DatasetType` name and the `StorageClass` name.
            If ``instrument`` is defined in the dataId, each of those names
            is added to the start of the tuple with a key derived from the
            value of ``instrument``.
        """
        # Special-case the instrument Dimension since we allow configs
        # to include the instrument name in the hierarchy.
        names: tuple[LookupKey, ...] = self.datasetType._lookupNames()

        if "instrument" in self.dataId:
            names = tuple(n.clone(dataId={"instrument": self.dataId["instrument"]}) for n in names) + names

        return names


    @staticmethod
    def groupByType(refs: Iterable[DatasetRef]) -> NamedKeyDict[DatasetType, list[DatasetRef]]:
        """Group an iterable of `DatasetRef` by `DatasetType`.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` [ `DatasetRef` ]
            `DatasetRef` instances to group.

        Returns
        -------
        grouped : `NamedKeyDict` [ `DatasetType`, `list` [ `DatasetRef` ] ]
            Grouped `DatasetRef` instances.

        Notes
        -----
        When lazy item-iterables are acceptable instead of a full mapping,
        `iter_by_type` can in some cases be far more efficient.
        """
        result: NamedKeyDict[DatasetType, list[DatasetRef]] = NamedKeyDict()
        for ref in refs:
            result.setdefault(ref.datasetType, []).append(ref)
        return result
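
    # Grouping sketch, assuming ``refs`` is an iterable of DatasetRef:
    #
    #     for dataset_type, refs_of_type in DatasetRef.groupByType(refs).items():
    #         print(dataset_type.name, len(refs_of_type))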


    @staticmethod
    def iter_by_type(
        refs: Iterable[DatasetRef],
    ) -> Iterable[tuple[DatasetType, Iterable[DatasetRef]]]:
        """Group an iterable of `DatasetRef` by `DatasetType`, with special
        hooks for custom iterables that can do this efficiently.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` [ `DatasetRef` ]
            `DatasetRef` instances to group. If this satisfies the
            `_DatasetRefGroupedIterable` protocol, its
            `~_DatasetRefGroupedIterable._iter_by_dataset_type` method will
            be called.

        Returns
        -------
        grouped : `~collections.abc.Iterable` [ `tuple` [ `DatasetType`, \
                `~collections.abc.Iterable` [ `DatasetRef` ] ] ]
            Grouped `DatasetRef` instances.
        """
        if isinstance(refs, _DatasetRefGroupedIterable):
            return refs._iter_by_dataset_type()
        return DatasetRef.groupByType(refs).items()


    def makeCompositeRef(self) -> DatasetRef:
        """Create a `DatasetRef` of the composite from a component ref.

        Requires that this `DatasetRef` is a component.

        Returns
        -------
        ref : `DatasetRef`
            A `DatasetRef` with a dataset type that corresponds to the
            composite parent of this component, and the same ID and run.
        """
        # Assume that the data ID does not need to be standardized
        # and should match whatever this ref already has.
        return DatasetRef(
            self.datasetType.makeCompositeDatasetType(), self.dataId, id=self.id, run=self.run, conform=False
        )


    def makeComponentRef(self, name: str) -> DatasetRef:
        """Create a `DatasetRef` that corresponds to a component.

        Parameters
        ----------
        name : `str`
            Name of the component.

        Returns
        -------
        ref : `DatasetRef`
            A `DatasetRef` with a dataset type that corresponds to the given
            component, and the same ID and run.
        """
        # Assume that the data ID does not need to be standardized
        # and should match whatever this ref already has.
        return DatasetRef(
            self.datasetType.makeComponentDatasetType(name),
            self.dataId,
            id=self.id,
            run=self.run,
            conform=False,
        )
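
    # Component round-trip sketch, assuming ``ref`` is a composite whose
    # storage class defines a "wcs" component: the component ref shares the
    # parent's ID, run, and data ID, and `makeCompositeRef` inverts it:
    #
    #     wcs_ref = ref.makeComponentRef("wcs")
    #     assert wcs_ref.id == ref.id
    #     assert wcs_ref.makeCompositeRef() == ref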


    def overrideStorageClass(self, storageClass: str | StorageClass) -> DatasetRef:
        """Create a new `DatasetRef` from this one, but with a modified
        `DatasetType` that has a different `StorageClass`.

        Parameters
        ----------
        storageClass : `str` or `StorageClass`
            The new storage class.

        Returns
        -------
        modified : `DatasetRef`
            A new dataset reference that is the same as the current one but
            with a different storage class in the `DatasetType`.
        """
        return DatasetRef(
            datasetType=self.datasetType.overrideStorageClass(storageClass),
            dataId=self.dataId,
            id=self.id,
            run=self.run,
            conform=False,
        )


    def is_compatible_with(self, ref: DatasetRef) -> bool:
        """Determine if the given `DatasetRef` is compatible with this one.

        Parameters
        ----------
        ref : `DatasetRef`
            Dataset ref to check.

        Returns
        -------
        is_compatible : `bool`
            Returns `True` if the other dataset ref is either the same as
            this one, or the dataset type associated with the other is
            compatible with this one's and the dataId and dataset ID match.

        Notes
        -----
        Compatibility requires that the dataId and dataset ID match and the
        `DatasetType` is compatible. Compatibility is defined as: the storage
        class associated with the dataset type of the other ref can be
        converted to this ref's storage class.

        Specifically this means that if you have done:

        .. code-block:: py

            new_ref = ref.overrideStorageClass(sc)

        and this is successful, then the guarantee is that:

        .. code-block:: py

            assert ref.is_compatible_with(new_ref) is True

        since we know that the python type associated with the new ref can
        be converted to the original python type. The reverse is not
        guaranteed and depends on whether bidirectional converters have been
        registered.
        """
        if self.id != ref.id:
            return False
        if self.dataId != ref.dataId:
            return False
        if self.run != ref.run:
            return False
        return self.datasetType.is_compatible_with(ref.datasetType)


    datasetType: DatasetType
    """The definition of this dataset (`DatasetType`).

    Cannot be changed after a `DatasetRef` is constructed.
    """

    dataId: DataCoordinate
    """A mapping of `Dimension` primary key values that labels the dataset
    within a Collection (`DataCoordinate`).

    Cannot be changed after a `DatasetRef` is constructed.
    """

    run: str
    """The name of the run that produced the dataset.

    Cannot be changed after a `DatasetRef` is constructed.
    """