Coverage for python/lsst/daf/butler/core/datasets/ref.py: 36% (221 statements)

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = [
    "AmbiguousDatasetError",
    "DatasetId",
    "DatasetIdFactory",
    "DatasetIdGenEnum",
    "DatasetRef",
    "SerializedDatasetRef",
]

import enum
import sys
import uuid
from collections.abc import Iterable
from typing import TYPE_CHECKING, Any, ClassVar, Protocol, runtime_checkable

from lsst.utils.classes import immutable

try:
    from pydantic.v1 import BaseModel, StrictStr, validator
except ModuleNotFoundError:
    from pydantic import BaseModel, StrictStr, validator  # type: ignore

from ..configSupport import LookupKey
from ..dimensions import DataCoordinate, DimensionGraph, DimensionUniverse, SerializedDataCoordinate
from ..json import from_json_pydantic, to_json_pydantic
from ..named import NamedKeyDict
from ..persistenceContext import PersistenceContextVars
from .type import DatasetType, SerializedDatasetType

if TYPE_CHECKING:
    from ...registry import Registry
    from ..storageClass import StorageClass


class AmbiguousDatasetError(Exception):
    """Raised when a `DatasetRef` is not resolved but should be.

    This happens when the `DatasetRef` has no ID or run but the requested
    operation requires one of them.
    """


@runtime_checkable
class _DatasetRefGroupedIterable(Protocol):
    """A package-private interface for iterables of `DatasetRef` that know
    how to efficiently group their contents by `DatasetType`.
    """

    def _iter_by_dataset_type(self) -> Iterable[tuple[DatasetType, Iterable[DatasetRef]]]:
        """Iterate over `DatasetRef` instances, one `DatasetType` at a time.

        Returns
        -------
        grouped : `~collections.abc.Iterable` [ `tuple` [ `DatasetType`, \
                `~collections.abc.Iterable` [ `DatasetRef` ] ] ]
            An iterable of tuples, in which the first element is a dataset
            type and the second is an iterable of `DatasetRef` objects with
            exactly that dataset type.
        """
        ...


class DatasetIdGenEnum(enum.Enum):
    """Enum used to specify dataset ID generation options."""

    UNIQUE = 0
    """Unique mode generates a unique ID for each inserted dataset, e.g.
    auto-generated by the database or a random UUID.
    """

    DATAID_TYPE = 1
    """In this mode the ID is computed deterministically from a combination
    of dataset type and data ID.
    """

    DATAID_TYPE_RUN = 2
    """In this mode the ID is computed deterministically from a combination
    of dataset type, data ID, and run collection name.
    """


class DatasetIdFactory:
    """Factory for dataset IDs (UUIDs).

    For now the logic is hard-coded and is controlled by the user-provided
    value of `DatasetIdGenEnum`. In the future we may implement configurable
    logic that can guess the `DatasetIdGenEnum` value from other parameters.
    """

    NS_UUID = uuid.UUID("840b31d9-05cd-5161-b2c8-00d32b280d0f")
    """Namespace UUID used for UUID5 generation. Do not change. This was
    produced by `uuid.uuid5(uuid.NAMESPACE_DNS, "lsst.org")`.
    """

    def makeDatasetId(
        self,
        run: str,
        datasetType: DatasetType,
        dataId: DataCoordinate,
        idGenerationMode: DatasetIdGenEnum,
    ) -> uuid.UUID:
        """Generate a dataset ID for a dataset.

        Parameters
        ----------
        run : `str`
            Name of the RUN collection for the dataset.
        datasetType : `DatasetType`
            Dataset type.
        dataId : `DataCoordinate`
            Expanded data ID for the dataset.
        idGenerationMode : `DatasetIdGenEnum`
            ID generation option. `~DatasetIdGenEnum.UNIQUE` makes a random
            UUID4-type ID. `~DatasetIdGenEnum.DATAID_TYPE` makes a
            deterministic UUID5-type ID based on a dataset type name and
            ``dataId``. `~DatasetIdGenEnum.DATAID_TYPE_RUN` makes a
            deterministic UUID5-type ID based on a dataset type name, run
            collection name, and ``dataId``.

        Returns
        -------
        datasetId : `uuid.UUID`
            Dataset identifier.
        """
        if idGenerationMode is DatasetIdGenEnum.UNIQUE:
            return uuid.uuid4()
        else:
            # WARNING: If you modify this code make sure that the order of
            # items in the `items` list below never changes.
            items: list[tuple[str, str]] = []
            if idGenerationMode is DatasetIdGenEnum.DATAID_TYPE:
                items = [
                    ("dataset_type", datasetType.name),
                ]
            elif idGenerationMode is DatasetIdGenEnum.DATAID_TYPE_RUN:
                items = [
                    ("dataset_type", datasetType.name),
                    ("run", run),
                ]
            else:
                raise ValueError(f"Unexpected ID generation mode: {idGenerationMode}")

            for name, value in sorted(dataId.byName().items()):
                items.append((name, str(value)))
            data = ",".join(f"{key}={value}" for key, value in items)
            return uuid.uuid5(self.NS_UUID, data)
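
# Illustrative usage sketch (not part of the original module): the
# deterministic modes hash a stable "key=value" string, so identical inputs
# always reproduce the same UUID5. ``dataset_type`` and ``data_id`` are
# assumed to already exist as a `DatasetType` and an expanded
# `DataCoordinate`.
#
#     factory = DatasetIdFactory()
#     id1 = factory.makeDatasetId("run/a", dataset_type, data_id,
#                                 DatasetIdGenEnum.DATAID_TYPE_RUN)
#     id2 = factory.makeDatasetId("run/a", dataset_type, data_id,
#                                 DatasetIdGenEnum.DATAID_TYPE_RUN)
#     assert id1 == id2 and id1.version == 5   # deterministic UUID5
#     assert factory.makeDatasetId("run/a", dataset_type, data_id,
#                                  DatasetIdGenEnum.UNIQUE).version == 4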


# This is constant, so don't recreate a set for each instance.
_serializedDatasetRefFieldsSet = {"id", "datasetType", "dataId", "run", "component"}


class SerializedDatasetRef(BaseModel):
    """Simplified model of a `DatasetRef` suitable for serialization."""

    id: uuid.UUID
    datasetType: SerializedDatasetType | None = None
    dataId: SerializedDataCoordinate | None = None
    run: StrictStr | None = None
    component: StrictStr | None = None

    @validator("dataId")
    def _check_dataId(cls, v: Any, values: dict[str, Any]) -> Any:  # noqa: N805
        if (d := "datasetType") in values and values[d] is None:
            raise ValueError("Can not specify 'dataId' without specifying 'datasetType'")
        return v

    @validator("run")
    def _check_run(cls, v: Any, values: dict[str, Any]) -> Any:  # noqa: N805
        if v and (i := "id") in values and values[i] is None:
            raise ValueError("'run' cannot be provided unless 'id' is.")
        return v

    @validator("component")
    def _check_component(cls, v: Any, values: dict[str, Any]) -> Any:  # noqa: N805
        # Component should not be given if datasetType is given.
        if v and (d := "datasetType") in values and values[d] is not None:
            raise ValueError(f"datasetType ({values[d]}) can not be set if component is given ({v}).")
        return v

    @classmethod
    def direct(
        cls,
        *,
        id: str,
        run: str,
        datasetType: dict[str, Any] | None = None,
        dataId: dict[str, Any] | None = None,
        component: str | None = None,
    ) -> SerializedDatasetRef:
        """Construct a `SerializedDatasetRef` directly without validators.

        Notes
        -----
        This differs from the pydantic "construct" method in that the
        arguments are explicitly what the model requires, and it will recurse
        through members, constructing them from their corresponding `direct`
        methods.

        The ``id`` parameter is a string representation of the dataset ID;
        it is converted to a UUID by this method.

        This method should only be called when the inputs are trusted.
        """
        node = SerializedDatasetRef.__new__(cls)
        setter = object.__setattr__
        setter(node, "id", uuid.UUID(id))
        setter(
            node,
            "datasetType",
            datasetType if datasetType is None else SerializedDatasetType.direct(**datasetType),
        )
        setter(node, "dataId", dataId if dataId is None else SerializedDataCoordinate.direct(**dataId))
        setter(node, "run", sys.intern(run))
        setter(node, "component", component)
        setter(node, "__fields_set__", _serializedDatasetRefFieldsSet)
        return node
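
# Illustrative usage sketch (not part of the original module): ``direct``
# bypasses the validators above and trusts its inputs, while normal
# construction enforces the field dependencies (e.g. a ``dataId`` without a
# ``datasetType`` raises a pydantic ``ValidationError``). The UUID string
# below is arbitrary.
#
#     trusted = SerializedDatasetRef.direct(
#         id="deadbeef-0000-4000-8000-000000000000", run="run/a"
#     )
#     assert isinstance(trusted.id, uuid.UUID)
#     assert trusted.run == "run/a"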


DatasetId = uuid.UUID
"""A type-annotation alias for dataset ID providing typing flexibility."""


@immutable
class DatasetRef:
    """Reference to a Dataset in a `Registry`.

    A `DatasetRef` may point to a Dataset that does not yet exist
    (e.g., because it is a predicted input for provenance).

    Parameters
    ----------
    datasetType : `DatasetType`
        The `DatasetType` for this Dataset.
    dataId : `DataCoordinate`
        A mapping of dimensions that labels the Dataset within a Collection.
    run : `str`
        The name of the run this dataset was associated with when it was
        created.
    id : `DatasetId`, optional
        The unique identifier assigned when the dataset is created. If ``id``
        is not specified, a new unique ID will be created.
    conform : `bool`, optional
        If `True` (default), call `DataCoordinate.standardize` to ensure that
        the data ID's dimensions are consistent with the dataset type's.
        `DatasetRef` instances for which those dimensions are not equal
        should not be created in new code, but are still supported for
        backwards compatibility. New code should only pass `False` if it can
        guarantee that the dimensions are already consistent.
    id_generation_mode : `DatasetIdGenEnum`
        ID generation option. `~DatasetIdGenEnum.UNIQUE` makes a random
        UUID4-type ID. `~DatasetIdGenEnum.DATAID_TYPE` makes a deterministic
        UUID5-type ID based on a dataset type name and ``dataId``.
        `~DatasetIdGenEnum.DATAID_TYPE_RUN` makes a deterministic UUID5-type
        ID based on a dataset type name, run collection name, and ``dataId``.

    See Also
    --------
    :ref:`daf_butler_organizing_datasets`
    """

    _serializedType = SerializedDatasetRef
    __slots__ = (
        "_id",
        "datasetType",
        "dataId",
        "run",
    )

    def __init__(
        self,
        datasetType: DatasetType,
        dataId: DataCoordinate,
        run: str,
        *,
        id: DatasetId | None = None,
        conform: bool = True,
        id_generation_mode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE,
    ):
        self.datasetType = datasetType
        if conform:
            self.dataId = DataCoordinate.standardize(dataId, graph=datasetType.dimensions)
        else:
            self.dataId = dataId
        self.run = run
        if id is not None:
            self._id = id.int
        else:
            self._id = (
                DatasetIdFactory()
                .makeDatasetId(self.run, self.datasetType, self.dataId, id_generation_mode)
                .int
            )

    @property
    def id(self) -> DatasetId:
        """Primary key of the dataset (`DatasetId`).

        Cannot be changed after a `DatasetRef` is constructed.
        """
        return uuid.UUID(int=self._id)
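
    # Illustrative usage sketch (not part of the original module): the UUID
    # is stored internally as an ``int`` (``self._id``) and rebuilt by the
    # ``id`` property on access. ``dataset_type`` and ``data_id`` are
    # assumed to already exist.
    #
    #     ref = DatasetRef(dataset_type, data_id, run="run/a",
    #                      id_generation_mode=DatasetIdGenEnum.DATAID_TYPE_RUN)
    #     same = DatasetRef(dataset_type, data_id, run="run/a",
    #                       id_generation_mode=DatasetIdGenEnum.DATAID_TYPE_RUN)
    #     assert ref.id == same.id  # deterministic mode reproduces the ID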

    def __eq__(self, other: Any) -> bool:
        try:
            return (self.datasetType, self.dataId, self.id) == (other.datasetType, other.dataId, other.id)
        except AttributeError:
            return NotImplemented

    def __hash__(self) -> int:
        return hash((self.datasetType, self.dataId, self.id))

    @property
    def dimensions(self) -> DimensionGraph:
        """Dimensions associated with the underlying `DatasetType`."""
        return self.datasetType.dimensions

    def __repr__(self) -> str:
        # We delegate to __str__ (i.e. use "!s") for the data ID below
        # because DataCoordinate's __repr__ - while adhering to the
        # guidelines for __repr__ - is much harder for users to read, while
        # its __str__ just produces a dict that can also be passed to
        # DatasetRef's constructor.
        return f"DatasetRef({self.datasetType!r}, {self.dataId!s}, run={self.run!r}, id={self.id})"

    def __str__(self) -> str:
        s = (
            f"{self.datasetType.name}@{self.dataId!s} [sc={self.datasetType.storageClass_name}]"
            f" (run={self.run} id={self.id})"
        )
        return s

    def __lt__(self, other: Any) -> bool:
        # Sort by run, DatasetType name, and then by DataCoordinate.
        # The __str__ representation is probably close enough, but we
        # need to ensure that sorting a DatasetRef matches what you would
        # get if you sorted DatasetType+DataCoordinate.
        if not isinstance(other, type(self)):
            return NotImplemented

        # Group by run if defined; it takes precedence over DatasetType.
        self_run = "" if self.run is None else self.run
        other_run = "" if other.run is None else other.run

        # Compare tuples in the priority order.
        return (self_run, self.datasetType, self.dataId) < (other_run, other.datasetType, other.dataId)
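
    # Illustrative usage sketch (not part of the original module):
    # ``sorted`` on a collection of refs therefore orders by run first, then
    # dataset type, then data ID.
    #
    #     for ref in sorted(refs):
    #         print(ref.run, ref.datasetType.name, ref.dataId)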

    def to_simple(self, minimal: bool = False) -> SerializedDatasetRef:
        """Convert this class to a simple python type.

        This makes it suitable for serialization.

        Parameters
        ----------
        minimal : `bool`, optional
            Use minimal serialization. Requires Registry to convert
            back to a full type.

        Returns
        -------
        simple : `SerializedDatasetRef`
            The object converted to simplified form.
        """
        if minimal:
            # The only thing needed to uniquely define a DatasetRef is its
            # id, so that can be used directly if it is not a component
            # DatasetRef. Store it in a dict to allow us to easily add the
            # planned origin information later without having to support
            # both an int and a dict in simple form.
            simple: dict[str, Any] = {"id": self.id}
            if self.isComponent():
                # We can still be a little minimalist with a component
                # but we will also need to record the datasetType component.
                simple["component"] = self.datasetType.component()
            return SerializedDatasetRef(**simple)

        return SerializedDatasetRef(
            datasetType=self.datasetType.to_simple(minimal=minimal),
            dataId=self.dataId.to_simple(),
            run=self.run,
            id=self.id,
        )
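
    # Illustrative usage sketch (not part of the original module): the
    # minimal form keeps only the ID (plus the component name for component
    # refs), so only it needs a registry to round-trip.
    #
    #     full = ref.to_simple()
    #     assert full.run == ref.run and full.dataId is not None
    #     minimal = ref.to_simple(minimal=True)
    #     assert minimal.datasetType is None and minimal.run is None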

    @classmethod
    def from_simple(
        cls,
        simple: SerializedDatasetRef,
        universe: DimensionUniverse | None = None,
        registry: Registry | None = None,
        datasetType: DatasetType | None = None,
    ) -> DatasetRef:
        """Construct a new object from simplified form.

        Generally this is data returned from the `to_simple` method.

        Parameters
        ----------
        simple : `SerializedDatasetRef`
            The value returned by `to_simple()`.
        universe : `DimensionUniverse`, optional
            The special graph of all known dimensions.
            Can be `None` if a registry is provided.
        registry : `lsst.daf.butler.Registry`, optional
            Registry to use to convert the simple form of a DatasetRef to
            a full `DatasetRef`. Can be `None` if a full description of
            the type is provided along with a universe.
        datasetType : `DatasetType`, optional
            If supplied, this will be used as the dataset type object in the
            resulting `DatasetRef` instead of being read from the
            `SerializedDatasetRef`. This is useful when many refs share the
            same type, as memory can be saved. Defaults to `None`.

        Returns
        -------
        ref : `DatasetRef`
            Newly-constructed object.
        """
        cache = PersistenceContextVars.datasetRefs.get()
        localName = sys.intern(
            datasetType.name
            if datasetType is not None
            else (x.name if (x := simple.datasetType) is not None else "")
        )
        key = (simple.id.int, localName)
        if cache is not None and (cachedRef := cache.get(key, None)) is not None:
            return cachedRef
        # A minimalist component will just specify component and id and
        # require the registry to reconstruct it.
        if not (simple.datasetType is not None or simple.dataId is not None or simple.run is not None):
            if registry is None:
                raise ValueError("Registry is required to construct component DatasetRef from integer id")
            if simple.id is None:
                raise ValueError("For minimal DatasetRef the ID must be defined.")
            ref = registry.getDataset(simple.id)
            if ref is None:
                raise RuntimeError(f"No matching dataset found in registry for id {simple.id}")
            if simple.component:
                ref = ref.makeComponentRef(simple.component)
            if cache is not None:
                cache[key] = ref
            return ref

        if universe is None and registry is None:
            raise ValueError("One of universe or registry must be provided.")

        if universe is None and registry is not None:
            universe = registry.dimensions

        if universe is None:
            # This is for mypy.
            raise ValueError("Unable to determine a usable universe")

        if simple.datasetType is None and datasetType is None:
            # mypy
            raise ValueError("The DatasetType must be specified to construct a DatasetRef")
        if datasetType is None:
            if simple.datasetType is None:
                raise ValueError("Cannot determine Dataset type of this serialized class")
            datasetType = DatasetType.from_simple(simple.datasetType, universe=universe, registry=registry)

        if simple.dataId is None:
            # mypy
            raise ValueError("The DataId must be specified to construct a DatasetRef")
        dataId = DataCoordinate.from_simple(simple.dataId, universe=universe)

        # Check that the simple ref is resolved.
        if simple.run is None:
            dstr = ""
            if simple.datasetType is None:
                dstr = f" (datasetType={datasetType.name!r})"
            raise ValueError(
                "Run collection name is missing from serialized representation. "
                f"Encountered with {simple!r}{dstr}."
            )

        newRef = cls(datasetType, dataId, id=simple.id, run=simple.run)
        if cache is not None:
            cache[key] = newRef
        return newRef
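
    # Illustrative usage sketch (not part of the original module): a full
    # simple form round-trips with just a dimension universe, while a
    # minimal form requires a registry lookup. ``universe`` and
    # ``registry`` are assumed to already exist.
    #
    #     restored = DatasetRef.from_simple(ref.to_simple(), universe=universe)
    #     assert restored == ref
    #     from_minimal = DatasetRef.from_simple(
    #         ref.to_simple(minimal=True), registry=registry
    #     )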

    to_json = to_json_pydantic
    from_json: ClassVar = classmethod(from_json_pydantic)

    @classmethod
    def _unpickle(
        cls,
        datasetType: DatasetType,
        dataId: DataCoordinate,
        id: DatasetId,
        run: str,
    ) -> DatasetRef:
        """Create a new `DatasetRef`.

        A custom factory method for use by `__reduce__` as a workaround for
        its lack of support for keyword arguments.
        """
        return cls(datasetType, dataId, id=id, run=run)

    def __reduce__(self) -> tuple:
        return (self._unpickle, (self.datasetType, self.dataId, self.id, self.run))

    def __deepcopy__(self, memo: dict) -> DatasetRef:
        # DatasetRef is recursively immutable; see note in @immutable
        # decorator.
        return self

    def expanded(self, dataId: DataCoordinate) -> DatasetRef:
        """Return a new `DatasetRef` with the given expanded data ID.

        Parameters
        ----------
        dataId : `DataCoordinate`
            Data ID for the new `DatasetRef`. Must compare equal to the
            original data ID.

        Returns
        -------
        ref : `DatasetRef`
            A new `DatasetRef` with the given data ID.
        """
        assert dataId == self.dataId
        return DatasetRef(
            datasetType=self.datasetType, dataId=dataId, id=self.id, run=self.run, conform=False
        )

    def isComponent(self) -> bool:
        """Indicate whether this `DatasetRef` refers to a component.

        Returns
        -------
        isComponent : `bool`
            `True` if this `DatasetRef` is a component, `False` otherwise.
        """
        return self.datasetType.isComponent()

    def isComposite(self) -> bool:
        """Boolean indicating whether this `DatasetRef` is a composite type.

        Returns
        -------
        isComposite : `bool`
            `True` if this `DatasetRef` is a composite type, `False`
            otherwise.
        """
        return self.datasetType.isComposite()

    def _lookupNames(self) -> tuple[LookupKey, ...]:
        """Name keys to use when looking up this DatasetRef in a
        configuration.

        The names are returned in order of priority.

        Returns
        -------
        names : `tuple` of `LookupKey`
            Tuple of the `DatasetType` name and the `StorageClass` name.
            If ``instrument`` is defined in the dataId, each of those names
            is added to the start of the tuple with a key derived from the
            value of ``instrument``.
        """
        # Special case the instrument Dimension since we allow configs
        # to include the instrument name in the hierarchy.
        names: tuple[LookupKey, ...] = self.datasetType._lookupNames()

        if "instrument" in self.dataId:
            names = tuple(n.clone(dataId={"instrument": self.dataId["instrument"]}) for n in names) + names

        return names

    @staticmethod
    def groupByType(refs: Iterable[DatasetRef]) -> NamedKeyDict[DatasetType, list[DatasetRef]]:
        """Group an iterable of `DatasetRef` by `DatasetType`.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` [ `DatasetRef` ]
            `DatasetRef` instances to group.

        Returns
        -------
        grouped : `NamedKeyDict` [ `DatasetType`, `list` [ `DatasetRef` ] ]
            Grouped `DatasetRef` instances.

        Notes
        -----
        When lazy item-iterables are acceptable instead of a full mapping,
        `iter_by_type` can in some cases be far more efficient.
        """
        result: NamedKeyDict[DatasetType, list[DatasetRef]] = NamedKeyDict()
        for ref in refs:
            result.setdefault(ref.datasetType, []).append(ref)
        return result
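
    # Illustrative usage sketch (not part of the original module): the
    # result maps each `DatasetType` to its refs and, being a
    # ``NamedKeyDict``, can also be indexed by dataset type name.
    #
    #     grouped = DatasetRef.groupByType(refs)
    #     for dataset_type, refs_of_type in grouped.items():
    #         print(dataset_type.name, len(refs_of_type))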

    @staticmethod
    def iter_by_type(
        refs: Iterable[DatasetRef],
    ) -> Iterable[tuple[DatasetType, Iterable[DatasetRef]]]:
        """Group an iterable of `DatasetRef` by `DatasetType` with special
        hooks for custom iterables that can do this efficiently.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` [ `DatasetRef` ]
            `DatasetRef` instances to group. If this satisfies the
            `_DatasetRefGroupedIterable` protocol, its
            `~_DatasetRefGroupedIterable._iter_by_dataset_type` method will
            be called.

        Returns
        -------
        grouped : `~collections.abc.Iterable` [ `tuple` [ `DatasetType`, \
                `~collections.abc.Iterable` [ `DatasetRef` ] ] ]
            Grouped `DatasetRef` instances.
        """
        if isinstance(refs, _DatasetRefGroupedIterable):
            return refs._iter_by_dataset_type()
        return DatasetRef.groupByType(refs).items()

    def makeCompositeRef(self) -> DatasetRef:
        """Create a `DatasetRef` of the composite from a component ref.

        Requires that this `DatasetRef` is a component.

        Returns
        -------
        ref : `DatasetRef`
            A `DatasetRef` with a dataset type that corresponds to the
            composite parent of this component, and the same ID and run.
        """
        # Assume that the data ID does not need to be standardized
        # and should match whatever this ref already has.
        return DatasetRef(
            self.datasetType.makeCompositeDatasetType(), self.dataId, id=self.id, run=self.run, conform=False
        )

    def makeComponentRef(self, name: str) -> DatasetRef:
        """Create a `DatasetRef` that corresponds to a component.

        Parameters
        ----------
        name : `str`
            Name of the component.

        Returns
        -------
        ref : `DatasetRef`
            A `DatasetRef` with a dataset type that corresponds to the given
            component, and the same ID and run.
        """
        # Assume that the data ID does not need to be standardized
        # and should match whatever this ref already has.
        return DatasetRef(
            self.datasetType.makeComponentDatasetType(name),
            self.dataId,
            id=self.id,
            run=self.run,
            conform=False,
        )
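
    # Illustrative usage sketch (not part of the original module): a
    # component ref shares the parent's ID and run, and ``makeCompositeRef``
    # inverts the operation. A "wcs" component is assumed to exist for this
    # dataset type's storage class.
    #
    #     wcs_ref = ref.makeComponentRef("wcs")
    #     assert wcs_ref.isComponent() and wcs_ref.id == ref.id
    #     assert wcs_ref.makeCompositeRef() == ref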

    def overrideStorageClass(self, storageClass: str | StorageClass) -> DatasetRef:
        """Create a new `DatasetRef` from this one, but with a modified
        `DatasetType` that has a different `StorageClass`.

        Parameters
        ----------
        storageClass : `str` or `StorageClass`
            The new storage class.

        Returns
        -------
        modified : `DatasetRef`
            A new dataset reference that is the same as the current one but
            with a different storage class in the `DatasetType`.
        """
        return DatasetRef(
            datasetType=self.datasetType.overrideStorageClass(storageClass),
            dataId=self.dataId,
            id=self.id,
            run=self.run,
            conform=False,
        )
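
    # Illustrative usage sketch (not part of the original module): the
    # override leaves ID, run, and data ID untouched, so per the guarantee
    # documented in ``is_compatible_with`` below the original ref is
    # compatible with the new one. The storage class name is hypothetical.
    #
    #     new_ref = ref.overrideStorageClass("NewStorageClass")
    #     assert new_ref.id == ref.id
    #     assert ref.is_compatible_with(new_ref)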

    def is_compatible_with(self, ref: DatasetRef) -> bool:
        """Determine if the given `DatasetRef` is compatible with this one.

        Parameters
        ----------
        ref : `DatasetRef`
            Dataset ref to check.

        Returns
        -------
        is_compatible : `bool`
            Returns `True` if the other dataset ref is either the same as
            this one, or the dataset type associated with the other is
            compatible with this one and the dataId and dataset ID match.

        Notes
        -----
        Compatibility requires that the dataId, dataset ID, and run match
        and the `DatasetType` is compatible. Compatibility is defined as the
        storage class associated with the dataset type of the other ref
        being convertible to this storage class.

        Specifically this means that if you have done:

        .. code-block:: py

            new_ref = ref.overrideStorageClass(sc)

        and this is successful, then the guarantee is that:

        .. code-block:: py

            assert ref.is_compatible_with(new_ref) is True

        since we know that the python type associated with the new ref can
        be converted to the original python type. The reverse is not
        guaranteed and depends on whether bidirectional converters have been
        registered.
        """
        if self.id != ref.id:
            return False
        if self.dataId != ref.dataId:
            return False
        if self.run != ref.run:
            return False
        return self.datasetType.is_compatible_with(ref.datasetType)

    datasetType: DatasetType
    """The definition of this dataset (`DatasetType`).

    Cannot be changed after a `DatasetRef` is constructed.
    """

    dataId: DataCoordinate
    """A mapping of `Dimension` primary key values that labels the dataset
    within a Collection (`DataCoordinate`).

    Cannot be changed after a `DatasetRef` is constructed.
    """

    run: str
    """The name of the run that produced the dataset.

    Cannot be changed after a `DatasetRef` is constructed.
    """