Coverage for python/lsst/daf/butler/core/datasets/ref.py: 34%

221 statements  

coverage.py v7.2.7, created at 2023-08-12 09:20 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = [ 

24 "AmbiguousDatasetError", 

25 "DatasetId", 

26 "DatasetIdFactory", 

27 "DatasetIdGenEnum", 

28 "DatasetRef", 

29 "SerializedDatasetRef", 

30] 

31 

32import enum 

33import sys 

34import uuid 

35from collections.abc import Iterable 

36from typing import TYPE_CHECKING, Any, ClassVar, Protocol, TypeAlias, runtime_checkable 

37 

38import pydantic 

39from lsst.daf.butler._compat import PYDANTIC_V2, _BaseModelCompat 

40from lsst.utils.classes import immutable 

41from pydantic import StrictStr 

42 

43from ..configSupport import LookupKey 

44from ..dimensions import DataCoordinate, DimensionGraph, DimensionUniverse, SerializedDataCoordinate 

45from ..json import from_json_pydantic, to_json_pydantic 

46from ..named import NamedKeyDict 

47from ..persistenceContext import PersistenceContextVars 

48from .type import DatasetType, SerializedDatasetType 

49 

50if TYPE_CHECKING: 

51 from ...registry import Registry 

52 from ..storageClass import StorageClass 

53 

54 

55class AmbiguousDatasetError(Exception): 

56 """Raised when a `DatasetRef` is not resolved but should be. 

57 

58 This happens when the `DatasetRef` has no ID or run but the requested 

59 operation requires one of them. 

60 """ 

61 

62 

63@runtime_checkable 

64class _DatasetRefGroupedIterable(Protocol): 

65 """A package-private interface for iterables of `DatasetRef` that know how 

66 to efficiently group their contents by `DatasetType`. 

67 

68 """ 

69 

70 def _iter_by_dataset_type(self) -> Iterable[tuple[DatasetType, Iterable[DatasetRef]]]: 

71 """Iterate over `DatasetRef` instances, one `DatasetType` at a time. 

72 

73 Returns 

74 ------- 

75 grouped : `~collections.abc.Iterable` [ `tuple` [ `DatasetType`, \

76 `~collections.abc.Iterable` [ `DatasetRef` ] ] ]

77 An iterable of tuples, in which the first element is a dataset type 

78 and the second is an iterable of `DatasetRef` objects with exactly 

79 that dataset type. 

80 """ 

81 ... 

82 

83 

84class DatasetIdGenEnum(enum.Enum): 

85 """Enum used to specify dataset ID generation options.""" 

86 

87 UNIQUE = 0 

88 """Unique mode generates unique ID for each inserted dataset, e.g. 

89 auto-generated by database or random UUID. 

90 """ 

91 

92 DATAID_TYPE = 1 

93 """In this mode ID is computed deterministically from a combination of 

94 dataset type and dataId. 

95 """ 

96 

97 DATAID_TYPE_RUN = 2 

98 """In this mode ID is computed deterministically from a combination of 

99 dataset type, dataId, and run collection name. 

100 """ 

101 

102 

103class DatasetIdFactory: 

104 """Factory for dataset IDs (UUIDs). 

105 

106 For now the logic is hard-coded and is controlled by the user-provided

107 value of `DatasetIdGenEnum`. In the future we may implement configurable

108 logic that can guess the `DatasetIdGenEnum` value from other parameters.

109 """ 

110 

111 NS_UUID = uuid.UUID("840b31d9-05cd-5161-b2c8-00d32b280d0f") 

112 """Namespace UUID used for UUID5 generation. Do not change. This was 

113 produced by `uuid.uuid5(uuid.NAMESPACE_DNS, "lsst.org")`. 

114 """ 

115 

116 def makeDatasetId( 

117 self, 

118 run: str, 

119 datasetType: DatasetType, 

120 dataId: DataCoordinate, 

121 idGenerationMode: DatasetIdGenEnum, 

122 ) -> uuid.UUID: 

123 """Generate dataset ID for a dataset. 

124 

125 Parameters 

126 ---------- 

127 run : `str` 

128 Name of the RUN collection for the dataset. 

129 datasetType : `DatasetType` 

130 Dataset type. 

131 dataId : `DataCoordinate` 

132 Expanded data ID for the dataset. 

133 idGenerationMode : `DatasetIdGenEnum` 

134 ID generation option. `~DatasetIdGenEnum.UNIQUE` makes a random 

135 UUID4-type ID. `~DatasetIdGenEnum.DATAID_TYPE` makes a 

136 deterministic UUID5-type ID based on a dataset type name and 

137 ``dataId``. `~DatasetIdGenEnum.DATAID_TYPE_RUN` makes a 

138 deterministic UUID5-type ID based on a dataset type name, run 

139 collection name, and ``dataId``. 

140 

141 Returns 

142 ------- 

143 datasetId : `uuid.UUID` 

144 Dataset identifier. 

145 """ 

146 if idGenerationMode is DatasetIdGenEnum.UNIQUE: 

147 return uuid.uuid4() 

148 else: 

149 # WARNING: If you modify this code make sure that the order of 

150 # items in the `items` list below never changes. 

151 items: list[tuple[str, str]] = [] 

152 if idGenerationMode is DatasetIdGenEnum.DATAID_TYPE: 

153 items = [ 

154 ("dataset_type", datasetType.name), 

155 ] 

156 elif idGenerationMode is DatasetIdGenEnum.DATAID_TYPE_RUN: 

157 items = [ 

158 ("dataset_type", datasetType.name), 

159 ("run", run), 

160 ] 

161 else: 

162 raise ValueError(f"Unexpected ID generation mode: {idGenerationMode}") 

163 

164 for name, value in sorted(dataId.byName().items()): 

165 items.append((name, str(value))) 

166 data = ",".join(f"{key}={value}" for key, value in items) 

167 return uuid.uuid5(self.NS_UUID, data) 

168 

169 
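The ID-generation modes documented above can be exercised without a full butler stack. The sketch below is illustrative only: it relies on the fact that ``makeDatasetId`` touches just ``datasetType.name`` and ``dataId.byName()``, so throwaway stand-ins are used in place of real `DatasetType` and `DataCoordinate` objects.

import types
import uuid

fake_type = types.SimpleNamespace(name="flat")
fake_data_id = types.SimpleNamespace(byName=lambda: {"instrument": "HSC", "detector": 42})

factory = DatasetIdFactory()

# UNIQUE: a fresh random UUID4 on every call.
a = factory.makeDatasetId("my_run", fake_type, fake_data_id, DatasetIdGenEnum.UNIQUE)
b = factory.makeDatasetId("my_run", fake_type, fake_data_id, DatasetIdGenEnum.UNIQUE)
assert a != b

# DATAID_TYPE_RUN: deterministic UUID5; repeated calls agree and the run name
# participates in the hash.
c = factory.makeDatasetId("my_run", fake_type, fake_data_id, DatasetIdGenEnum.DATAID_TYPE_RUN)
d = factory.makeDatasetId("my_run", fake_type, fake_data_id, DatasetIdGenEnum.DATAID_TYPE_RUN)
assert c == d
assert c != factory.makeDatasetId("other_run", fake_type, fake_data_id, DatasetIdGenEnum.DATAID_TYPE_RUN)

# The namespace constant matches its documented derivation.
assert DatasetIdFactory.NS_UUID == uuid.uuid5(uuid.NAMESPACE_DNS, "lsst.org")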

170# This is constant, so don't recreate a set for each instance 

171_serializedDatasetRefFieldsSet = {"id", "datasetType", "dataId", "run", "component"} 

172 

173 

174class SerializedDatasetRef(_BaseModelCompat): 

175 """Simplified model of a `DatasetRef` suitable for serialization.""" 

176 

177 id: uuid.UUID 

178 datasetType: SerializedDatasetType | None = None 

179 dataId: SerializedDataCoordinate | None = None 

180 run: StrictStr | None = None 

181 component: StrictStr | None = None 

182 

183 if PYDANTIC_V2: 183 ↛ 186 (line 183 didn't jump to line 186, because the condition on line 183 was never true)

184 # Cannot use an "after" validator since in some cases the validator

185 # seems to trigger with the datasetType field not yet set. 

186 @pydantic.model_validator(mode="before") # type: ignore[attr-defined] 

187 @classmethod 

188 def check_consistent_parameters(cls, data: dict[str, Any]) -> dict[str, Any]: 

189 has_datasetType = data.get("datasetType") is not None 

190 has_dataId = data.get("dataId") is not None 

191 if has_datasetType is not has_dataId: 

192 raise ValueError("If specifying datasetType or dataId, must specify both.") 

193 

194 if data.get("component") is not None and has_datasetType: 

195 raise ValueError("datasetType can not be set if component is given.") 

196 return data 

197 

198 else: 

199 

200 @pydantic.validator("dataId") 

201 def _check_dataId(cls, v: Any, values: dict[str, Any]) -> Any: # noqa: N805 

202 if v and (d := "datasetType") in values and values[d] is None: 

203 raise ValueError("Can not specify 'dataId' without specifying 'datasetType'") 

204 return v 

205 

206 @pydantic.validator("component") 

207 def _check_component(cls, v: Any, values: dict[str, Any]) -> Any: # noqa: N805 

208 # Component should not be given if datasetType is given 

209 if v and (d := "datasetType") in values and values[d] is not None: 

210 raise ValueError(f"datasetType ({values[d]}) can not be set if component is given ({v}).") 

211 return v 

212 

213 @classmethod 

214 def direct( 

215 cls, 

216 *, 

217 id: str, 

218 run: str, 

219 datasetType: dict[str, Any] | None = None, 

220 dataId: dict[str, Any] | None = None, 

221 component: str | None = None, 

222 ) -> SerializedDatasetRef: 

223 """Construct a `SerializedDatasetRef` directly without validators. 

224 

225 Notes 

226 ----- 

227 This differs from the pydantic "construct" method in that the arguments 

228 are explicitly what the model requires, and it will recurse through 

229 members, constructing them from their corresponding `direct` methods. 

230 

231 The ``id`` parameter is a string representation of the dataset ID; it is

232 converted to a UUID by this method.

233 

234 This method should only be called when the inputs are trusted. 

235 """ 

236 serialized_datasetType = ( 

237 SerializedDatasetType.direct(**datasetType) if datasetType is not None else None 

238 ) 

239 serialized_dataId = SerializedDataCoordinate.direct(**dataId) if dataId is not None else None 

240 

241 node = cls.model_construct( 

242 _fields_set=_serializedDatasetRefFieldsSet, 

243 id=uuid.UUID(id), 

244 datasetType=serialized_datasetType, 

245 dataId=serialized_dataId, 

246 run=sys.intern(run), 

247 component=component, 

248 ) 

249 

250 return node 

251 
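A sketch of the consistency rules the validators above enforce, under both the pydantic v1 and v2 code paths. The ``dataId`` payload is a guessed illustrative shape, not something taken from this module.

import uuid
import pydantic

# A dataId without its datasetType is rejected.
try:
    SerializedDatasetRef(
        id=uuid.uuid4(),
        run="my_run",
        dataId={"dataId": {"instrument": "HSC", "detector": 42}},
    )
except pydantic.ValidationError:
    pass  # expected: dataId requires datasetType

# The truly minimal valid form carries only the id (and, optionally, run).
minimal = SerializedDatasetRef(id=uuid.uuid4(), run="my_run")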

252 

253DatasetId: TypeAlias = uuid.UUID 

254"""A type-annotation alias for dataset ID providing typing flexibility. 

255""" 

256 

257 

258@immutable 

259class DatasetRef: 

260 """Reference to a Dataset in a `Registry`. 

261 

262 A `DatasetRef` may point to a Dataset that does not yet exist

263 (e.g., because it is a predicted input for provenance). 

264 

265 Parameters 

266 ---------- 

267 datasetType : `DatasetType` 

268 The `DatasetType` for this Dataset. 

269 dataId : `DataCoordinate` 

270 A mapping of dimensions that labels the Dataset within a Collection. 

271 run : `str` 

272 The name of the run this dataset was associated with when it was 

273 created. 

274 id : `DatasetId`, optional 

275 The unique identifier assigned when the dataset is created. If ``id`` 

276 is not specified, a new unique ID will be created. 

277 conform : `bool`, optional 

278 If `True` (default), call `DataCoordinate.standardize` to ensure that 

279 the data ID's dimensions are consistent with the dataset type's. 

280 `DatasetRef` instances for which those dimensions are not equal should 

281 not be created in new code, but are still supported for backwards 

282 compatibility. New code should only pass `False` if it can guarantee 

283 that the dimensions are already consistent. 

284 id_generation_mode : `DatasetIdGenEnum` 

285 ID generation option. `~DatasetIdGenEnum.UNIQUE` makes a random 

286 UUID4-type ID. `~DatasetIdGenEnum.DATAID_TYPE` makes a 

287 deterministic UUID5-type ID based on a dataset type name and 

288 ``dataId``. `~DatasetIdGenEnum.DATAID_TYPE_RUN` makes a 

289 deterministic UUID5-type ID based on a dataset type name, run 

290 collection name, and ``dataId``. 

291 

292 See Also 

293 -------- 

294 :ref:`daf_butler_organizing_datasets` 

295 """ 

296 

297 _serializedType = SerializedDatasetRef 

298 __slots__ = ( 

299 "_id", 

300 "datasetType", 

301 "dataId", 

302 "run", 

303 ) 

304 

305 def __init__( 

306 self, 

307 datasetType: DatasetType, 

308 dataId: DataCoordinate, 

309 run: str, 

310 *, 

311 id: DatasetId | None = None, 

312 conform: bool = True, 

313 id_generation_mode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE, 

314 ): 

315 self.datasetType = datasetType 

316 if conform: 

317 self.dataId = DataCoordinate.standardize(dataId, graph=datasetType.dimensions) 

318 else: 

319 self.dataId = dataId 

320 self.run = run 

321 if id is not None: 

322 self._id = id.int 

323 else: 

324 self._id = ( 

325 DatasetIdFactory() 

326 .makeDatasetId(self.run, self.datasetType, self.dataId, id_generation_mode) 

327 .int 

328 ) 

329 
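Construction follows the same ID rules as `DatasetIdFactory`. A hedged sketch, assuming hypothetical ``flat_type`` (`DatasetType`) and ``data_id`` (`DataCoordinate`) objects that are consistent with each other; the run name is likewise illustrative.

ref = DatasetRef(flat_type, data_id, run="HSC/calib")  # id defaults to a random UUID4

repeat_a = DatasetRef(flat_type, data_id, run="HSC/calib",
                      id_generation_mode=DatasetIdGenEnum.DATAID_TYPE_RUN)
repeat_b = DatasetRef(flat_type, data_id, run="HSC/calib",
                      id_generation_mode=DatasetIdGenEnum.DATAID_TYPE_RUN)
assert repeat_a == repeat_b  # deterministic mode reproduces the same UUID
assert ref != repeat_a       # the randomly generated id differs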

330 @property 

331 def id(self) -> DatasetId: 

332 """Primary key of the dataset (`DatasetId`). 

333 

334 Cannot be changed after a `DatasetRef` is constructed. 

335 """ 

336 return uuid.UUID(int=self._id) 

337 

338 def __eq__(self, other: Any) -> bool: 

339 try: 

340 return (self.datasetType, self.dataId, self.id) == (other.datasetType, other.dataId, other.id) 

341 except AttributeError: 

342 return NotImplemented 

343 

344 def __hash__(self) -> int: 

345 return hash((self.datasetType, self.dataId, self.id)) 

346 

347 @property 

348 def dimensions(self) -> DimensionGraph: 

349 """Dimensions associated with the underlying `DatasetType`.""" 

350 return self.datasetType.dimensions 

351 

352 def __repr__(self) -> str: 

353 # We delegate to __str__ (i.e. use "!s") for the data ID below because

354 # DataCoordinate's __repr__ - while adhering to the guidelines for

355 # __repr__ - is much harder for users to read, while its __str__ just

356 # produces a dict that can also be passed to DatasetRef's constructor.

357 return f"DatasetRef({self.datasetType!r}, {self.dataId!s}, run={self.run!r}, id={self.id})" 

358 

359 def __str__(self) -> str: 

360 s = ( 

361 f"{self.datasetType.name}@{self.dataId!s} [sc={self.datasetType.storageClass_name}]" 

362 f" (run={self.run} id={self.id})" 

363 ) 

364 return s 

365 

366 def __lt__(self, other: Any) -> bool: 

367 # Sort by run, DatasetType name, and then by DataCoordinate.

368 # The __str__ representation is probably close enough, but we

369 # need to ensure that sorting a DatasetRef matches what you would

370 # get if you sorted DatasetType+DataCoordinate.

371 if not isinstance(other, type(self)): 

372 return NotImplemented 

373 

374 # Group by run if defined; it takes precedence over DatasetType.

375 self_run = "" if self.run is None else self.run 

376 other_run = "" if other.run is None else other.run 

377 

378 # Compare tuples in the priority order 

379 return (self_run, self.datasetType, self.dataId) < (other_run, other.datasetType, other.dataId) 

380 

381 def to_simple(self, minimal: bool = False) -> SerializedDatasetRef: 

382 """Convert this class to a simple python type. 

383 

384 This makes it suitable for serialization. 

385 

386 Parameters 

387 ---------- 

388 minimal : `bool`, optional 

389 Use minimal serialization. Requires Registry to convert 

390 back to a full type. 

391 

392 Returns 

393 ------- 

394 simple : `SerializedDatasetRef`

395 The object converted to simplified form.

396 """ 

397 if minimal: 

398 # The only thing needed to uniquely define a DatasetRef is its id 

399 # so that can be used directly if it is not a component DatasetRef. 

400 # Store it in a dict to allow us to easily add the planned origin

401 # information later without having to support an int and dict in 

402 # simple form. 

403 simple: dict[str, Any] = {"id": self.id} 

404 if self.isComponent(): 

405 # We can still be a little minimalist with a component 

406 # but we will also need to record the datasetType component 

407 simple["component"] = self.datasetType.component() 

408 return SerializedDatasetRef(**simple) 

409 

410 return SerializedDatasetRef( 

411 datasetType=self.datasetType.to_simple(minimal=minimal), 

412 dataId=self.dataId.to_simple(), 

413 run=self.run, 

414 id=self.id, 

415 ) 

416 

417 @classmethod 

418 def from_simple( 

419 cls, 

420 simple: SerializedDatasetRef, 

421 universe: DimensionUniverse | None = None, 

422 registry: Registry | None = None, 

423 datasetType: DatasetType | None = None, 

424 ) -> DatasetRef: 

425 """Construct a new object from simplified form. 

426 

427 Generally this is data returned from the `to_simple` method. 

428 

429 Parameters 

430 ---------- 

431 simple : `SerializedDatasetRef`

432 The value returned by `to_simple()`. 

433 universe : `DimensionUniverse` 

434 The special graph of all known dimensions. 

435 Can be `None` if a registry is provided. 

436 registry : `lsst.daf.butler.Registry`, optional 

437 Registry to use to convert simple form of a DatasetRef to 

438 a full `DatasetRef`. Can be `None` if a full description of 

439 the type is provided along with a universe. 

440 datasetType : `DatasetType`, optional

441 If datasetType is supplied, this will be used as the datasetType

442 object in the resulting DatasetRef instead of being read from

443 the `SerializedDatasetRef`. This is useful when many refs share

444 the same type, since memory can be saved. Defaults to `None`.

445 

446 Returns 

447 ------- 

448 ref : `DatasetRef` 

449 Newly-constructed object. 

450 """ 

451 cache = PersistenceContextVars.datasetRefs.get() 

452 localName = sys.intern( 

453 datasetType.name 

454 if datasetType is not None 

455 else (x.name if (x := simple.datasetType) is not None else "") 

456 ) 

457 key = (simple.id.int, localName) 

458 if cache is not None and (cachedRef := cache.get(key, None)) is not None: 

459 return cachedRef 

460 # A minimalist serialization will just specify the id (and possibly a

461 # component) and require a registry to reconstruct the ref.

462 if not (simple.datasetType is not None or simple.dataId is not None or simple.run is not None): 

463 if registry is None: 

464 raise ValueError("Registry is required to construct component DatasetRef from integer id") 

465 if simple.id is None: 

466 raise ValueError("For minimal DatasetRef the ID must be defined.") 

467 ref = registry.getDataset(simple.id) 

468 if ref is None: 

469 raise RuntimeError(f"No matching dataset found in registry for id {simple.id}") 

470 if simple.component: 

471 ref = ref.makeComponentRef(simple.component) 

472 if cache is not None: 

473 cache[key] = ref 

474 return ref 

475 

476 if universe is None and registry is None: 

477 raise ValueError("One of universe or registry must be provided.") 

478 

479 if universe is None and registry is not None: 

480 universe = registry.dimensions 

481 

482 if universe is None: 

483 # this is for mypy 

484 raise ValueError("Unable to determine a usable universe") 

485 

486 if simple.datasetType is None and datasetType is None: 

487 # mypy 

488 raise ValueError("The DatasetType must be specified to construct a DatasetRef") 

489 if datasetType is None: 

490 if simple.datasetType is None: 

491 raise ValueError("Cannot determine Dataset type of this serialized class") 

492 datasetType = DatasetType.from_simple(simple.datasetType, universe=universe, registry=registry) 

493 

494 if simple.dataId is None: 

495 # mypy 

496 raise ValueError("The DataId must be specified to construct a DatasetRef") 

497 dataId = DataCoordinate.from_simple(simple.dataId, universe=universe) 

498 

499 # Check that simple ref is resolved. 

500 if simple.run is None: 

501 dstr = "" 

502 if simple.datasetType is None: 

503 dstr = f" (datasetType={datasetType.name!r})" 

504 raise ValueError( 

505 "Run collection name is missing from serialized representation. " 

506 f"Encountered with {simple!r}{dstr}." 

507 ) 

508 

509 newRef = cls(datasetType, dataId, id=simple.id, run=simple.run) 

510 if cache is not None: 

511 cache[key] = newRef 

512 return newRef 

513 
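A round-trip sketch, assuming a resolved ``ref`` and a ``butler`` with a registry are available (both hypothetical here).

simple = ref.to_simple()
rebuilt = DatasetRef.from_simple(simple, universe=butler.registry.dimensions)
assert rebuilt == ref

# JSON serialization goes through the to_json/from_json aliases defined below.
rebuilt_json = DatasetRef.from_json(ref.to_json(), universe=butler.registry.dimensions)

# The minimal form stores only the id (plus any component), so a registry is
# needed to look the dataset back up.
rebuilt_minimal = DatasetRef.from_simple(ref.to_simple(minimal=True), registry=butler.registry)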

514 to_json = to_json_pydantic 

515 from_json: ClassVar = classmethod(from_json_pydantic) 

516 

517 @classmethod 

518 def _unpickle( 

519 cls, 

520 datasetType: DatasetType, 

521 dataId: DataCoordinate, 

522 id: DatasetId, 

523 run: str, 

524 ) -> DatasetRef: 

525 """Create new `DatasetRef`. 

526 

527 A custom factory method for use by `__reduce__` as a workaround for 

528 its lack of support for keyword arguments. 

529 """ 

530 return cls(datasetType, dataId, id=id, run=run) 

531 

532 def __reduce__(self) -> tuple: 

533 return (self._unpickle, (self.datasetType, self.dataId, self.id, self.run)) 

534 

535 def __deepcopy__(self, memo: dict) -> DatasetRef: 

536 # DatasetRef is recursively immutable; see note in @immutable 

537 # decorator. 

538 return self 

539 

540 def expanded(self, dataId: DataCoordinate) -> DatasetRef: 

541 """Return a new `DatasetRef` with the given expanded data ID. 

542 

543 Parameters 

544 ---------- 

545 dataId : `DataCoordinate` 

546 Data ID for the new `DatasetRef`. Must compare equal to the 

547 original data ID. 

548 

549 Returns 

550 ------- 

551 ref : `DatasetRef` 

552 A new `DatasetRef` with the given data ID. 

553 """ 

554 assert dataId == self.dataId 

555 return DatasetRef( 

556 datasetType=self.datasetType, dataId=dataId, id=self.id, run=self.run, conform=False 

557 ) 

558 

559 def isComponent(self) -> bool: 

560 """Indicate whether this `DatasetRef` refers to a component. 

561 

562 Returns 

563 ------- 

564 isComponent : `bool` 

565 `True` if this `DatasetRef` is a component, `False` otherwise. 

566 """ 

567 return self.datasetType.isComponent() 

568 

569 def isComposite(self) -> bool: 

570 """Boolean indicating whether this `DatasetRef` is a composite type. 

571 

572 Returns 

573 ------- 

574 isComposite : `bool` 

575 `True` if this `DatasetRef` is a composite type, `False` 

576 otherwise. 

577 """ 

578 return self.datasetType.isComposite() 

579 

580 def _lookupNames(self) -> tuple[LookupKey, ...]: 

581 """Name keys to use when looking up this DatasetRef in a configuration. 

582 

583 The names are returned in order of priority. 

584 

585 Returns 

586 ------- 

587 names : `tuple` of `LookupKey` 

588 Tuple of the `DatasetType` name and the `StorageClass` name. 

589 If ``instrument`` is defined in the dataId, each of those names 

590 is added to the start of the tuple with a key derived from the 

591 value of ``instrument``. 

592 """ 

593 # Special case the instrument Dimension since we allow configs 

594 # to include the instrument name in the hierarchy. 

595 names: tuple[LookupKey, ...] = self.datasetType._lookupNames() 

596 

597 if "instrument" in self.dataId: 

598 names = tuple(n.clone(dataId={"instrument": self.dataId["instrument"]}) for n in names) + names 

599 

600 return names 

601 

602 @staticmethod 

603 def groupByType(refs: Iterable[DatasetRef]) -> NamedKeyDict[DatasetType, list[DatasetRef]]: 

604 """Group an iterable of `DatasetRef` by `DatasetType`. 

605 

606 Parameters 

607 ---------- 

608 refs : `~collections.abc.Iterable` [ `DatasetRef` ] 

609 `DatasetRef` instances to group. 

610 

611 Returns 

612 ------- 

613 grouped : `NamedKeyDict` [ `DatasetType`, `list` [ `DatasetRef` ] ] 

614 Grouped `DatasetRef` instances. 

615 

616 Notes 

617 ----- 

618 When lazy item-iterables are acceptable instead of a full mapping, 

619 `iter_by_type` can in some cases be far more efficient. 

620 """ 

621 result: NamedKeyDict[DatasetType, list[DatasetRef]] = NamedKeyDict() 

622 for ref in refs: 

623 result.setdefault(ref.datasetType, []).append(ref) 

624 return result 

625 

626 @staticmethod 

627 def iter_by_type( 

628 refs: Iterable[DatasetRef], 

629 ) -> Iterable[tuple[DatasetType, Iterable[DatasetRef]]]: 

630 """Group an iterable of `DatasetRef` by `DatasetType` with special 

631 hooks for custom iterables that can do this efficiently. 

632 

633 Parameters 

634 ---------- 

635 refs : `~collections.abc.Iterable` [ `DatasetRef` ] 

636 `DatasetRef` instances to group. If this satisfies the 

637 `_DatasetRefGroupedIterable` protocol, its 

638 `~_DatasetRefGroupedIterable._iter_by_dataset_type` method will 

639 be called. 

640 

641 Returns 

642 ------- 

643 grouped : `~collections.abc.Iterable` [ `tuple` [ `DatasetType`, \ 

644 `~collections.abc.Iterable` [ `DatasetRef` ] ] ]

645 Grouped `DatasetRef` instances. 

646 """ 

647 if isinstance(refs, _DatasetRefGroupedIterable): 

648 return refs._iter_by_dataset_type() 

649 return DatasetRef.groupByType(refs).items() 

650 
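A small sketch of the two grouping entry points, for any iterable ``refs`` of `DatasetRef` (hypothetical here, as is the ``process`` callable).

# Eager: a NamedKeyDict keyed by DatasetType, indexable by type or by name.
by_type = DatasetRef.groupByType(refs)
for dataset_type, refs_of_type in by_type.items():
    print(dataset_type.name, len(refs_of_type))

# Lazy-friendly: defers to the container's own grouping when it implements the
# _DatasetRefGroupedIterable protocol, otherwise falls back to groupByType.
for dataset_type, refs_of_type in DatasetRef.iter_by_type(refs):
    process(dataset_type, refs_of_type)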

651 def makeCompositeRef(self) -> DatasetRef: 

652 """Create a `DatasetRef` of the composite from a component ref. 

653 

654 Requires that this `DatasetRef` is a component. 

655 

656 Returns 

657 ------- 

658 ref : `DatasetRef` 

659 A `DatasetRef` with a dataset type that corresponds to the 

660 composite parent of this component, and the same ID and run 

661 (which may be `None`, if they are `None` in ``self``). 

662 """ 

663 # Assume that the data ID does not need to be standardized 

664 # and should match whatever this ref already has. 

665 return DatasetRef( 

666 self.datasetType.makeCompositeDatasetType(), self.dataId, id=self.id, run=self.run, conform=False 

667 ) 

668 

669 def makeComponentRef(self, name: str) -> DatasetRef: 

670 """Create a `DatasetRef` that corresponds to a component. 

671 

672 Parameters 

673 ---------- 

674 name : `str` 

675 Name of the component. 

676 

677 Returns 

678 ------- 

679 ref : `DatasetRef` 

680 A `DatasetRef` with a dataset type that corresponds to the given 

681 component, and the same ID and run 

682 (which may be `None`, if they are `None` in ``self``). 

683 """ 

684 # Assume that the data ID does not need to be standardized 

685 # and should match whatever this ref already has. 

686 return DatasetRef( 

687 self.datasetType.makeComponentDatasetType(name), 

688 self.dataId, 

689 id=self.id, 

690 run=self.run, 

691 conform=False, 

692 ) 

693 
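A component round-trip sketch for a hypothetical composite ``ref`` (say, an exposure-like dataset with a ``wcs`` component).

wcs_ref = ref.makeComponentRef("wcs")
assert wcs_ref.isComponent()
assert wcs_ref.id == ref.id and wcs_ref.run == ref.run  # identity fields are shared
# Component dataset types are named "<parent>.<component>", e.g. "calexp.wcs".
parent_ref = wcs_ref.makeCompositeRef()
assert parent_ref.id == ref.id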

694 def overrideStorageClass(self, storageClass: str | StorageClass) -> DatasetRef: 

695 """Create a new `DatasetRef` from this one, but with a modified 

696 `DatasetType` that has a different `StorageClass`. 

697 

698 Parameters 

699 ---------- 

700 storageClass : `str` or `StorageClass` 

701 The new storage class. 

702 

703 Returns 

704 ------- 

705 modified : `DatasetRef` 

706 A new dataset reference that is the same as the current one but 

707 with a different storage class in the `DatasetType`. 

708 """ 

709 return DatasetRef( 

710 datasetType=self.datasetType.overrideStorageClass(storageClass), 

711 dataId=self.dataId, 

712 id=self.id, 

713 run=self.run, 

714 conform=False, 

715 ) 

716 
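A sketch of a storage-class override, assuming ``ref`` exists and that a converter is registered from its storage class to "DataFrame" (both the ref and the target storage class name are illustrative).

df_ref = ref.overrideStorageClass("DataFrame")
assert df_ref.id == ref.id and df_ref.dataId == ref.dataId and df_ref.run == ref.run
# Per is_compatible_with below, a successful override guarantees:
assert ref.is_compatible_with(df_ref)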

717 def is_compatible_with(self, ref: DatasetRef) -> bool: 

718 """Determine if the given `DatasetRef` is compatible with this one. 

719 

720 Parameters 

721 ---------- 

722 ref : `DatasetRef`

723 Dataset ref to check. 

724 

725 Returns 

726 ------- 

727 is_compatible : `bool` 

728 Returns `True` if the given dataset ref is the same as this one, or

729 if its dataset type is compatible with this one's and the dataId and

730 dataset ID match.

731 

732 Notes 

733 ----- 

734 Compatibility requires that the dataId and dataset ID match and that the

735 `DatasetType` is compatible. Compatibility means that the storage

736 class associated with the dataset type of the other ref can be

737 converted to this ref's storage class.

738 

739 Specifically this means that if you have done: 

740 

741 .. code-block:: py 

742 

743 new_ref = ref.overrideStorageClass(sc) 

744 

745 and this is successful, then the guarantee is that: 

746 

747 .. code-block:: py 

748 

749 assert ref.is_compatible_with(new_ref) is True 

750 

751 since we know that the python type associated with the new ref can 

752 be converted to the original python type. The reverse is not guaranteed 

753 and depends on whether bidirectional converters have been registered. 

754 """ 

755 if self.id != ref.id: 

756 return False 

757 if self.dataId != ref.dataId: 

758 return False 

759 if self.run != ref.run: 

760 return False 

761 return self.datasetType.is_compatible_with(ref.datasetType) 

762 

763 datasetType: DatasetType 

764 """The definition of this dataset (`DatasetType`). 

765 

766 Cannot be changed after a `DatasetRef` is constructed. 

767 """ 

768 

769 dataId: DataCoordinate 

770 """A mapping of `Dimension` primary key values that labels the dataset 

771 within a Collection (`DataCoordinate`). 

772 

773 Cannot be changed after a `DatasetRef` is constructed. 

774 """ 

775 

776 run: str 

777 """The name of the run that produced the dataset. 

778 

779 Cannot be changed after a `DatasetRef` is constructed. 

780 """