Coverage for python/lsst/daf/butler/core/datasets/ref.py: 29% (242 statements)

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = [
    "AmbiguousDatasetError",
    "DatasetId",
    "DatasetIdFactory",
    "DatasetIdGenEnum",
    "DatasetRef",
    "SerializedDatasetRef",
    "UnresolvedRefWarning",
]

import enum
import inspect
import uuid
import warnings
from typing import TYPE_CHECKING, Any, ClassVar, Dict, Iterable, List, Optional, Tuple, Union

from deprecated.sphinx import deprecated
from lsst.utils.classes import immutable
from pydantic import BaseModel, ConstrainedInt, StrictStr, validator

from ..configSupport import LookupKey
from ..dimensions import DataCoordinate, DimensionGraph, DimensionUniverse, SerializedDataCoordinate
from ..json import from_json_pydantic, to_json_pydantic
from ..named import NamedKeyDict
from .type import DatasetType, SerializedDatasetType

if TYPE_CHECKING:
    from ...registry import Registry
    from ..storageClass import StorageClass


class UnresolvedRefWarning(FutureWarning):
    """Warnings concerning the usage of unresolved DatasetRefs."""


class AmbiguousDatasetError(Exception):
    """Raised when a `DatasetRef` is not resolved but should be.

    This happens when the `DatasetRef` has no ID or run but the requested
    operation requires one of them.
    """


class PositiveInt(ConstrainedInt):
    ge = 0
    strict = True


def _find_outside_stacklevel() -> int:
    """Find the stacklevel for the first frame outside lsst.daf.butler."""
    stacklevel = 1
    for i, s in enumerate(inspect.stack()):
        module = inspect.getmodule(s.frame)
        # Stack frames sometimes hang around so explicitly delete.
        del s
        if module is None:
            continue
        if not module.__name__.startswith("lsst.daf.butler"):
            # Frame 0 is this function and frame 1 is its caller, so the
            # index of the first frame outside the package is already the
            # right stacklevel and needs no adjustment.
            stacklevel = i
            break

    return stacklevel
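
# Illustrative sketch (comments only, not part of the module): the value
# returned above is meant to be passed to ``warnings.warn`` so that the
# warning is attributed to the first caller outside lsst.daf.butler rather
# than to an internal library frame, e.g.:
#
#     warnings.warn("...", FutureWarning, stacklevel=_find_outside_stacklevel())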


class DatasetIdGenEnum(enum.Enum):
    """Enum used to specify dataset ID generation options."""

    UNIQUE = 0
    """Unique mode generates a unique ID for each inserted dataset, e.g.
    auto-generated by the database or a random UUID.
    """

    DATAID_TYPE = 1
    """In this mode the ID is computed deterministically from a combination
    of dataset type and dataId.
    """

    DATAID_TYPE_RUN = 2
    """In this mode the ID is computed deterministically from a combination
    of dataset type, dataId, and run collection name.
    """


class DatasetIdFactory:
    """Factory for dataset IDs (UUIDs).

    For now the logic is hard-coded and is controlled by the user-provided
    value of `DatasetIdGenEnum`. In the future we may implement configurable
    logic that can guess the `DatasetIdGenEnum` value from other parameters.
    """

    NS_UUID = uuid.UUID("840b31d9-05cd-5161-b2c8-00d32b280d0f")
    """Namespace UUID used for UUID5 generation. Do not change. This was
    produced by `uuid.uuid5(uuid.NAMESPACE_DNS, "lsst.org")`.
    """

    def makeDatasetId(
        self,
        run: str,
        datasetType: DatasetType,
        dataId: DataCoordinate,
        idGenerationMode: DatasetIdGenEnum,
    ) -> uuid.UUID:
        """Generate dataset ID for a dataset.

        Parameters
        ----------
        run : `str`
            Name of the RUN collection for the dataset.
        datasetType : `DatasetType`
            Dataset type.
        dataId : `DataCoordinate`
            Expanded data ID for the dataset.
        idGenerationMode : `DatasetIdGenEnum`
            ID generation option. `~DatasetIdGenEnum.UNIQUE` makes a random
            UUID4-type ID. `~DatasetIdGenEnum.DATAID_TYPE` makes a
            deterministic UUID5-type ID based on a dataset type name and
            ``dataId``. `~DatasetIdGenEnum.DATAID_TYPE_RUN` makes a
            deterministic UUID5-type ID based on a dataset type name, run
            collection name, and ``dataId``.

        Returns
        -------
        datasetId : `uuid.UUID`
            Dataset identifier.
        """
        if idGenerationMode is DatasetIdGenEnum.UNIQUE:
            return uuid.uuid4()
        else:
            # WARNING: If you modify this code make sure that the order of
            # items in the `items` list below never changes.
            items: list[tuple[str, str]] = []
            if idGenerationMode is DatasetIdGenEnum.DATAID_TYPE:
                items = [
                    ("dataset_type", datasetType.name),
                ]
            elif idGenerationMode is DatasetIdGenEnum.DATAID_TYPE_RUN:
                items = [
                    ("dataset_type", datasetType.name),
                    ("run", run),
                ]
            else:
                raise ValueError(f"Unexpected ID generation mode: {idGenerationMode}")

            for name, value in sorted(dataId.byName().items()):
                items.append((name, str(value)))
            data = ",".join(f"{key}={value}" for key, value in items)
            return uuid.uuid5(self.NS_UUID, data)
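
    # Illustrative sketch (comments only, not part of the module): for the
    # deterministic modes the UUID5 depends only on the string built above,
    # so identical inputs always yield the same ID. Assuming ``flat`` is a
    # DatasetType and ``data_id`` a matching expanded DataCoordinate:
    #
    #     factory = DatasetIdFactory()
    #     mode = DatasetIdGenEnum.DATAID_TYPE_RUN
    #     a = factory.makeDatasetId("run/1", flat, data_id, mode)
    #     b = factory.makeDatasetId("run/1", flat, data_id, mode)
    #     assert a == b  # uuid5 is a pure function of NS_UUID and the data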

    @deprecated(
        "This method will soon be removed since it will be impossible to create an unresolved ref.",
        version="26.0",
        category=UnresolvedRefWarning,
    )
    def resolveRef(
        self,
        ref: DatasetRef,
        run: str,
        idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE,
    ) -> DatasetRef:
        """Generate a resolved dataset reference for a predicted dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            Dataset ref, which may already be resolved.
        run : `str`
            Name of the RUN collection for the dataset.
        idGenerationMode : `DatasetIdGenEnum`
            ID generation option. `~DatasetIdGenEnum.UNIQUE` makes a random
            UUID4-type ID. `~DatasetIdGenEnum.DATAID_TYPE` makes a
            deterministic UUID5-type ID based on a dataset type name and
            ``dataId``. `~DatasetIdGenEnum.DATAID_TYPE_RUN` makes a
            deterministic UUID5-type ID based on a dataset type name, run
            collection name, and ``dataId``.

        Returns
        -------
        resolved : `DatasetRef`
            Resolved dataset ref; if the input reference is already resolved
            it is returned without modification.

        Notes
        -----
        This method can only be used for predicted dataset references that do
        not exist yet in the database. It does not resolve existing dataset
        references already stored in the registry.
        """
        if ref.id is not None:
            return ref
        datasetId = self.makeDatasetId(run, ref.datasetType, ref.dataId, idGenerationMode)
        # Hide the warning coming from ref.resolved().
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=UnresolvedRefWarning)
            resolved = ref.resolved(datasetId, run)
        return resolved


class SerializedDatasetRef(BaseModel):
    """Simplified model of a `DatasetRef` suitable for serialization."""

    # DO NOT change order in the Union, pydantic is sensitive to that!
    id: uuid.UUID | None = None
    datasetType: Optional[SerializedDatasetType] = None
    dataId: Optional[SerializedDataCoordinate] = None
    run: Optional[StrictStr] = None
    component: Optional[StrictStr] = None

    @validator("dataId")
    def _check_dataId(cls, v: Any, values: Dict[str, Any]) -> Any:  # noqa: N805
        if (d := "datasetType") in values and values[d] is None:
            raise ValueError("Can not specify 'dataId' without specifying 'datasetType'")
        return v

    @validator("run")
    def _check_run(cls, v: Any, values: Dict[str, Any]) -> Any:  # noqa: N805
        if v and (i := "id") in values and values[i] is None:
            raise ValueError("'run' cannot be provided unless 'id' is.")
        return v

    @validator("component")
    def _check_component(cls, v: Any, values: Dict[str, Any]) -> Any:  # noqa: N805
        # Component should not be given if datasetType is given.
        if v and (d := "datasetType") in values and values[d] is not None:
            raise ValueError(f"datasetType ({values[d]}) can not be set if component is given ({v}).")
        return v
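
    # Illustrative sketch (comments only, not part of the module): the
    # validators above enforce cross-field consistency at construction time,
    # e.g.:
    #
    #     SerializedDatasetRef(run="run/1")          # ValueError: 'run' needs 'id'
    #     SerializedDatasetRef(dataId=some_data_id)  # ValueError: needs 'datasetType'
    #
    # (``some_data_id`` is a hypothetical SerializedDataCoordinate.)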

    @classmethod
    def direct(
        cls,
        *,
        id: Optional[Union[str, int]] = None,
        datasetType: Optional[Dict[str, Any]] = None,
        dataId: Optional[Dict[str, Any]] = None,
        run: str | None = None,
        component: Optional[str] = None,
    ) -> SerializedDatasetRef:
        """Construct a `SerializedDatasetRef` directly without validators.

        This differs from the pydantic "construct" method in that the
        arguments are explicitly what the model requires, and it will recurse
        through members, constructing them from their corresponding `direct`
        methods.

        This method should only be called when the inputs are trusted.
        """
        node = SerializedDatasetRef.__new__(cls)
        setter = object.__setattr__
        setter(node, "id", uuid.UUID(id) if isinstance(id, str) else id)
        setter(
            node,
            "datasetType",
            datasetType if datasetType is None else SerializedDatasetType.direct(**datasetType),
        )
        setter(node, "dataId", dataId if dataId is None else SerializedDataCoordinate.direct(**dataId))
        setter(node, "run", run)
        setter(node, "component", component)
        setter(node, "__fields_set__", {"id", "datasetType", "dataId", "run", "component"})
        return node
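
    # Illustrative note (not part of the module): because ``direct`` skips
    # pydantic validation entirely, it is only safe for data that is already
    # known to be a valid serialized ref; the exact round-trip details depend
    # on the serialized form, but roughly:
    #
    #     simple = SerializedDatasetRef.direct(**ref.to_simple().dict())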


DatasetId = uuid.UUID
"""A type alias for dataset ID. Currently always `uuid.UUID`, but kept as a
separate name to provide typing flexibility if the underlying type changes.
"""


@immutable
class DatasetRef:
    """Reference to a Dataset in a `Registry`.

    A `DatasetRef` may point to a Dataset that does not yet exist
    (e.g., because it is a predicted input for provenance).

    Parameters
    ----------
    datasetType : `DatasetType`
        The `DatasetType` for this Dataset.
    dataId : `DataCoordinate`
        A mapping of dimensions that labels the Dataset within a Collection.
    id : `DatasetId`, optional
        The unique identifier assigned when the dataset is created. If ``run``
        is specified and ``id`` is not specified, an ID will be created.
    run : `str`, optional
        The name of the run this dataset was associated with when it was
        created. Must be provided if ``id`` is.
    conform : `bool`, optional
        If `True` (default), call `DataCoordinate.standardize` to ensure that
        the data ID's dimensions are consistent with the dataset type's.
        `DatasetRef` instances for which those dimensions are not equal should
        not be created in new code, but are still supported for backwards
        compatibility. New code should only pass `False` if it can guarantee
        that the dimensions are already consistent.
    id_generation_mode : `DatasetIdGenEnum`
        ID generation option. `~DatasetIdGenEnum.UNIQUE` makes a random
        UUID4-type ID. `~DatasetIdGenEnum.DATAID_TYPE` makes a
        deterministic UUID5-type ID based on a dataset type name and
        ``dataId``. `~DatasetIdGenEnum.DATAID_TYPE_RUN` makes a
        deterministic UUID5-type ID based on a dataset type name, run
        collection name, and ``dataId``.

    Raises
    ------
    ValueError
        Raised if ``id`` is provided but ``run`` is not.

    See Also
    --------
    :ref:`daf_butler_organizing_datasets`
    """

    _serializedType = SerializedDatasetRef
    __slots__ = (
        "id",
        "datasetType",
        "dataId",
        "run",
    )

    def __init__(
        self,
        datasetType: DatasetType,
        dataId: DataCoordinate,
        *,
        id: Optional[DatasetId] = None,
        run: Optional[str] = None,
        conform: bool = True,
        id_generation_mode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE,
    ):
        self.datasetType = datasetType
        if conform:
            self.dataId = DataCoordinate.standardize(dataId, graph=datasetType.dimensions)
        else:
            self.dataId = dataId
        if id is not None:
            if run is None:
                raise ValueError(
                    f"Cannot provide id without run for dataset with id={id}, "
                    f"type={datasetType}, and dataId={dataId}."
                )
            self.run = run
            self.id = id
        else:
            if run is not None:
                self.run = run
                self.id = DatasetIdFactory().makeDatasetId(
                    self.run, self.datasetType, self.dataId, id_generation_mode
                )
            else:
                self.id = None
                self.run = None
                warnings.warn(
                    "Support for creating unresolved refs will soon be removed. Please contact the "
                    "middleware team for advice on modifying your code to use resolved refs.",
                    category=UnresolvedRefWarning,
                    stacklevel=_find_outside_stacklevel(),
                )
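
    # Illustrative sketch (comments only, not part of the module): assuming
    # ``flat`` is a DatasetType and ``data_id`` a matching DataCoordinate,
    # a resolved ref can be created by giving just a run (an ID is generated)
    # or a run plus an explicit ID:
    #
    #     ref = DatasetRef(flat, data_id, run="run/1")
    #     same_id = DatasetRef(flat, data_id, run="run/1", id=ref.id)
    #     assert ref == same_id  # equality uses (datasetType, dataId, id)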

    def __eq__(self, other: Any) -> bool:
        try:
            return (self.datasetType, self.dataId, self.id) == (other.datasetType, other.dataId, other.id)
        except AttributeError:
            return NotImplemented

    def __hash__(self) -> int:
        return hash((self.datasetType, self.dataId, self.id))

    @property
    def dimensions(self) -> DimensionGraph:
        """Dimensions associated with the underlying `DatasetType`."""
        return self.datasetType.dimensions

    def __repr__(self) -> str:
        # We delegate to __str__ (i.e. use "!s") for the data ID below
        # because DataCoordinate's __repr__ - while adhering to the
        # guidelines for __repr__ - is much harder for users to read, while
        # its __str__ just produces a dict that can also be passed to
        # DatasetRef's constructor.
        if self.id is not None:
            return f"DatasetRef({self.datasetType!r}, {self.dataId!s}, id={self.id}, run={self.run!r})"
        else:
            return f"DatasetRef({self.datasetType!r}, {self.dataId!s})"

    def __str__(self) -> str:
        s = f"{self.datasetType.name}@{self.dataId!s} [sc={self.datasetType.storageClass_name}]"
        if self.id is not None:
            s += f" (id={self.id})"
        return s

    def __lt__(self, other: Any) -> bool:
        # Sort by run, DatasetType name, and then by DataCoordinate.
        # The __str__ representation is probably close enough but we
        # need to ensure that sorting a DatasetRef matches what you would
        # get if you sorted DatasetType + DataCoordinate.
        if not isinstance(other, type(self)):
            return NotImplemented

        # Group by run if defined; this takes precedence over DatasetType.
        self_run = "" if self.run is None else self.run
        other_run = "" if other.run is None else other.run

        # Compare tuples in priority order.
        return (self_run, self.datasetType, self.dataId) < (other_run, other.datasetType, other.dataId)
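
    # Illustrative note (not part of the module): because the run name is
    # compared first, ``sorted(refs)`` clusters refs by run before ordering
    # by dataset type and data ID within each run (assuming ``refs`` is an
    # iterable of DatasetRef).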

    def to_simple(self, minimal: bool = False) -> SerializedDatasetRef:
        """Convert this class to a simple python type.

        This makes it suitable for serialization.

        Parameters
        ----------
        minimal : `bool`, optional
            Use minimal serialization. Requires Registry to convert
            back to a full type.

        Returns
        -------
        simple : `SerializedDatasetRef`
            The object converted to simplified form.
        """
        if minimal and self.id is not None:
            # The only thing needed to uniquely define a DatasetRef
            # is its id so that can be used directly if it is
            # resolved and if it is not a component DatasetRef.
            # Store it in a dict to allow us to easily add the planned
            # origin information later without having to support
            # an int and dict in simple form.
            simple: Dict[str, Any] = {"id": self.id}
            if self.isComponent():
                # We can still be a little minimalist with a component
                # but we will also need to record the datasetType component.
                simple["component"] = self.datasetType.component()
            return SerializedDatasetRef(**simple)

        # Convert to a dict form.
        as_dict: Dict[str, Any] = {
            "datasetType": self.datasetType.to_simple(minimal=minimal),
            "dataId": self.dataId.to_simple(),
        }

        # Only include the id entry if it is defined.
        if self.id is not None:
            as_dict["run"] = self.run
            as_dict["id"] = self.id

        return SerializedDatasetRef(**as_dict)
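
    # Illustrative sketch (comments only, not part of the module): to_simple
    # and from_simple (below) are designed to round-trip, assuming ``ref`` is
    # a resolved DatasetRef and ``universe`` its DimensionUniverse:
    #
    #     simple = ref.to_simple()
    #     restored = DatasetRef.from_simple(simple, universe=universe)
    #     assert restored == ref
    #
    # With ``minimal=True`` only the ID (and component name) is stored, and
    # from_simple then needs a ``registry`` to reconstruct the full ref.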

    @classmethod
    def from_simple(
        cls,
        simple: SerializedDatasetRef,
        universe: Optional[DimensionUniverse] = None,
        registry: Optional[Registry] = None,
        datasetType: Optional[DatasetType] = None,
    ) -> DatasetRef:
        """Construct a new object from simplified form.

        Generally this is data returned from the `to_simple` method.

        Parameters
        ----------
        simple : `SerializedDatasetRef`
            The value returned by `to_simple()`.
        universe : `DimensionUniverse`
            The special graph of all known dimensions.
            Can be `None` if a registry is provided.
        registry : `lsst.daf.butler.Registry`, optional
            Registry to use to convert the simple form of a DatasetRef to
            a full `DatasetRef`. Can be `None` if a full description of
            the type is provided along with a universe.
        datasetType : `DatasetType`, optional
            If datasetType is supplied, this will be used as the datasetType
            object in the resulting DatasetRef instead of being read from
            the `SerializedDatasetRef`. This is useful when many refs share
            the same type, since memory can be saved. Defaults to `None`.

        Returns
        -------
        ref : `DatasetRef`
            Newly-constructed object.
        """
        # A minimalist component will just specify component and id and
        # require registry to reconstruct.
        if set(simple.dict(exclude_unset=True, exclude_defaults=True)).issubset({"id", "component"}):
            if registry is None:
                raise ValueError("Registry is required to construct component DatasetRef from id")
            if simple.id is None:
                raise ValueError("For minimal DatasetRef the ID must be defined.")
            ref = registry.getDataset(simple.id)
            if ref is None:
                raise RuntimeError(f"No matching dataset found in registry for id {simple.id}")
            if simple.component:
                ref = ref.makeComponentRef(simple.component)
            return ref

        if universe is None and registry is None:
            raise ValueError("One of universe or registry must be provided.")

        if universe is None and registry is not None:
            universe = registry.dimensions

        if universe is None:
            # This is for mypy.
            raise ValueError("Unable to determine a usable universe")

        if simple.datasetType is None and datasetType is None:
            # mypy
            raise ValueError("The DatasetType must be specified to construct a DatasetRef")
        if datasetType is None:
            if simple.datasetType is None:
                raise ValueError("Cannot determine Dataset type of this serialized class")
            datasetType = DatasetType.from_simple(simple.datasetType, universe=universe, registry=registry)

        if simple.dataId is None:
            # mypy
            raise ValueError("The DataId must be specified to construct a DatasetRef")
        dataId = DataCoordinate.from_simple(simple.dataId, universe=universe)

        # Issue our own warning that could be more explicit.
        if simple.id is None and simple.run is None:
            dstr = ""
            if simple.datasetType is None:
                dstr = f" (datasetType={datasetType.name!r})"
            warnings.warn(
                "Attempting to create an unresolved ref from simple form is deprecated. "
                f"Encountered with {simple!r}{dstr}.",
                category=UnresolvedRefWarning,
                stacklevel=_find_outside_stacklevel(),
            )

        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=UnresolvedRefWarning)
            return cls(datasetType, dataId, id=simple.id, run=simple.run)

    to_json = to_json_pydantic
    from_json: ClassVar = classmethod(from_json_pydantic)

    @classmethod
    def _unpickle(
        cls,
        datasetType: DatasetType,
        dataId: DataCoordinate,
        id: Optional[DatasetId],
        run: Optional[str],
    ) -> DatasetRef:
        """Create new `DatasetRef`.

        A custom factory method for use by `__reduce__` as a workaround for
        its lack of support for keyword arguments.
        """
        return cls(datasetType, dataId, id=id, run=run)

    def __reduce__(self) -> tuple:
        return (self._unpickle, (self.datasetType, self.dataId, self.id, self.run))

    def __deepcopy__(self, memo: dict) -> DatasetRef:
        # DatasetRef is recursively immutable; see note in @immutable
        # decorator.
        return self

    @deprecated(
        "This method will soon be a no-op since it will be impossible to create an unresolved ref.",
        version="26.0",
        category=UnresolvedRefWarning,
    )
    def resolved(self, id: DatasetId, run: str) -> DatasetRef:
        """Return resolved `DatasetRef`.

        This is a new `DatasetRef` with the same data ID and dataset type
        and the given ID and run.

        Parameters
        ----------
        id : `DatasetId`
            The unique identifier assigned when the dataset is created.
        run : `str`
            The run this dataset was associated with when it was created.

        Returns
        -------
        ref : `DatasetRef`
            A new `DatasetRef`.
        """
        return DatasetRef(datasetType=self.datasetType, dataId=self.dataId, id=id, run=run, conform=False)

    @deprecated(
        "Support for unresolved refs will soon be removed. Please contact middleware developers for"
        " advice on how to modify your code.",
        category=UnresolvedRefWarning,
        version="26.0",
    )
    def unresolved(self) -> DatasetRef:
        """Return unresolved `DatasetRef`.

        This is a new `DatasetRef` with the same data ID and dataset type,
        but no ID or run.

        Returns
        -------
        ref : `DatasetRef`
            A new `DatasetRef`.

        Notes
        -----
        This can be used to compare only the data ID and dataset type of a
        pair of `DatasetRef` instances, regardless of whether either is
        resolved::

            if ref1.unresolved() == ref2.unresolved():
                ...
        """
        # We have already warned about this so no need to warn again.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=UnresolvedRefWarning)
            return DatasetRef(datasetType=self.datasetType, dataId=self.dataId, conform=False)

    def expanded(self, dataId: DataCoordinate) -> DatasetRef:
        """Return a new `DatasetRef` with the given expanded data ID.

        Parameters
        ----------
        dataId : `DataCoordinate`
            Data ID for the new `DatasetRef`. Must compare equal to the
            original data ID.

        Returns
        -------
        ref : `DatasetRef`
            A new `DatasetRef` with the given data ID.
        """
        assert dataId == self.dataId
        return DatasetRef(
            datasetType=self.datasetType, dataId=dataId, id=self.id, run=self.run, conform=False
        )

    def isComponent(self) -> bool:
        """Indicate whether this `DatasetRef` refers to a component.

        Returns
        -------
        isComponent : `bool`
            `True` if this `DatasetRef` is a component, `False` otherwise.
        """
        return self.datasetType.isComponent()

    def isComposite(self) -> bool:
        """Indicate whether this `DatasetRef` is a composite type.

        Returns
        -------
        isComposite : `bool`
            `True` if this `DatasetRef` is a composite type, `False`
            otherwise.
        """
        return self.datasetType.isComposite()

    def _lookupNames(self) -> Tuple[LookupKey, ...]:
        """Name keys to use when looking up this DatasetRef in a
        configuration.

        The names are returned in order of priority.

        Returns
        -------
        names : `tuple` of `LookupKey`
            Tuple of the `DatasetType` name and the `StorageClass` name.
            If ``instrument`` is defined in the dataId, each of those names
            is added to the start of the tuple with a key derived from the
            value of ``instrument``.
        """
        # Special-case the instrument Dimension since we allow configs
        # to include the instrument name in the hierarchy.
        names: Tuple[LookupKey, ...] = self.datasetType._lookupNames()

        if "instrument" in self.dataId:
            names = tuple(n.clone(dataId={"instrument": self.dataId["instrument"]}) for n in names) + names

        return names

    @staticmethod
    def groupByType(refs: Iterable[DatasetRef]) -> NamedKeyDict[DatasetType, List[DatasetRef]]:
        """Group an iterable of `DatasetRef` by `DatasetType`.

        Parameters
        ----------
        refs : `Iterable` [ `DatasetRef` ]
            `DatasetRef` instances to group.

        Returns
        -------
        grouped : `NamedKeyDict` [ `DatasetType`, `list` [ `DatasetRef` ] ]
            Grouped `DatasetRef` instances.
        """
        result: NamedKeyDict[DatasetType, List[DatasetRef]] = NamedKeyDict()
        for ref in refs:
            result.setdefault(ref.datasetType, []).append(ref)
        return result
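
    # Illustrative sketch (comments only, not part of the module): iterating
    # over the grouped result, assuming ``refs`` is an iterable of
    # DatasetRef:
    #
    #     for dataset_type, refs_of_type in DatasetRef.groupByType(refs).items():
    #         print(dataset_type.name, len(refs_of_type))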

    def getCheckedId(self) -> DatasetId:
        """Return ``self.id``, or raise if it is `None`.

        This trivial method exists to allow operations that would otherwise
        be natural list comprehensions to check that the ID is not `None` as
        well.

        Returns
        -------
        id : `DatasetId`
            ``self.id`` if it is not `None`.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``ref.id`` is `None`.
        """
        if self.id is None:
            raise AmbiguousDatasetError(f"ID for dataset {self} is `None`; a resolved reference is required.")
        return self.id

    def makeCompositeRef(self) -> DatasetRef:
        """Create a `DatasetRef` of the composite from a component ref.

        Requires that this `DatasetRef` is a component.

        Returns
        -------
        ref : `DatasetRef`
            A `DatasetRef` with a dataset type that corresponds to the
            composite parent of this component, and the same ID and run
            (which may be `None`, if they are `None` in ``self``).
        """
        # Assume that the data ID does not need to be standardized
        # and should match whatever this ref already has.
        return DatasetRef(
            self.datasetType.makeCompositeDatasetType(), self.dataId, id=self.id, run=self.run, conform=False
        )

    def makeComponentRef(self, name: str) -> DatasetRef:
        """Create a `DatasetRef` that corresponds to a component.

        Parameters
        ----------
        name : `str`
            Name of the component.

        Returns
        -------
        ref : `DatasetRef`
            A `DatasetRef` with a dataset type that corresponds to the given
            component, and the same ID and run (which may be `None`, if they
            are `None` in ``self``).
        """
        # Assume that the data ID does not need to be standardized
        # and should match whatever this ref already has.
        return DatasetRef(
            self.datasetType.makeComponentDatasetType(name),
            self.dataId,
            id=self.id,
            run=self.run,
            conform=False,
        )
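
    # Illustrative sketch (comments only, not part of the module): a
    # component ref shares the parent's ID and run, and makeCompositeRef
    # recovers the parent type, assuming ``ref`` is a resolved composite ref
    # with a "wcs" component:
    #
    #     comp = ref.makeComponentRef("wcs")
    #     assert comp.id == ref.id
    #     assert comp.makeCompositeRef() == ref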

    def overrideStorageClass(self, storageClass: str | StorageClass) -> DatasetRef:
        """Create a new `DatasetRef` from this one, but with a modified
        `DatasetType` that has a different `StorageClass`.

        Parameters
        ----------
        storageClass : `str` or `StorageClass`
            The new storage class.

        Returns
        -------
        modified : `DatasetRef`
            A new dataset reference that is the same as the current one but
            with a different storage class in the `DatasetType`.
        """
        return DatasetRef(
            datasetType=self.datasetType.overrideStorageClass(storageClass),
            dataId=self.dataId,
            id=self.id,
            run=self.run,
            conform=False,
        )
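
    # Illustrative note (not part of the module): the ID and run are
    # preserved, so the overridden ref still refers to the same stored
    # dataset; only the in-memory type used on read changes. Assuming
    # "AstropyTable" names a storage class compatible with the dataset type:
    #
    #     as_table = ref.overrideStorageClass("AstropyTable")
    #     assert as_table.id == ref.id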

    datasetType: DatasetType
    """The definition of this dataset (`DatasetType`).

    Cannot be changed after a `DatasetRef` is constructed.
    """

    dataId: DataCoordinate
    """A mapping of `Dimension` primary key values that labels the dataset
    within a Collection (`DataCoordinate`).

    Cannot be changed after a `DatasetRef` is constructed.
    """

    run: Optional[str]
    """The name of the run that produced the dataset.

    Cannot be changed after a `DatasetRef` is constructed; use `resolved` or
    `unresolved` to add or remove this information when creating a new
    `DatasetRef`.
    """

    id: Optional[DatasetId]
    """Primary key of the dataset (`DatasetId` or `None`).

    Cannot be changed after a `DatasetRef` is constructed; use `resolved` or
    `unresolved` to add or remove this information when creating a new
    `DatasetRef`.
    """