# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = [
    "AmbiguousDatasetError",
    "DatasetId",
    "DatasetIdFactory",
    "DatasetIdGenEnum",
    "DatasetRef",
    "SerializedDatasetRef",
]

import enum
import sys
import uuid
from collections.abc import Iterable
from typing import TYPE_CHECKING, Any, ClassVar

from lsst.utils.classes import immutable

try:
    from pydantic.v1 import BaseModel, StrictStr, validator
except ModuleNotFoundError:
    from pydantic import BaseModel, StrictStr, validator  # type: ignore

from ..configSupport import LookupKey
from ..dimensions import DataCoordinate, DimensionGraph, DimensionUniverse, SerializedDataCoordinate
from ..json import from_json_pydantic, to_json_pydantic
from ..named import NamedKeyDict
from ..persistenceContext import PersistenceContextVars
from .type import DatasetType, SerializedDatasetType

if TYPE_CHECKING:
    from ...registry import Registry
    from ..storageClass import StorageClass


class AmbiguousDatasetError(Exception):
    """Raised when a `DatasetRef` is not resolved but should be.

    This happens when the `DatasetRef` has no ID or run but the requested
    operation requires one of them.
    """


class DatasetIdGenEnum(enum.Enum):
    """Enum used to specify dataset ID generation options."""

    UNIQUE = 0
    """Unique mode generates a unique ID for each inserted dataset, e.g.
    one auto-generated by the database or a random UUID.
    """

    DATAID_TYPE = 1
    """In this mode the ID is computed deterministically from a combination
    of dataset type and dataId.
    """

    DATAID_TYPE_RUN = 2
    """In this mode the ID is computed deterministically from a combination
    of dataset type, dataId, and run collection name.
    """


class DatasetIdFactory:
    """Factory for dataset IDs (UUIDs).

    For now the logic is hard-coded and is controlled by the user-provided
    value of `DatasetIdGenEnum`. In the future we may implement configurable
    logic that can guess the `DatasetIdGenEnum` value from other parameters.
    """

    NS_UUID = uuid.UUID("840b31d9-05cd-5161-b2c8-00d32b280d0f")
    """Namespace UUID used for UUID5 generation. Do not change. This was
    produced by `uuid.uuid5(uuid.NAMESPACE_DNS, "lsst.org")`.
    """

    def makeDatasetId(
        self,
        run: str,
        datasetType: DatasetType,
        dataId: DataCoordinate,
        idGenerationMode: DatasetIdGenEnum,
    ) -> uuid.UUID:
        """Generate dataset ID for a dataset.

        Parameters
        ----------
        run : `str`
            Name of the RUN collection for the dataset.
        datasetType : `DatasetType`
            Dataset type.
        dataId : `DataCoordinate`
            Expanded data ID for the dataset.
        idGenerationMode : `DatasetIdGenEnum`
            ID generation option. `~DatasetIdGenEnum.UNIQUE` makes a random
            UUID4-type ID. `~DatasetIdGenEnum.DATAID_TYPE` makes a
            deterministic UUID5-type ID based on a dataset type name and
            ``dataId``. `~DatasetIdGenEnum.DATAID_TYPE_RUN` makes a
            deterministic UUID5-type ID based on a dataset type name, run
            collection name, and ``dataId``.

        Returns
        -------
        datasetId : `uuid.UUID`
            Dataset identifier.

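        Notes
        -----
        For example, given an existing ``datasetType`` and expanded
        ``dataId`` (the run name here is hypothetical), a deterministic
        ID can be generated with:

        .. code-block:: py

            factory = DatasetIdFactory()
            dataset_id = factory.makeDatasetId(
                "HSC/runs/example",
                datasetType,
                dataId,
                DatasetIdGenEnum.DATAID_TYPE_RUN,
            )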
        """
        if idGenerationMode is DatasetIdGenEnum.UNIQUE:
            return uuid.uuid4()
        else:
            # WARNING: If you modify this code make sure that the order of
            # items in the `items` list below never changes.
            items: list[tuple[str, str]] = []
            if idGenerationMode is DatasetIdGenEnum.DATAID_TYPE:
                items = [
                    ("dataset_type", datasetType.name),
                ]
            elif idGenerationMode is DatasetIdGenEnum.DATAID_TYPE_RUN:
                items = [
                    ("dataset_type", datasetType.name),
                    ("run", run),
                ]
            else:
                raise ValueError(f"Unexpected ID generation mode: {idGenerationMode}")

            for name, value in sorted(dataId.byName().items()):
                items.append((name, str(value)))
            data = ",".join(f"{key}={value}" for key, value in items)
            return uuid.uuid5(self.NS_UUID, data)


# This is constant, so don't recreate a set for each instance
_serializedDatasetRefFieldsSet = {"id", "datasetType", "dataId", "run", "component"}


class SerializedDatasetRef(BaseModel):
    """Simplified model of a `DatasetRef` suitable for serialization."""

    id: uuid.UUID
    datasetType: SerializedDatasetType | None = None
    dataId: SerializedDataCoordinate | None = None
    run: StrictStr | None = None
    component: StrictStr | None = None

    @validator("dataId")
    def _check_dataId(cls, v: Any, values: dict[str, Any]) -> Any:  # noqa: N805
        if (d := "datasetType") in values and values[d] is None:
            raise ValueError("Can not specify 'dataId' without specifying 'datasetType'")
        return v

    @validator("run")
    def _check_run(cls, v: Any, values: dict[str, Any]) -> Any:  # noqa: N805
        if v and (i := "id") in values and values[i] is None:
            raise ValueError("'run' cannot be provided unless 'id' is.")
        return v

    @validator("component")
    def _check_component(cls, v: Any, values: dict[str, Any]) -> Any:  # noqa: N805
        # Component should not be given if datasetType is given.
        if v and (d := "datasetType") in values and values[d] is not None:
            raise ValueError(f"datasetType ({values[d]}) can not be set if component is given ({v}).")
        return v

    @classmethod
    def direct(
        cls,
        *,
        id: str,
        run: str,
        datasetType: dict[str, Any] | None = None,
        dataId: dict[str, Any] | None = None,
        component: str | None = None,
    ) -> SerializedDatasetRef:
        """Construct a `SerializedDatasetRef` directly without validators.

        Notes
        -----
        This differs from the pydantic "construct" method in that the
        arguments are explicitly what the model requires, and it will recurse
        through members, constructing them from their corresponding `direct`
        methods.

        The ``id`` parameter is a string representation of the dataset ID;
        it is converted to a UUID by this method.

        This method should only be called when the inputs are trusted.

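        For example (the values shown are hypothetical):

        .. code-block:: py

            simple = SerializedDatasetRef.direct(
                id="5b32564a-0e84-5c9c-a871-2945da0bcd1b",
                run="HSC/runs/example",
            )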
        """
        node = SerializedDatasetRef.__new__(cls)
        setter = object.__setattr__
        setter(node, "id", uuid.UUID(id))
        setter(
            node,
            "datasetType",
            datasetType if datasetType is None else SerializedDatasetType.direct(**datasetType),
        )
        setter(node, "dataId", dataId if dataId is None else SerializedDataCoordinate.direct(**dataId))
        setter(node, "run", sys.intern(run))
        setter(node, "component", component)
        setter(node, "__fields_set__", _serializedDatasetRefFieldsSet)
        return node


DatasetId = uuid.UUID
"""A type-annotation alias for dataset ID providing typing flexibility.
"""

@immutable
class DatasetRef:
    """Reference to a Dataset in a `Registry`.

    A `DatasetRef` may point to a Dataset that does not yet exist
    (e.g., because it is a predicted input for provenance).

    Parameters
    ----------
    datasetType : `DatasetType`
        The `DatasetType` for this Dataset.
    dataId : `DataCoordinate`
        A mapping of dimensions that labels the Dataset within a Collection.
    run : `str`
        The name of the run this dataset was associated with when it was
        created.
    id : `DatasetId`, optional
        The unique identifier assigned when the dataset is created. If ``id``
        is not specified, a new unique ID will be created.
    conform : `bool`, optional
        If `True` (default), call `DataCoordinate.standardize` to ensure that
        the data ID's dimensions are consistent with the dataset type's.
        `DatasetRef` instances for which those dimensions are not equal should
        not be created in new code, but are still supported for backwards
        compatibility. New code should only pass `False` if it can guarantee
        that the dimensions are already consistent.
    id_generation_mode : `DatasetIdGenEnum`
        ID generation option. `~DatasetIdGenEnum.UNIQUE` makes a random
        UUID4-type ID. `~DatasetIdGenEnum.DATAID_TYPE` makes a
        deterministic UUID5-type ID based on a dataset type name and
        ``dataId``. `~DatasetIdGenEnum.DATAID_TYPE_RUN` makes a
        deterministic UUID5-type ID based on a dataset type name, run
        collection name, and ``dataId``.

    See Also
    --------
    :ref:`daf_butler_organizing_datasets`

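    Examples
    --------
    A minimal sketch of constructing a ref, assuming a ``datasetType`` and
    ``dataId`` are already in hand (the run name is hypothetical):

    .. code-block:: py

        ref = DatasetRef(datasetType, dataId, run="HSC/runs/example")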
    """

    _serializedType = SerializedDatasetRef
    __slots__ = (
        "_id",
        "datasetType",
        "dataId",
        "run",
    )

    def __init__(
        self,
        datasetType: DatasetType,
        dataId: DataCoordinate,
        run: str,
        *,
        id: DatasetId | None = None,
        conform: bool = True,
        id_generation_mode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE,
    ):
        self.datasetType = datasetType
        if conform:
            self.dataId = DataCoordinate.standardize(dataId, graph=datasetType.dimensions)
        else:
            self.dataId = dataId
        self.run = run
        if id is not None:
            self._id = id.int
        else:
            self._id = (
                DatasetIdFactory()
                .makeDatasetId(self.run, self.datasetType, self.dataId, id_generation_mode)
                .int
            )

    @property
    def id(self) -> DatasetId:
        """Primary key of the dataset (`DatasetId`).

        Cannot be changed after a `DatasetRef` is constructed.
        """
        return uuid.UUID(int=self._id)

    def __eq__(self, other: Any) -> bool:
        try:
            return (self.datasetType, self.dataId, self.id) == (other.datasetType, other.dataId, other.id)
        except AttributeError:
            return NotImplemented

    def __hash__(self) -> int:
        return hash((self.datasetType, self.dataId, self.id))

    @property
    def dimensions(self) -> DimensionGraph:
        """Dimensions associated with the underlying `DatasetType`."""
        return self.datasetType.dimensions

    def __repr__(self) -> str:
        # We delegate to __str__ (i.e., use "!s") for the data ID below
        # because DataCoordinate's __repr__, while adhering to the guidelines
        # for __repr__, is much harder for users to read, while its __str__
        # just produces a dict that can also be passed to DatasetRef's
        # constructor.
        return f"DatasetRef({self.datasetType!r}, {self.dataId!s}, run={self.run!r}, id={self.id})"

    def __str__(self) -> str:
        s = (
            f"{self.datasetType.name}@{self.dataId!s} [sc={self.datasetType.storageClass_name}]"
            f" (run={self.run} id={self.id})"
        )
        return s

    def __lt__(self, other: Any) -> bool:
        # Sort by run, then DatasetType name, then DataCoordinate.
        # The __str__ representation is probably close enough, but we need to
        # ensure that sorting a DatasetRef matches what you would get if you
        # sorted DatasetType+DataCoordinate.
        if not isinstance(other, type(self)):
            return NotImplemented

        # Group by run if defined; this takes precedence over DatasetType.
        self_run = "" if self.run is None else self.run
        other_run = "" if other.run is None else other.run

        # Compare tuples in the priority order.
        return (self_run, self.datasetType, self.dataId) < (other_run, other.datasetType, other.dataId)

    def to_simple(self, minimal: bool = False) -> SerializedDatasetRef:
        """Convert this class to a simple python type.

        This makes it suitable for serialization.

        Parameters
        ----------
        minimal : `bool`, optional
            Use minimal serialization. Requires Registry to convert
            back to a full type.

        Returns
        -------
        simple : `SerializedDatasetRef`
            The object converted to simple form.
        """
        if minimal:
            # The only thing needed to uniquely define a DatasetRef is its id
            # so that can be used directly if it is not a component DatasetRef.
            # Store it in a dict to allow us to easily add the planned origin
            # information later without having to support an int and dict in
            # simple form.
            simple: dict[str, Any] = {"id": self.id}
            if self.isComponent():
                # We can still be a little minimalist with a component
                # but we will also need to record the datasetType component.
                simple["component"] = self.datasetType.component()
            return SerializedDatasetRef(**simple)

        return SerializedDatasetRef(
            datasetType=self.datasetType.to_simple(minimal=minimal),
            dataId=self.dataId.to_simple(),
            run=self.run,
            id=self.id,
        )

    @classmethod
    def from_simple(
        cls,
        simple: SerializedDatasetRef,
        universe: DimensionUniverse | None = None,
        registry: Registry | None = None,
        datasetType: DatasetType | None = None,
    ) -> DatasetRef:
        """Construct a new object from simplified form.

        Generally this is data returned from the `to_simple` method.

        Parameters
        ----------
        simple : `SerializedDatasetRef`
            The value returned by `to_simple()`.
        universe : `DimensionUniverse`, optional
            The special graph of all known dimensions.
            Can be `None` if a registry is provided.
        registry : `lsst.daf.butler.Registry`, optional
            Registry to use to convert simple form of a DatasetRef to
            a full `DatasetRef`. Can be `None` if a full description of
            the type is provided along with a universe.
        datasetType : `DatasetType`, optional
            If supplied, use this dataset type in the resulting `DatasetRef`
            instead of reading it from the `SerializedDatasetRef`. This is
            useful because it saves memory when many refs share the same
            type. Defaults to `None`.

        Returns
        -------
        ref : `DatasetRef`
            Newly-constructed object.

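        Examples
        --------
        A sketch of a round trip through the simple form, assuming ``ref``
        is an existing `DatasetRef` and ``universe`` is the matching
        `DimensionUniverse`:

        .. code-block:: py

            simple = ref.to_simple()
            new_ref = DatasetRef.from_simple(simple, universe=universe)
            assert new_ref == ref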
        """
        cache = PersistenceContextVars.datasetRefs.get()
        localName = sys.intern(
            datasetType.name
            if datasetType is not None
            else (x.name if (x := simple.datasetType) is not None else "")
        )
        key = (simple.id.int, localName)
        if cache is not None and (cachedRef := cache.get(key, None)) is not None:
            return cachedRef
        # Minimalist component will just specify component and id and
        # require registry to reconstruct.
        if not (simple.datasetType is not None or simple.dataId is not None or simple.run is not None):
            if registry is None:
                raise ValueError("Registry is required to construct component DatasetRef from integer id")
            if simple.id is None:
                raise ValueError("For minimal DatasetRef the ID must be defined.")
            ref = registry.getDataset(simple.id)
            if ref is None:
                raise RuntimeError(f"No matching dataset found in registry for id {simple.id}")
            if simple.component:
                ref = ref.makeComponentRef(simple.component)
            if cache is not None:
                cache[key] = ref
            return ref

        if universe is None and registry is None:
            raise ValueError("One of universe or registry must be provided.")

        if universe is None and registry is not None:
            universe = registry.dimensions

        if universe is None:
            # this is for mypy
            raise ValueError("Unable to determine a usable universe")

        if simple.datasetType is None and datasetType is None:
            # mypy
            raise ValueError("The DatasetType must be specified to construct a DatasetRef")
        if datasetType is None:
            if simple.datasetType is None:
                raise ValueError("Cannot determine Dataset type of this serialized class")
            datasetType = DatasetType.from_simple(simple.datasetType, universe=universe, registry=registry)

        if simple.dataId is None:
            # mypy
            raise ValueError("The DataId must be specified to construct a DatasetRef")
        dataId = DataCoordinate.from_simple(simple.dataId, universe=universe)

        # Check that simple ref is resolved.
        if simple.run is None:
            dstr = ""
            if simple.datasetType is None:
                dstr = f" (datasetType={datasetType.name!r})"
            raise ValueError(
                "Run collection name is missing from serialized representation. "
                f"Encountered with {simple!r}{dstr}."
            )

        newRef = cls(datasetType, dataId, id=simple.id, run=simple.run)
        if cache is not None:
            cache[key] = newRef
        return newRef

    to_json = to_json_pydantic
    from_json: ClassVar = classmethod(from_json_pydantic)

    @classmethod
    def _unpickle(
        cls,
        datasetType: DatasetType,
        dataId: DataCoordinate,
        id: DatasetId,
        run: str,
    ) -> DatasetRef:
        """Create new `DatasetRef`.

        A custom factory method for use by `__reduce__` as a workaround for
        its lack of support for keyword arguments.
        """
        return cls(datasetType, dataId, id=id, run=run)

    def __reduce__(self) -> tuple:
        return (self._unpickle, (self.datasetType, self.dataId, self.id, self.run))

    def __deepcopy__(self, memo: dict) -> DatasetRef:
        # DatasetRef is recursively immutable; see note in @immutable
        # decorator.
        return self

    def expanded(self, dataId: DataCoordinate) -> DatasetRef:
        """Return a new `DatasetRef` with the given expanded data ID.

        Parameters
        ----------
        dataId : `DataCoordinate`
            Data ID for the new `DatasetRef`. Must compare equal to the
            original data ID.

        Returns
        -------
        ref : `DatasetRef`
            A new `DatasetRef` with the given data ID.
        """
        assert dataId == self.dataId
        return DatasetRef(
            datasetType=self.datasetType, dataId=dataId, id=self.id, run=self.run, conform=False
        )

    def isComponent(self) -> bool:
        """Indicate whether this `DatasetRef` refers to a component.

        Returns
        -------
        isComponent : `bool`
            `True` if this `DatasetRef` is a component, `False` otherwise.
        """
        return self.datasetType.isComponent()

    def isComposite(self) -> bool:
        """Boolean indicating whether this `DatasetRef` is a composite type.

        Returns
        -------
        isComposite : `bool`
            `True` if this `DatasetRef` is a composite type, `False`
            otherwise.
        """
        return self.datasetType.isComposite()

    def _lookupNames(self) -> tuple[LookupKey, ...]:
        """Name keys to use when looking up this DatasetRef in a configuration.

        The names are returned in order of priority.

        Returns
        -------
        names : `tuple` of `LookupKey`
            Tuple of the `DatasetType` name and the `StorageClass` name.
            If ``instrument`` is defined in the dataId, each of those names
            is added to the start of the tuple with a key derived from the
            value of ``instrument``.
        """
        # Special case the instrument Dimension since we allow configs
        # to include the instrument name in the hierarchy.
        names: tuple[LookupKey, ...] = self.datasetType._lookupNames()

        if "instrument" in self.dataId:
            names = tuple(n.clone(dataId={"instrument": self.dataId["instrument"]}) for n in names) + names

        return names

    @staticmethod
    def groupByType(refs: Iterable[DatasetRef]) -> NamedKeyDict[DatasetType, list[DatasetRef]]:
        """Group an iterable of `DatasetRef` by `DatasetType`.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` [ `DatasetRef` ]
            `DatasetRef` instances to group.

        Returns
        -------
        grouped : `NamedKeyDict` [ `DatasetType`, `list` [ `DatasetRef` ] ]
            Grouped `DatasetRef` instances.

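        Examples
        --------
        For example, to process refs one dataset type at a time, assuming
        ``refs`` is an iterable of `DatasetRef`:

        .. code-block:: py

            for datasetType, grouped in DatasetRef.groupByType(refs).items():
                print(datasetType.name, len(grouped))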
        """
        result: NamedKeyDict[DatasetType, list[DatasetRef]] = NamedKeyDict()
        for ref in refs:
            result.setdefault(ref.datasetType, []).append(ref)
        return result

    def makeCompositeRef(self) -> DatasetRef:
        """Create a `DatasetRef` of the composite from a component ref.

        Requires that this `DatasetRef` is a component.

        Returns
        -------
        ref : `DatasetRef`
            A `DatasetRef` with a dataset type that corresponds to the
            composite parent of this component, and the same ID and run
            as ``self``.
        """
        # Assume that the data ID does not need to be standardized
        # and should match whatever this ref already has.
        return DatasetRef(
            self.datasetType.makeCompositeDatasetType(), self.dataId, id=self.id, run=self.run, conform=False
        )

    def makeComponentRef(self, name: str) -> DatasetRef:
        """Create a `DatasetRef` that corresponds to a component.

        Parameters
        ----------
        name : `str`
            Name of the component.

        Returns
        -------
        ref : `DatasetRef`
            A `DatasetRef` with a dataset type that corresponds to the given
            component, and the same ID and run as ``self``.

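        Examples
        --------
        For example, assuming ``ref`` points to a composite whose storage
        class defines a hypothetical "wcs" component:

        .. code-block:: py

            wcs_ref = ref.makeComponentRef("wcs")
            assert wcs_ref.isComponent()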
        """
        # Assume that the data ID does not need to be standardized
        # and should match whatever this ref already has.
        return DatasetRef(
            self.datasetType.makeComponentDatasetType(name),
            self.dataId,
            id=self.id,
            run=self.run,
            conform=False,
        )

    def overrideStorageClass(self, storageClass: str | StorageClass) -> DatasetRef:
        """Create a new `DatasetRef` from this one, but with a modified
        `DatasetType` that has a different `StorageClass`.

        Parameters
        ----------
        storageClass : `str` or `StorageClass`
            The new storage class.

        Returns
        -------
        modified : `DatasetRef`
            A new dataset reference that is the same as the current one but
            with a different storage class in the `DatasetType`.

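        Examples
        --------
        For example, assuming the hypothetical storage class name
        "ArrowAstropy" is convertible from this ref's storage class:

        .. code-block:: py

            new_ref = ref.overrideStorageClass("ArrowAstropy")
            assert ref.is_compatible_with(new_ref)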
        """
        return DatasetRef(
            datasetType=self.datasetType.overrideStorageClass(storageClass),
            dataId=self.dataId,
            id=self.id,
            run=self.run,
            conform=False,
        )

    def is_compatible_with(self, ref: DatasetRef) -> bool:
        """Determine if the given `DatasetRef` is compatible with this one.

        Parameters
        ----------
        ref : `DatasetRef`
            Dataset ref to check.

        Returns
        -------
        is_compatible : `bool`
            `True` if the other dataset ref is the same as this one, or if
            its dataset type is compatible with this one's and the dataId
            and dataset ID match.

        Notes
        -----
        Compatibility requires that the dataId and dataset ID match and that
        the `DatasetType` is compatible. The dataset types are compatible if
        the storage class associated with the other ref's dataset type can be
        converted to this ref's storage class.

        Specifically this means that if you have done:

        .. code-block:: py

            new_ref = ref.overrideStorageClass(sc)

        and this is successful, then the guarantee is that:

        .. code-block:: py

            assert ref.is_compatible_with(new_ref) is True

        since we know that the python type associated with the new ref can
        be converted to the original python type. The reverse is not
        guaranteed and depends on whether bidirectional converters have been
        registered.
        """
        if self.id != ref.id:
            return False
        if self.dataId != ref.dataId:
            return False
        if self.run != ref.run:
            return False
        return self.datasetType.is_compatible_with(ref.datasetType)

701 datasetType: DatasetType 

702 """The definition of this dataset (`DatasetType`). 

703 

704 Cannot be changed after a `DatasetRef` is constructed. 

705 """ 

706 

707 dataId: DataCoordinate 

708 """A mapping of `Dimension` primary key values that labels the dataset 

709 within a Collection (`DataCoordinate`). 

710 

711 Cannot be changed after a `DatasetRef` is constructed. 

712 """ 

713 

714 run: str 

715 """The name of the run that produced the dataset. 

716 

717 Cannot be changed after a `DatasetRef` is constructed. 

718 """