
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ("DatasetRecordStorageManager", "DatasetRecordStorage", "DatasetIdFactory", "DatasetIdGenEnum")

import enum
import uuid
from abc import ABC, abstractmethod
from typing import TYPE_CHECKING, Any, Iterable, Iterator, List, Optional, Tuple

import sqlalchemy.sql

from ...core import DataCoordinate, DatasetId, DatasetRef, DatasetType, SimpleQuery, Timespan, ddl
from ._versioning import VersionedExtension

if TYPE_CHECKING:
    from ..summaries import CollectionSummary
    from ._collections import CollectionManager, CollectionRecord, RunRecord
    from ._database import Database, StaticTablesContext
    from ._dimensions import DimensionRecordStorageManager


class DatasetIdGenEnum(enum.Enum):
    """Enum used to specify dataset ID generation options for the
    ``insert()`` method.
    """

    UNIQUE = 0
    """Unique mode generates a unique ID for each inserted dataset, e.g.
    auto-generated by the database or a random UUID.
    """

    DATAID_TYPE = 1
    """In this mode the ID is computed deterministically from a combination
    of dataset type and data ID.
    """

    DATAID_TYPE_RUN = 2
    """In this mode the ID is computed deterministically from a combination
    of dataset type, data ID, and run collection name.
    """


class DatasetIdFactory:
    """Factory for dataset IDs (UUIDs).

    For now the logic is hard-coded and is controlled by the user-provided
    value of `DatasetIdGenEnum`. In the future we may implement configurable
    logic that can infer the `DatasetIdGenEnum` value from other parameters.
    """

    NS_UUID = uuid.UUID("840b31d9-05cd-5161-b2c8-00d32b280d0f")
    """Namespace UUID used for UUID5 generation. Do not change. This was
    produced by `uuid.uuid5(uuid.NAMESPACE_DNS, "lsst.org")`.
    """

    def makeDatasetId(
        self,
        run: str,
        datasetType: DatasetType,
        dataId: DataCoordinate,
        idGenerationMode: DatasetIdGenEnum,
    ) -> uuid.UUID:
        """Generate a dataset ID for a dataset.

        Parameters
        ----------
        run : `str`
            Name of the RUN collection for the dataset.
        datasetType : `DatasetType`
            Dataset type.
        dataId : `DataCoordinate`
            Expanded data ID for the dataset.
        idGenerationMode : `DatasetIdGenEnum`
            ID generation option. `~DatasetIdGenEnum.UNIQUE` makes a random
            UUID4-type ID. `~DatasetIdGenEnum.DATAID_TYPE` makes a
            deterministic UUID5-type ID based on a dataset type name and
            ``dataId``. `~DatasetIdGenEnum.DATAID_TYPE_RUN` makes a
            deterministic UUID5-type ID based on a dataset type name, run
            collection name, and ``dataId``.

        Returns
        -------
        datasetId : `uuid.UUID`
            Dataset identifier.
        """
        if idGenerationMode is DatasetIdGenEnum.UNIQUE:
            return uuid.uuid4()
        else:
            # WARNING: If you modify this code make sure that the order of
            # items in the `items` list below never changes.
            items: List[Tuple[str, str]] = []
            if idGenerationMode is DatasetIdGenEnum.DATAID_TYPE:
                items = [
                    ("dataset_type", datasetType.name),
                ]
            elif idGenerationMode is DatasetIdGenEnum.DATAID_TYPE_RUN:
                items = [
                    ("dataset_type", datasetType.name),
                    ("run", run),
                ]
            else:
                raise ValueError(f"Unexpected ID generation mode: {idGenerationMode}")

            for name, value in sorted(dataId.byName().items()):
                items.append((name, str(value)))
            data = ",".join(f"{key}={value}" for key, value in items)
            return uuid.uuid5(self.NS_UUID, data)
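

def _example_deterministic_ids() -> None:
    """Illustrative sketch only, not part of this interface: demonstrate
    that the non-UNIQUE modes are deterministic by hashing the same ordered
    key string `DatasetIdFactory.makeDatasetId` would build in
    DATAID_TYPE_RUN mode. The dataset type name, run name, and data ID
    values below are made up for the example.
    """
    ns = DatasetIdFactory.NS_UUID
    # Key items in the fixed order used above: dataset type, run, then the
    # data ID keys sorted alphabetically.
    key = "dataset_type=flat,run=calib/run1,detector=42,instrument=Cam"
    # Hashing the same key twice yields the same UUID5...
    assert uuid.uuid5(ns, key) == uuid.uuid5(ns, key)
    # ...while UNIQUE mode draws a fresh random UUID4 on every call.
    assert uuid.uuid4() != uuid.uuid4()
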

class DatasetRecordStorage(ABC):
    """An interface that manages the records associated with a particular
    `DatasetType`.

    Parameters
    ----------
    datasetType : `DatasetType`
        Dataset type whose records this object manages.
    """

    def __init__(self, datasetType: DatasetType):
        self.datasetType = datasetType

    @abstractmethod
    def insert(
        self,
        run: RunRecord,
        dataIds: Iterable[DataCoordinate],
        idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE,
    ) -> Iterator[DatasetRef]:
        """Insert one or more dataset entries into the database.

        Parameters
        ----------
        run : `RunRecord`
            The record object describing the `~CollectionType.RUN` collection
            this dataset will be associated with.
        dataIds : `Iterable` [ `DataCoordinate` ]
            Expanded data IDs (`DataCoordinate` instances) for the
            datasets to be added. The dimensions of all data IDs must be the
            same as ``self.datasetType.dimensions``.
        idGenerationMode : `DatasetIdGenEnum`
            With `UNIQUE` each new dataset is inserted with its own unique ID.
            With a non-`UNIQUE` mode the ID is computed deterministically from
            some combination of dataset type, data ID, and run collection
            name; if the same ID is already in the database then a new record
            is not inserted.

        Returns
        -------
        datasets : `Iterable` [ `DatasetRef` ]
            References to the inserted datasets.
        """
        raise NotImplementedError()
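
    # A minimal, self-contained analogue (illustration only; a plain dict
    # stands in for the database table and strings for the real butler
    # types) of the idempotency the non-UNIQUE modes give `insert`: because
    # the ID is a pure function of the key, re-inserting the same key cannot
    # create a second record.
    #
    #     table: dict[uuid.UUID, str] = {}
    #
    #     def toy_insert(key: str) -> uuid.UUID:
    #         dataset_id = uuid.uuid5(DatasetIdFactory.NS_UUID, key)
    #         table.setdefault(dataset_id, key)  # duplicate IDs are skipped
    #         return dataset_id
    #
    #     assert toy_insert("flat,run1,detector=42") == toy_insert("flat,run1,detector=42")
    #     assert len(table) == 1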

    @abstractmethod
    def import_(
        self,
        run: RunRecord,
        datasets: Iterable[DatasetRef],
        idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE,
        reuseIds: bool = False,
    ) -> Iterator[DatasetRef]:
        """Insert one or more dataset entries into the database.

        Parameters
        ----------
        run : `RunRecord`
            The record object describing the `~CollectionType.RUN` collection
            this dataset will be associated with.
        datasets : `~collections.abc.Iterable` of `DatasetRef`
            Datasets to be inserted. Each dataset can specify an ``id``
            attribute which will be used for the inserted dataset. All
            dataset IDs must have the same type (`int` or `uuid.UUID`); if
            the type of the dataset IDs does not match the type supported by
            this class then the IDs will be ignored and new IDs will be
            generated by the backend.
        idGenerationMode : `DatasetIdGenEnum`
            With `UNIQUE` each new dataset is inserted with its own unique ID.
            With a non-`UNIQUE` mode the ID is computed deterministically from
            some combination of dataset type, data ID, and run collection
            name; if the same ID is already in the database then a new record
            is not inserted.
        reuseIds : `bool`, optional
            If `True`, force re-use of imported dataset IDs for integer IDs,
            which are normally generated as auto-incremented; an exception
            will be raised if the imported IDs clash with existing ones. This
            option has no effect on the use of globally-unique IDs, which are
            always re-used (or generated if integer IDs are being imported).

        Returns
        -------
        datasets : `Iterable` [ `DatasetRef` ]
            References to the inserted or existing datasets.

        Notes
        -----
        The ``datasetType`` and ``run`` attributes of datasets are supposed to
        be identical across all datasets but this is not checked and it should
        be enforced by higher level registry code. This method does not need
        to use those attributes from datasets, only ``dataId`` and ``id`` are
        relevant.
        """
        raise NotImplementedError()

    @abstractmethod
    def find(
        self, collection: CollectionRecord, dataId: DataCoordinate, timespan: Optional[Timespan] = None
    ) -> Optional[DatasetRef]:
        """Search a collection for a dataset with the given data ID.

        Parameters
        ----------
        collection : `CollectionRecord`
            The record object describing the collection to search for the
            dataset. May have any `CollectionType`.
        dataId : `DataCoordinate`
            Complete (but not necessarily expanded) data ID to search with,
            with ``dataId.graph == self.datasetType.dimensions``.
        timespan : `Timespan`, optional
            A timespan that the validity range of the dataset must overlap.
            Required if ``collection.type is CollectionType.CALIBRATION``, and
            ignored otherwise.

        Returns
        -------
        ref : `DatasetRef` or `None`
            A resolved `DatasetRef` (without components populated), or `None`
            if no matching dataset was found.
        """
        raise NotImplementedError()

    @abstractmethod
    def delete(self, datasets: Iterable[DatasetRef]) -> None:
        """Fully delete the given datasets from the registry.

        Parameters
        ----------
        datasets : `Iterable` [ `DatasetRef` ]
            Datasets to be deleted. All datasets must be resolved and have
            the same `DatasetType` as ``self``.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any of the given `DatasetRef` instances is unresolved.
        """
        raise NotImplementedError()

    @abstractmethod
    def associate(self, collection: CollectionRecord, datasets: Iterable[DatasetRef]) -> None:
        """Associate one or more datasets with a collection.

        Parameters
        ----------
        collection : `CollectionRecord`
            The record object describing the collection. ``collection.type``
            must be `~CollectionType.TAGGED`.
        datasets : `Iterable` [ `DatasetRef` ]
            Datasets to be associated. All datasets must be resolved and have
            the same `DatasetType` as ``self``.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any of the given `DatasetRef` instances is unresolved.

        Notes
        -----
        Associating a dataset into a collection that already contains a
        different dataset with the same `DatasetType` and data ID will remove
        the existing dataset from that collection.

        Associating the same dataset into a collection multiple times is a
        no-op, but is still not permitted on read-only databases.
        """
        raise NotImplementedError()

    @abstractmethod
    def disassociate(self, collection: CollectionRecord, datasets: Iterable[DatasetRef]) -> None:
        """Remove one or more datasets from a collection.

        Parameters
        ----------
        collection : `CollectionRecord`
            The record object describing the collection. ``collection.type``
            must be `~CollectionType.TAGGED`.
        datasets : `Iterable` [ `DatasetRef` ]
            Datasets to be disassociated. All datasets must be resolved and
            have the same `DatasetType` as ``self``.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any of the given `DatasetRef` instances is unresolved.
        """
        raise NotImplementedError()

    @abstractmethod
    def certify(
        self, collection: CollectionRecord, datasets: Iterable[DatasetRef], timespan: Timespan
    ) -> None:
        """Associate one or more datasets with a calibration collection and a
        validity range within it.

        Parameters
        ----------
        collection : `CollectionRecord`
            The record object describing the collection. ``collection.type``
            must be `~CollectionType.CALIBRATION`.
        datasets : `Iterable` [ `DatasetRef` ]
            Datasets to be associated. All datasets must be resolved and have
            the same `DatasetType` as ``self``.
        timespan : `Timespan`
            The validity range for these datasets within the collection.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any of the given `DatasetRef` instances is unresolved.
        ConflictingDefinitionError
            Raised if the collection already contains a different dataset with
            the same `DatasetType` and data ID and an overlapping validity
            range.
        CollectionTypeError
            Raised if
            ``collection.type is not CollectionType.CALIBRATION`` or if
            ``self.datasetType.isCalibration() is False``.
        """
        raise NotImplementedError()

    @abstractmethod
    def decertify(
        self,
        collection: CollectionRecord,
        timespan: Timespan,
        *,
        dataIds: Optional[Iterable[DataCoordinate]] = None,
    ) -> None:
        """Remove or adjust datasets to clear a validity range within a
        calibration collection.

        Parameters
        ----------
        collection : `CollectionRecord`
            The record object describing the collection. ``collection.type``
            must be `~CollectionType.CALIBRATION`.
        timespan : `Timespan`
            The validity range to remove datasets from within the collection.
            Datasets that overlap this range but are not contained by it will
            have their validity ranges adjusted to not overlap it, which may
            split a single dataset validity range into two.
        dataIds : `Iterable` [ `DataCoordinate` ], optional
            Data IDs that should be decertified within the given validity
            range. If `None`, all data IDs for ``self.datasetType`` will be
            decertified.

        Raises
        ------
        CollectionTypeError
            Raised if ``collection.type is not CollectionType.CALIBRATION``.
        """
        raise NotImplementedError()
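
    # A minimal, self-contained analogue (illustration only; half-open
    # ``(begin, end)`` tuples stand in for `Timespan`) of the range
    # adjustment `decertify` describes: clearing ``clear`` from a stored
    # validity range trims overlapping ranges and can split one range in two.
    #
    #     def toy_decertify(stored: tuple[int, int], clear: tuple[int, int]) -> list[tuple[int, int]]:
    #         pieces = []
    #         if stored[0] < clear[0]:
    #             pieces.append((stored[0], min(stored[1], clear[0])))
    #         if stored[1] > clear[1]:
    #             pieces.append((max(stored[0], clear[1]), stored[1]))
    #         return pieces
    #
    #     assert toy_decertify((0, 10), (3, 7)) == [(0, 3), (7, 10)]  # split in two
    #     assert toy_decertify((0, 10), (5, 20)) == [(0, 5)]          # trimmed
    #     assert toy_decertify((4, 6), (0, 10)) == []                 # fully removed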

    @abstractmethod
    def select(
        self,
        *collections: CollectionRecord,
        dataId: SimpleQuery.Select.Or[DataCoordinate] = SimpleQuery.Select,
        id: SimpleQuery.Select.Or[Optional[DatasetId]] = SimpleQuery.Select,
        run: SimpleQuery.Select.Or[None] = SimpleQuery.Select,
        timespan: SimpleQuery.Select.Or[Optional[Timespan]] = SimpleQuery.Select,
        ingestDate: SimpleQuery.Select.Or[Optional[Timespan]] = None,
    ) -> sqlalchemy.sql.Selectable:
        """Return a SQLAlchemy object that represents a ``SELECT`` query for
        this `DatasetType`.

        All arguments can either be a value that constrains the query or
        the `SimpleQuery.Select` tag object to indicate that the value should
        be returned in the columns in the ``SELECT`` clause. The default is
        `SimpleQuery.Select`.

        Parameters
        ----------
        *collections : `CollectionRecord`
            The record object(s) describing the collection(s) to query. May
            not be of type `CollectionType.CHAINED`. If multiple collections
            are passed, the query will search all of them in an unspecified
            order, and all collections must have the same type.
        dataId : `DataCoordinate` or `Select`
            The data ID to restrict results with, or an instruction to return
            the data ID via columns with names
            ``self.datasetType.dimensions.names``.
        id : `DatasetId`, `Select`, or `None`
            The primary key value for the dataset, an instruction to return it
            via an ``id`` column, or `None` to ignore it entirely.
        run : `None` or `Select`
            If `Select` (default), include the dataset's run key value (as a
            column labeled with the return value of
            ``CollectionManager.getRunForeignKeyName``).
            If `None`, do not include this column (to constrain the run,
            pass a `RunRecord` as the ``collection`` argument instead).
        timespan : `None`, `Select`, or `Timespan`
            If `Select` (default), include the validity range timespan in the
            result columns. If a `Timespan` instance, constrain the results to
            those whose validity ranges overlap that given timespan. Ignored
            for collection types other than `~CollectionType.CALIBRATION`,
            but `None` should be passed explicitly if a mix of
            `~CollectionType.CALIBRATION` and other types are passed in.
        ingestDate : `None`, `Select`, or `Timespan`
            If `Select`, include the ingest timestamp in the result columns.
            If a `Timespan` instance, constrain the results to those whose
            ingest times are inside the given timespan, and also include the
            timestamp in the result columns. If `None` (default) there is no
            constraint and the timestamp is not returned.

        Returns
        -------
        query : `sqlalchemy.sql.Selectable`
            A SQLAlchemy object representing a simple ``SELECT`` query.
        """
        raise NotImplementedError()
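
    # A minimal, self-contained sketch (illustration only, with a made-up
    # ``Select`` sentinel standing in for `SimpleQuery.Select`) of the
    # "value or Select tag" calling convention used above: passing the tag
    # asks for a column in the result, while passing a value adds a
    # constraint instead.
    #
    #     class Select:  # stand-in for SimpleQuery.Select
    #         pass
    #
    #     def toy_query(id: object = Select) -> str:
    #         if id is Select:
    #             return "SELECT id FROM dataset"
    #         return f"SELECT 1 FROM dataset WHERE id = {id!r}"
    #
    #     assert toy_query() == "SELECT id FROM dataset"
    #     assert toy_query(42) == "SELECT 1 FROM dataset WHERE id = 42"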

    datasetType: DatasetType
    """Dataset type whose records this object manages (`DatasetType`).
    """


class DatasetRecordStorageManager(VersionedExtension):
    """An interface that manages the tables that describe datasets.

    `DatasetRecordStorageManager` primarily serves as a container and factory
    for `DatasetRecordStorage` instances, which each provide access to the
    records for a different `DatasetType`.
    """

    @classmethod
    @abstractmethod
    def initialize(
        cls,
        db: Database,
        context: StaticTablesContext,
        *,
        collections: CollectionManager,
        dimensions: DimensionRecordStorageManager,
    ) -> DatasetRecordStorageManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present.
        collections : `CollectionManager`
            Manager object for the collections in this `Registry`.
        dimensions : `DimensionRecordStorageManager`
            Manager object for the dimensions in this `Registry`.

        Returns
        -------
        manager : `DatasetRecordStorageManager`
            An instance of a concrete `DatasetRecordStorageManager` subclass.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getIdColumnType(cls) -> type:
        """Return the type used for columns storing dataset IDs.

        This type is used for columns storing `DatasetRef.id` values, usually
        a `type` subclass provided by SQLAlchemy.

        Returns
        -------
        dtype : `type`
            Type used for dataset identification in the database.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def supportsIdGenerationMode(cls, mode: DatasetIdGenEnum) -> bool:
        """Test whether the given dataset ID generation mode is supported by
        `insert`.

        Parameters
        ----------
        mode : `DatasetIdGenEnum`
            Enum value for the mode to test.

        Returns
        -------
        supported : `bool`
            Whether the given mode is supported.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addDatasetForeignKey(
        cls,
        tableSpec: ddl.TableSpec,
        *,
        name: str = "dataset",
        constraint: bool = True,
        onDelete: Optional[str] = None,
        **kwargs: Any,
    ) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) referencing the dataset
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the dataset
            table. Will be modified in place.
        name : `str`, optional
            A name to use for the prefix of the new field; the full name is
            ``{name}_id``.
        constraint : `bool`, optional
            If `False` (`True` is default), add a field that can be joined to
            the dataset primary key, but do not add a foreign key constraint.
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen to
            the referencing row if the dataset row is deleted. `None`
            indicates that this should be an integrity error.
        **kwargs
            Additional keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        idSpec : `ddl.FieldSpec`
            Specification for the ID field.
        """
        raise NotImplementedError()
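
    # A minimal, self-contained analogue (illustration only) of the field
    # this method adds, expressed with plain SQLAlchemy instead of
    # `ddl.TableSpec`: a ``{name}_id`` column referencing the dataset
    # table's primary key, with the requested ON DELETE behavior. The table
    # and column names below are assumptions made for the sketch.
    #
    #     metadata = sqlalchemy.MetaData()
    #     dataset = sqlalchemy.Table(
    #         "dataset", metadata, sqlalchemy.Column("id", sqlalchemy.BigInteger, primary_key=True)
    #     )
    #     tags = sqlalchemy.Table(
    #         "dataset_tags",
    #         metadata,
    #         sqlalchemy.Column(
    #             "dataset_id",
    #             sqlalchemy.BigInteger,
    #             sqlalchemy.ForeignKey("dataset.id", ondelete="CASCADE"),
    #             nullable=False,
    #         ),
    #     )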

    @abstractmethod
    def refresh(self) -> None:
        """Ensure all other operations on this manager are aware of any
        dataset types that may have been registered by other clients since
        it was initialized or last refreshed.
        """
        raise NotImplementedError()

    def __getitem__(self, name: str) -> DatasetRecordStorage:
        """Return the object that provides access to the records associated
        with the given `DatasetType` name.

        This is simply a convenience wrapper for `find` that raises `KeyError`
        when the dataset type is not found.

        Parameters
        ----------
        name : `str`
            Name of the dataset type.

        Returns
        -------
        records : `DatasetRecordStorage`
            The object representing the records for the given dataset type.

        Raises
        ------
        KeyError
            Raised if there is no dataset type with the given name.

        Notes
        -----
        Dataset types registered by another client of the same repository
        since the last call to `initialize` or `refresh` may not be found.
        """
        result = self.find(name)
        if result is None:
            raise KeyError(f"Dataset type with name '{name}' not found.")
        return result

    @abstractmethod
    def find(self, name: str) -> Optional[DatasetRecordStorage]:
        """Return an object that provides access to the records associated
        with the given `DatasetType` name, if one exists.

        Parameters
        ----------
        name : `str`
            Name of the dataset type.

        Returns
        -------
        records : `DatasetRecordStorage` or `None`
            The object representing the records for the given dataset type, or
            `None` if there are no records for that dataset type.

        Notes
        -----
        Dataset types registered by another client of the same repository
        since the last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def register(self, datasetType: DatasetType) -> Tuple[DatasetRecordStorage, bool]:
        """Ensure that this `Registry` can hold records for the given
        `DatasetType`, creating new tables as necessary.

        Parameters
        ----------
        datasetType : `DatasetType`
            Dataset type for which a table should be created (as necessary)
            and an associated `DatasetRecordStorage` returned.

        Returns
        -------
        records : `DatasetRecordStorage`
            The object representing the records for the given dataset type.
        inserted : `bool`
            `True` if the dataset type did not exist in the registry before.

        Notes
        -----
        This operation may not be invoked within a `Database.transaction`
        context.
        """
        raise NotImplementedError()

    @abstractmethod
    def remove(self, name: str) -> None:
        """Remove the dataset type.

        Parameters
        ----------
        name : `str`
            Name of the dataset type.
        """
        raise NotImplementedError()

    @abstractmethod
    def __iter__(self) -> Iterator[DatasetType]:
        """Return an iterator over the dataset types present in this layer.

        Notes
        -----
        Dataset types registered by another client of the same layer since
        the last call to `initialize` or `refresh` may not be included.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDatasetRef(self, id: DatasetId) -> Optional[DatasetRef]:
        """Return a `DatasetRef` for the given dataset primary key value.

        Parameters
        ----------
        id : `DatasetId`
            Primary key value for the dataset.

        Returns
        -------
        ref : `DatasetRef` or `None`
            Object representing the dataset, or `None` if no dataset with the
            given primary key value exists in this layer.
        """
        raise NotImplementedError()

    @abstractmethod
    def getCollectionSummary(self, collection: CollectionRecord) -> CollectionSummary:
        """Return a summary for the given collection.

        Parameters
        ----------
        collection : `CollectionRecord`
            Record describing the collection for which a summary is to be
            retrieved.

        Returns
        -------
        summary : `CollectionSummary`
            Summary of the dataset types and governor dimension values in
            this collection.
        """
        raise NotImplementedError()