Coverage for python/lsst/daf/butler/registry/interfaces/_datasets.py: 67%

97 statements  

coverage.py v7.2.3, created at 2023-04-19 03:42 -0700

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ("DatasetRecordStorageManager", "DatasetRecordStorage", "DatasetIdFactory", "DatasetIdGenEnum")

import enum
import uuid
from abc import ABC, abstractmethod
from collections.abc import Iterable, Iterator, Set
from typing import TYPE_CHECKING, Any

from lsst.daf.relation import Relation

from ...core import DataCoordinate, DatasetId, DatasetRef, DatasetType, Timespan, ddl
from .._exceptions import MissingDatasetTypeError
from ._versioning import VersionedExtension, VersionTuple

if TYPE_CHECKING:
    from .._collection_summary import CollectionSummary
    from ..queries import SqlQueryContext
    from ._collections import CollectionManager, CollectionRecord, RunRecord
    from ._database import Database, StaticTablesContext
    from ._dimensions import DimensionRecordStorageManager


class DatasetIdGenEnum(enum.Enum):
    """This enum is used to specify dataset ID generation options for the
    ``insert()`` method.
    """

    UNIQUE = 0
    """Unique mode generates a unique ID for each inserted dataset, e.g.
    auto-generated by the database or a random UUID.
    """

    DATAID_TYPE = 1
    """In this mode the ID is computed deterministically from a combination
    of dataset type and dataId.
    """

    DATAID_TYPE_RUN = 2
    """In this mode the ID is computed deterministically from a combination
    of dataset type, dataId, and run collection name.
    """


class DatasetIdFactory:
    """Factory for dataset IDs (UUIDs).

    For now the logic is hard-coded and is controlled by the user-provided
    value of `DatasetIdGenEnum`. In the future we may implement configurable
    logic that can guess the `DatasetIdGenEnum` value from other parameters.
    """

    NS_UUID = uuid.UUID("840b31d9-05cd-5161-b2c8-00d32b280d0f")
    """Namespace UUID used for UUID5 generation. Do not change. This was
    produced by `uuid.uuid5(uuid.NAMESPACE_DNS, "lsst.org")`.
    """

    def makeDatasetId(
        self,
        run: str,
        datasetType: DatasetType,
        dataId: DataCoordinate,
        idGenerationMode: DatasetIdGenEnum,
    ) -> uuid.UUID:
        """Generate a dataset ID for a dataset.

        Parameters
        ----------
        run : `str`
            Name of the RUN collection for the dataset.
        datasetType : `DatasetType`
            Dataset type.
        dataId : `DataCoordinate`
            Expanded data ID for the dataset.
        idGenerationMode : `DatasetIdGenEnum`
            ID generation option. `~DatasetIdGenEnum.UNIQUE` makes a random
            UUID4-type ID. `~DatasetIdGenEnum.DATAID_TYPE` makes a
            deterministic UUID5-type ID based on a dataset type name and
            ``dataId``. `~DatasetIdGenEnum.DATAID_TYPE_RUN` makes a
            deterministic UUID5-type ID based on a dataset type name, run
            collection name, and ``dataId``.

        Returns
        -------
        datasetId : `uuid.UUID`
            Dataset identifier.
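
        Examples
        --------
        A minimal illustrative sketch (``my_dataset_type`` and the expanded
        ``data_id`` are assumed to exist already); with a deterministic mode,
        repeated calls with the same inputs yield the same UUID::

            factory = DatasetIdFactory()
            dataset_id = factory.makeDatasetId(
                "my_run",
                my_dataset_type,
                data_id,
                DatasetIdGenEnum.DATAID_TYPE_RUN,
            )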

109 """ 

110 if idGenerationMode is DatasetIdGenEnum.UNIQUE: 

111 return uuid.uuid4() 

112 else: 

113 # WARNING: If you modify this code make sure that the order of 

114 # items in the `items` list below never changes. 

115 items: list[tuple[str, str]] = [] 

116 if idGenerationMode is DatasetIdGenEnum.DATAID_TYPE: 

117 items = [ 

118 ("dataset_type", datasetType.name), 

119 ] 

120 elif idGenerationMode is DatasetIdGenEnum.DATAID_TYPE_RUN: 

121 items = [ 

122 ("dataset_type", datasetType.name), 

123 ("run", run), 

124 ] 

125 else: 

126 raise ValueError(f"Unexpected ID generation mode: {idGenerationMode}") 

127 

128 for name, value in sorted(dataId.byName().items()): 

129 items.append((name, str(value))) 

130 data = ",".join(f"{key}={value}" for key, value in items) 

131 return uuid.uuid5(self.NS_UUID, data) 

132 

133 def resolveRef( 

134 self, 

135 ref: DatasetRef, 

136 run: str, 

137 idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE, 

138 ) -> DatasetRef: 

139 """Generate resolved dataset reference for predicted datasets. 

140 

141 Parameters 

142 ---------- 

143 ref : `DatasetRef` 

144 Dataset ref, can be already resolved. 

145 run : `str` 

146 Name of the RUN collection for the dataset. 

147 idGenerationMode : `DatasetIdGenEnum` 

148 ID generation option. `~DatasetIdGenEnum.UNIQUE` makes a random 

149 UUID4-type ID. `~DatasetIdGenEnum.DATAID_TYPE` makes a 

150 deterministic UUID5-type ID based on a dataset type name and 

151 ``dataId``. `~DatasetIdGenEnum.DATAID_TYPE_RUN` makes a 

152 deterministic UUID5-type ID based on a dataset type name, run 

153 collection name, and ``dataId``. 

154 

155 Returns 

156 ------- 

157 resolved : `DatasetRef` 

158 Resolved dataset ref, if input reference is already resolved it 

159 is returned without modification. 

160 

161 Notes 

162 ----- 

163 This method can only be used for predicted dataset references that do 

164 not exist yet in the database. It does not resolve existing dataset 

165 references already stored in registry. 
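
        Examples
        --------
        A minimal illustrative sketch; ``predicted_ref`` is assumed to be an
        unresolved `DatasetRef`::

            factory = DatasetIdFactory()
            resolved_ref = factory.resolveRef(
                predicted_ref, "my_run", DatasetIdGenEnum.DATAID_TYPE_RUN
            )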

166 """ 

167 if ref.id is not None: 

168 return ref 

169 datasetId = self.makeDatasetId(run, ref.datasetType, ref.dataId, idGenerationMode) 

170 resolved = ref.resolved(datasetId, run) 

171 return resolved 

172 

173 

174class DatasetRecordStorage(ABC): 

175 """An interface that manages the records associated with a particular 

176 `DatasetType`. 

177 

178 Parameters 

179 ---------- 

180 datasetType : `DatasetType` 

181 Dataset type whose records this object manages. 

182 """ 

183 

184 def __init__(self, datasetType: DatasetType): 

185 self.datasetType = datasetType 

186 

187 @abstractmethod 

188 def insert( 

189 self, 

190 run: RunRecord, 

191 dataIds: Iterable[DataCoordinate], 

192 idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE, 

193 ) -> Iterator[DatasetRef]: 

194 """Insert one or more dataset entries into the database. 

195 

196 Parameters 

197 ---------- 

198 run : `RunRecord` 

199 The record object describing the `~CollectionType.RUN` collection 

200 this dataset will be associated with. 

201 dataIds : `Iterable` [ `DataCoordinate` ] 

202 Expanded data IDs (`DataCoordinate` instances) for the 

203 datasets to be added. The dimensions of all data IDs must be the 

204 same as ``self.datasetType.dimensions``. 

205 idMode : `DatasetIdGenEnum` 

206 With `UNIQUE` each new dataset is inserted with its new unique ID. 

207 With non-`UNIQUE` mode ID is computed from some combination of 

208 dataset type, dataId, and run collection name; if the same ID is 

209 already in the database then new record is not inserted. 

210 

211 Returns 

212 ------- 

213 datasets : `Iterable` [ `DatasetRef` ] 

214 References to the inserted datasets. 
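
        Examples
        --------
        A sketch of intended usage; ``storage`` is a hypothetical concrete
        instance, ``run_record`` a `RunRecord`, and ``data_ids`` pre-expanded
        `DataCoordinate` instances::

            refs = list(
                storage.insert(
                    run_record,
                    data_ids,
                    idGenerationMode=DatasetIdGenEnum.DATAID_TYPE_RUN,
                )
            )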

215 """ 

216 raise NotImplementedError() 

217 

218 @abstractmethod 

219 def import_( 

220 self, 

221 run: RunRecord, 

222 datasets: Iterable[DatasetRef], 

223 idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE, 

224 reuseIds: bool = False, 

225 ) -> Iterator[DatasetRef]: 

226 """Insert one or more dataset entries into the database. 

227 

228 Parameters 

229 ---------- 

230 run : `RunRecord` 

231 The record object describing the `~CollectionType.RUN` collection 

232 this dataset will be associated with. 

233 datasets : `~collections.abc.Iterable` of `DatasetRef` 

234 Datasets to be inserted. Datasets can specify ``id`` attribute 

235 which will be used for inserted datasets. All dataset IDs must 

236 have the same type (`int` or `uuid.UUID`), if type of dataset IDs 

237 does not match type supported by this class then IDs will be 

238 ignored and new IDs will be generated by backend. 

239 idGenerationMode : `DatasetIdGenEnum` 

240 With `UNIQUE` each new dataset is inserted with its new unique ID. 

241 With non-`UNIQUE` mode ID is computed from some combination of 

242 dataset type, dataId, and run collection name; if the same ID is 

243 already in the database then new record is not inserted. 

244 reuseIds : `bool`, optional 

245 If `True` then forces re-use of imported dataset IDs for integer 

246 IDs which are normally generated as auto-incremented; exception 

247 will be raised if imported IDs clash with existing ones. This 

248 option has no effect on the use of globally-unique IDs which are 

249 always re-used (or generated if integer IDs are being imported). 

250 

251 Returns 

252 ------- 

253 datasets : `Iterable` [ `DatasetRef` ] 

254 References to the inserted or existing datasets. 

255 

256 Notes 

257 ----- 

258 The ``datasetType`` and ``run`` attributes of datasets are supposed to 

259 be identical across all datasets but this is not checked and it should 

260 be enforced by higher level registry code. This method does not need 

261 to use those attributes from datasets, only ``dataId`` and ``id`` are 

262 relevant. 

263 """ 

264 raise NotImplementedError() 

265 

266 @abstractmethod 

267 def delete(self, datasets: Iterable[DatasetRef]) -> None: 

268 """Fully delete the given datasets from the registry. 

269 

270 Parameters 

271 ---------- 

272 datasets : `Iterable` [ `DatasetRef` ] 

273 Datasets to be deleted. All datasets must be resolved and have 

274 the same `DatasetType` as ``self``. 

275 

276 Raises 

277 ------ 

278 AmbiguousDatasetError 

279 Raised if any of the given `DatasetRef` instances is unresolved. 

280 """ 

281 raise NotImplementedError() 

282 

283 @abstractmethod 

284 def associate(self, collection: CollectionRecord, datasets: Iterable[DatasetRef]) -> None: 

285 """Associate one or more datasets with a collection. 

286 

287 Parameters 

288 ---------- 

289 collection : `CollectionRecord` 

290 The record object describing the collection. ``collection.type`` 

291 must be `~CollectionType.TAGGED`. 

292 datasets : `Iterable` [ `DatasetRef` ] 

293 Datasets to be associated. All datasets must be resolved and have 

294 the same `DatasetType` as ``self``. 

295 

296 Raises 

297 ------ 

298 AmbiguousDatasetError 

299 Raised if any of the given `DatasetRef` instances is unresolved. 

300 

301 Notes 

302 ----- 

303 Associating a dataset with into collection that already contains a 

304 different dataset with the same `DatasetType` and data ID will remove 

305 the existing dataset from that collection. 

306 

307 Associating the same dataset into a collection multiple times is a 

308 no-op, but is still not permitted on read-only databases. 

309 """ 

310 raise NotImplementedError() 

311 

312 @abstractmethod 

313 def disassociate(self, collection: CollectionRecord, datasets: Iterable[DatasetRef]) -> None: 

314 """Remove one or more datasets from a collection. 

315 

316 Parameters 

317 ---------- 

318 collection : `CollectionRecord` 

319 The record object describing the collection. ``collection.type`` 

320 must be `~CollectionType.TAGGED`. 

321 datasets : `Iterable` [ `DatasetRef` ] 

322 Datasets to be disassociated. All datasets must be resolved and 

323 have the same `DatasetType` as ``self``. 

324 

325 Raises 

326 ------ 

327 AmbiguousDatasetError 

328 Raised if any of the given `DatasetRef` instances is unresolved. 

329 """ 

330 raise NotImplementedError() 

331 

332 @abstractmethod 

333 def certify( 

334 self, 

335 collection: CollectionRecord, 

336 datasets: Iterable[DatasetRef], 

337 timespan: Timespan, 

338 context: SqlQueryContext, 

339 ) -> None: 

340 """Associate one or more datasets with a calibration collection and a 

341 validity range within it. 

342 

343 Parameters 

344 ---------- 

345 collection : `CollectionRecord` 

346 The record object describing the collection. ``collection.type`` 

347 must be `~CollectionType.CALIBRATION`. 

348 datasets : `Iterable` [ `DatasetRef` ] 

349 Datasets to be associated. All datasets must be resolved and have 

350 the same `DatasetType` as ``self``. 

351 timespan : `Timespan` 

352 The validity range for these datasets within the collection. 

353 

354 Raises 

355 ------ 

356 AmbiguousDatasetError 

357 Raised if any of the given `DatasetRef` instances is unresolved. 

358 ConflictingDefinitionError 

359 Raised if the collection already contains a different dataset with 

360 the same `DatasetType` and data ID and an overlapping validity 

361 range. 

362 CollectionTypeError 

363 Raised if 

364 ``collection.type is not CollectionType.CALIBRATION`` or if 

365 ``self.datasetType.isCalibration() is False``. 
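
        Examples
        --------
        A sketch of intended usage; ``storage``, ``calib_collection``,
        ``bias_refs``, and ``context`` are hypothetical, and ``begin`` and
        ``end`` are assumed to be `astropy.time.Time` values::

            storage.certify(
                calib_collection,
                bias_refs,
                Timespan(begin=begin, end=end),
                context=context,
            )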

366 """ 

367 raise NotImplementedError() 

368 

369 @abstractmethod 

370 def decertify( 

371 self, 

372 collection: CollectionRecord, 

373 timespan: Timespan, 

374 *, 

375 dataIds: Iterable[DataCoordinate] | None = None, 

376 context: SqlQueryContext, 

377 ) -> None: 

378 """Remove or adjust datasets to clear a validity range within a 

379 calibration collection. 

380 

381 Parameters 

382 ---------- 

383 collection : `CollectionRecord` 

384 The record object describing the collection. ``collection.type`` 

385 must be `~CollectionType.CALIBRATION`. 

386 timespan : `Timespan` 

387 The validity range to remove datasets from within the collection. 

388 Datasets that overlap this range but are not contained by it will 

389 have their validity ranges adjusted to not overlap it, which may 

390 split a single dataset validity range into two. 

391 dataIds : `Iterable` [ `DataCoordinate` ], optional 

392 Data IDs that should be decertified within the given validity range 

393 If `None`, all data IDs for ``self.datasetType`` will be 

394 decertified. 

395 

396 Raises 

397 ------ 

398 CollectionTypeError 

399 Raised if ``collection.type is not CollectionType.CALIBRATION``. 
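
        Examples
        --------
        A sketch of intended usage with the same hypothetical objects as in
        `certify`; omitting ``dataIds`` clears the given validity range for
        every data ID::

            storage.decertify(calib_collection, timespan, context=context)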

400 """ 

401 raise NotImplementedError() 

402 

403 @abstractmethod 

404 def make_relation( 

405 self, 

406 *collections: CollectionRecord, 

407 columns: Set[str], 

408 context: SqlQueryContext, 

409 ) -> Relation: 

410 """Return a `sql.Relation` that represents a query for for this 

411 `DatasetType` in one or more collections. 

412 

413 Parameters 

414 ---------- 

415 *collections : `CollectionRecord` 

416 The record object(s) describing the collection(s) to query. May 

417 not be of type `CollectionType.CHAINED`. If multiple collections 

418 are passed, the query will search all of them in an unspecified 

419 order, and all collections must have the same type. Must include 

420 at least one collection. 

421 columns : `~collections.abc.Set` [ `str` ] 

422 Columns to include in the relation. See `Query.find_datasets` for 

423 most options, but this method supports one more: 

424 

425 - ``rank``: a calculated integer column holding the index of the 

426 collection the dataset was found in, within the ``collections`` 

427 sequence given. 

428 context : `SqlQueryContext` 

429 The object that manages database connections, temporary tables and 

430 relation engines for this query. 

431 

432 Returns 

433 ------- 

434 relation : `~lsst.daf.relation.Relation` 

435 Representation of the query. 
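
        Examples
        --------
        A sketch of intended usage; ``storage``, ``run_record``, and
        ``context`` are hypothetical, and the column names are only
        illustrative::

            relation = storage.make_relation(
                run_record, columns={"dataset_id", "rank"}, context=context
            )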

436 """ 

437 raise NotImplementedError() 

438 

439 datasetType: DatasetType 

440 """Dataset type whose records this object manages (`DatasetType`). 

441 """ 

442 

443 

444class DatasetRecordStorageManager(VersionedExtension): 

445 """An interface that manages the tables that describe datasets. 

446 

447 `DatasetRecordStorageManager` primarily serves as a container and factory 

448 for `DatasetRecordStorage` instances, which each provide access to the 

449 records for a different `DatasetType`. 

450 """ 

451 

452 def __init__(self, *, registry_schema_version: VersionTuple | None = None) -> None: 

453 super().__init__(registry_schema_version=registry_schema_version) 

454 

455 @classmethod 

456 @abstractmethod 

457 def initialize( 

458 cls, 

459 db: Database, 

460 context: StaticTablesContext, 

461 *, 

462 collections: CollectionManager, 

463 dimensions: DimensionRecordStorageManager, 

464 registry_schema_version: VersionTuple | None = None, 

465 ) -> DatasetRecordStorageManager: 

466 """Construct an instance of the manager. 

467 

468 Parameters 

469 ---------- 

470 db : `Database` 

471 Interface to the underlying database engine and namespace. 

472 context : `StaticTablesContext` 

473 Context object obtained from `Database.declareStaticTables`; used 

474 to declare any tables that should always be present. 

475 collections: `CollectionManager` 

476 Manager object for the collections in this `Registry`. 

477 dimensions : `DimensionRecordStorageManager` 

478 Manager object for the dimensions in this `Registry`. 

479 registry_schema_version : `VersionTuple` or `None` 

480 Schema version of this extension as defined in registry. 

481 

482 Returns 

483 ------- 

484 manager : `DatasetRecordStorageManager` 

485 An instance of a concrete `DatasetRecordStorageManager` subclass. 

486 """ 

487 raise NotImplementedError() 

488 

489 @classmethod 

490 @abstractmethod 

491 def getIdColumnType(cls) -> type: 

492 """Return type used for columns storing dataset IDs. 

493 

494 This type is used for columns storing `DatasetRef.id` values, usually 

495 a `type` subclass provided by SQLAlchemy. 

496 

497 Returns 

498 ------- 

499 dtype : `type` 

500 Type used for dataset identification in database. 

501 """ 

502 raise NotImplementedError() 

503 

504 @classmethod 

505 @abstractmethod 

506 def supportsIdGenerationMode(cls, mode: DatasetIdGenEnum) -> bool: 

507 """Test whether the given dataset ID generation mode is supported by 

508 `insert`. 

509 

510 Parameters 

511 ---------- 

512 mode : `DatasetIdGenEnum` 

513 Enum value for the mode to test. 

514 

515 Returns 

516 ------- 

517 supported : `bool` 

518 Whether the given mode is supported. 

519 """ 

520 raise NotImplementedError() 

521 

522 @classmethod 

523 @abstractmethod 

524 def addDatasetForeignKey( 

525 cls, 

526 tableSpec: ddl.TableSpec, 

527 *, 

528 name: str = "dataset", 

529 constraint: bool = True, 

530 onDelete: str | None = None, 

531 **kwargs: Any, 

532 ) -> ddl.FieldSpec: 

533 """Add a foreign key (field and constraint) referencing the dataset 

534 table. 

535 

536 Parameters 

537 ---------- 

538 tableSpec : `ddl.TableSpec` 

539 Specification for the table that should reference the dataset 

540 table. Will be modified in place. 

541 name: `str`, optional 

542 A name to use for the prefix of the new field; the full name is 

543 ``{name}_id``. 

544 onDelete: `str`, optional 

545 One of "CASCADE" or "SET NULL", indicating what should happen to 

546 the referencing row if the collection row is deleted. `None` 

547 indicates that this should be an integrity error. 

548 constraint: `bool`, optional 

549 If `False` (`True` is default), add a field that can be joined to 

550 the dataset primary key, but do not add a foreign key constraint. 

551 **kwargs 

552 Additional keyword arguments are forwarded to the `ddl.FieldSpec` 

553 constructor (only the ``name`` and ``dtype`` arguments are 

554 otherwise provided). 

555 

556 Returns 

557 ------- 

558 idSpec : `ddl.FieldSpec` 

559 Specification for the ID field. 
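
        Examples
        --------
        A sketch of intended usage when declaring a table that should
        reference datasets; ``manager_cls`` is a hypothetical concrete
        subclass and the table spec is otherwise empty::

            table_spec = ddl.TableSpec(fields=[])
            id_spec = manager_cls.addDatasetForeignKey(table_spec, onDelete="CASCADE")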

560 """ 

561 raise NotImplementedError() 

562 

563 @abstractmethod 

564 def refresh(self) -> None: 

565 """Ensure all other operations on this manager are aware of any 

566 dataset types that may have been registered by other clients since 

567 it was initialized or last refreshed. 

568 """ 

569 raise NotImplementedError() 

570 

571 def __getitem__(self, name: str) -> DatasetRecordStorage: 

572 """Return the object that provides access to the records associated 

573 with the given `DatasetType` name. 

574 

575 This is simply a convenience wrapper for `find` that raises `KeyError` 

576 when the dataset type is not found. 

577 

578 Returns 

579 ------- 

580 records : `DatasetRecordStorage` 

581 The object representing the records for the given dataset type. 

582 

583 Raises 

584 ------ 

585 KeyError 

586 Raised if there is no dataset type with the given name. 

587 

588 Notes 

589 ----- 

590 Dataset types registered by another client of the same repository since 

591 the last call to `initialize` or `refresh` may not be found. 

592 """ 

593 result = self.find(name) 

594 if result is None: 

595 raise MissingDatasetTypeError(f"Dataset type with name '{name}' not found.") 

596 return result 

597 

598 @abstractmethod 

599 def find(self, name: str) -> DatasetRecordStorage | None: 

600 """Return an object that provides access to the records associated with 

601 the given `DatasetType` name, if one exists. 

602 

603 Parameters 

604 ---------- 

605 name : `str` 

606 Name of the dataset type. 

607 

608 Returns 

609 ------- 

610 records : `DatasetRecordStorage` or `None` 

611 The object representing the records for the given dataset type, or 

612 `None` if there are no records for that dataset type. 

613 

614 Notes 

615 ----- 

616 Dataset types registered by another client of the same repository since 

617 the last call to `initialize` or `refresh` may not be found. 
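
        Examples
        --------
        A sketch of intended usage; ``manager`` is a hypothetical concrete
        instance and ``"raw"`` an arbitrary dataset type name::

            storage = manager.find("raw")
            if storage is None:
                ...  # dataset type "raw" is not registered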

618 """ 

619 raise NotImplementedError() 

620 

621 @abstractmethod 

622 def register(self, datasetType: DatasetType) -> tuple[DatasetRecordStorage, bool]: 

623 """Ensure that this `Registry` can hold records for the given 

624 `DatasetType`, creating new tables as necessary. 

625 

626 Parameters 

627 ---------- 

628 datasetType : `DatasetType` 

629 Dataset type for which a table should created (as necessary) and 

630 an associated `DatasetRecordStorage` returned. 

631 

632 Returns 

633 ------- 

634 records : `DatasetRecordStorage` 

635 The object representing the records for the given dataset type. 

636 inserted : `bool` 

637 `True` if the dataset type did not exist in the registry before. 

638 

639 Notes 

640 ----- 

641 This operation may not be invoked within a `Database.transaction` 

642 context. 
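
        Examples
        --------
        A sketch of intended usage; ``manager`` is a hypothetical concrete
        instance::

            storage, inserted = manager.register(dataset_type)
            if inserted:
                ...  # first registration of this dataset type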

643 """ 

644 raise NotImplementedError() 

645 

646 @abstractmethod 

647 def remove(self, name: str) -> None: 

648 """Remove the dataset type. 

649 

650 Parameters 

651 ---------- 

652 name : `str` 

653 Name of the dataset type. 

654 """ 

655 raise NotImplementedError() 

656 

657 @abstractmethod 

658 def resolve_wildcard( 

659 self, 

660 expression: Any, 

661 components: bool | None = None, 

662 missing: list[str] | None = None, 

663 explicit_only: bool = False, 

664 components_deprecated: bool = True, 

665 ) -> dict[DatasetType, list[str | None]]: 

666 """Resolve a dataset type wildcard expression. 

667 

668 Parameters 

669 ---------- 

670 expression 

671 Expression to resolve. Will be passed to 

672 `DatasetTypeWildcard.from_expression`. 

673 components : `bool`, optional 

674 If `True`, apply all expression patterns to component dataset type 

675 names as well. If `False`, never apply patterns to components. If 

676 `None` (default), apply patterns to components only if their parent 

677 datasets were not matched by the expression. Fully-specified 

678 component datasets (`str` or `DatasetType` instances) are always 

679 included. 

680 missing : `list` of `str`, optional 

681 String dataset type names that were explicitly given (i.e. not 

682 regular expression patterns) but not found will be appended to this 

683 list, if it is provided. 

684 explicit_only : `bool`, optional 

685 If `True`, require explicit `DatasetType` instances or `str` names, 

686 with `re.Pattern` instances deprecated and ``...`` prohibited. 

687 components_deprecated : `bool`, optional 

688 If `True`, this is a context in which component dataset support is 

689 deprecated. This will result in a deprecation warning when 

690 ``components=True`` or ``components=None`` and a component dataset 

691 is matched. In the future this will become an error. 

692 

693 Returns 

694 ------- 

695 dataset_types : `dict` [ `DatasetType`, `list` [ `None`, `str` ] ] 

696 A mapping with resolved dataset types as keys and lists of 

697 matched component names as values, where `None` indicates the 

698 parent composite dataset type was matched. 
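
        Examples
        --------
        A sketch of the expected result shape; ``manager`` is a hypothetical
        concrete instance and the dataset type names are only illustrative::

            matches = manager.resolve_wildcard(["raw", "calexp.psf"])
            for dataset_type, components in matches.items():
                ...  # ``None`` in ``components`` means the parent matched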

699 """ 

700 raise NotImplementedError() 

701 

702 @abstractmethod 

703 def getDatasetRef(self, id: DatasetId) -> DatasetRef | None: 

704 """Return a `DatasetRef` for the given dataset primary key 

705 value. 

706 

707 Parameters 

708 ---------- 

709 id : `DatasetId` 

710 Primary key value for the dataset. 

711 

712 Returns 

713 ------- 

714 ref : `DatasetRef` or `None` 

715 Object representing the dataset, or `None` if no dataset with the 

716 given primary key values exists in this layer. 

717 """ 

718 raise NotImplementedError() 

719 

720 @abstractmethod 

721 def getCollectionSummary(self, collection: CollectionRecord) -> CollectionSummary: 

722 """Return a summary for the given collection. 

723 

724 Parameters 

725 ---------- 

726 collection : `CollectionRecord` 

727 Record describing the collection for which a summary is to be 

728 retrieved. 

729 

730 Returns 

731 ------- 

732 summary : `CollectionSummary` 

733 Summary of the dataset types and governor dimension values in 

734 this collection. 

735 """ 

736 raise NotImplementedError() 

737 

738 @abstractmethod 

739 def ingest_date_dtype(self) -> type: 

740 """Return type of the ``ingest_date`` column.""" 

741 raise NotImplementedError()