# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ("DatasetRecordStorageManager", "DatasetRecordStorage", "DatasetIdFactory", "DatasetIdGenEnum")

import enum
import uuid
from abc import ABC, abstractmethod
from collections.abc import Iterable, Iterator
from typing import TYPE_CHECKING, Any

import sqlalchemy.sql

from ...core import (
    DataCoordinate,
    DatasetId,
    DatasetRef,
    DatasetType,
    SimpleQuery,
    StorageClass,
    Timespan,
    ddl,
)
from .._exceptions import MissingDatasetTypeError
from ._versioning import VersionedExtension

if TYPE_CHECKING:
    from .._collection_summary import CollectionSummary
    from ._collections import CollectionManager, CollectionRecord, RunRecord
    from ._database import Database, StaticTablesContext
    from ._dimensions import DimensionRecordStorageManager


class DatasetIdGenEnum(enum.Enum):
    """This enum is used to specify dataset ID generation options for the
    ``insert()`` method.
    """

    UNIQUE = 0
    """Unique mode generates a unique ID for each inserted dataset, e.g.
    auto-generated by the database or a random UUID.
    """

    DATAID_TYPE = 1
    """In this mode the ID is computed deterministically from a combination
    of dataset type and data ID.
    """

    DATAID_TYPE_RUN = 2
    """In this mode the ID is computed deterministically from a combination
    of dataset type, data ID, and run collection name.
    """


class DatasetIdFactory:
    """Factory for dataset IDs (UUIDs).

    For now the logic is hard-coded and is controlled by the user-provided
    value of `DatasetIdGenEnum`. In the future we may implement configurable
    logic that can guess the `DatasetIdGenEnum` value from other parameters.
    """

    NS_UUID = uuid.UUID("840b31d9-05cd-5161-b2c8-00d32b280d0f")
    """Namespace UUID used for UUID5 generation. Do not change. This was
    produced by `uuid.uuid5(uuid.NAMESPACE_DNS, "lsst.org")`.
    """

    def makeDatasetId(
        self,
        run: str,
        datasetType: DatasetType,
        dataId: DataCoordinate,
        idGenerationMode: DatasetIdGenEnum,
    ) -> uuid.UUID:
        """Generate a dataset ID for a dataset.

        Parameters
        ----------
        run : `str`
            Name of the RUN collection for the dataset.
        datasetType : `DatasetType`
            Dataset type.
        dataId : `DataCoordinate`
            Expanded data ID for the dataset.
        idGenerationMode : `DatasetIdGenEnum`
            ID generation option. `~DatasetIdGenEnum.UNIQUE` makes a random
            UUID4-type ID. `~DatasetIdGenEnum.DATAID_TYPE` makes a
            deterministic UUID5-type ID based on a dataset type name and
            ``dataId``. `~DatasetIdGenEnum.DATAID_TYPE_RUN` makes a
            deterministic UUID5-type ID based on a dataset type name, run
            collection name, and ``dataId``.

        Returns
        -------
        datasetId : `uuid.UUID`
            Dataset identifier.
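
        Examples
        --------
        A minimal sketch (``datasetType`` and the expanded ``data_id`` are
        assumed to exist here); the deterministic modes always return the
        same UUID5 for the same inputs:

        >>> factory = DatasetIdFactory()  # doctest: +SKIP
        >>> id1 = factory.makeDatasetId(
        ...     "my_run", datasetType, data_id, DatasetIdGenEnum.DATAID_TYPE_RUN
        ... )  # doctest: +SKIP
        >>> id2 = factory.makeDatasetId(
        ...     "my_run", datasetType, data_id, DatasetIdGenEnum.DATAID_TYPE_RUN
        ... )  # doctest: +SKIP
        >>> id1 == id2  # doctest: +SKIP
        True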

        """
        if idGenerationMode is DatasetIdGenEnum.UNIQUE:
            return uuid.uuid4()
        else:
            # WARNING: If you modify this code make sure that the order of
            # items in the `items` list below never changes.
            items: list[tuple[str, str]] = []
            if idGenerationMode is DatasetIdGenEnum.DATAID_TYPE:
                items = [
                    ("dataset_type", datasetType.name),
                ]
            elif idGenerationMode is DatasetIdGenEnum.DATAID_TYPE_RUN:
                items = [
                    ("dataset_type", datasetType.name),
                    ("run", run),
                ]
            else:
                raise ValueError(f"Unexpected ID generation mode: {idGenerationMode}")

            for name, value in sorted(dataId.byName().items()):
                items.append((name, str(value)))
            data = ",".join(f"{key}={value}" for key, value in items)
            return uuid.uuid5(self.NS_UUID, data)


class DatasetRecordStorage(ABC):
    """An interface that manages the records associated with a particular
    `DatasetType`.

    Parameters
    ----------
    datasetType : `DatasetType`
        Dataset type whose records this object manages.
    """

    def __init__(self, datasetType: DatasetType):
        self.datasetType = datasetType

    @abstractmethod
    def insert(
        self,
        run: RunRecord,
        dataIds: Iterable[DataCoordinate],
        idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE,
    ) -> Iterator[DatasetRef]:
        """Insert one or more dataset entries into the database.

        Parameters
        ----------
        run : `RunRecord`
            The record object describing the `~CollectionType.RUN`
            collection these datasets will be associated with.
        dataIds : `Iterable` [ `DataCoordinate` ]
            Expanded data IDs (`DataCoordinate` instances) for the
            datasets to be added. The dimensions of all data IDs must be
            the same as ``self.datasetType.dimensions``.
        idGenerationMode : `DatasetIdGenEnum`
            With `UNIQUE` each new dataset is inserted with its own unique
            ID. With a non-`UNIQUE` mode the ID is computed from some
            combination of dataset type, data ID, and run collection name;
            if the same ID is already in the database then the new record
            is not inserted.

        Returns
        -------
        datasets : `Iterable` [ `DatasetRef` ]
            References to the inserted datasets.
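
        Examples
        --------
        A hedged sketch, assuming a concrete ``storage`` instance for a
        dataset type, a ``run`` record from the collection manager, and an
        iterable of expanded ``data_ids`` (all hypothetical here):

        >>> refs = list(
        ...     storage.insert(run, data_ids, DatasetIdGenEnum.UNIQUE)
        ... )  # doctest: +SKIP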

        """
        raise NotImplementedError()

    @abstractmethod
    def import_(
        self,
        run: RunRecord,
        datasets: Iterable[DatasetRef],
        idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE,
        reuseIds: bool = False,
    ) -> Iterator[DatasetRef]:
        """Insert one or more dataset entries into the database.

        Parameters
        ----------
        run : `RunRecord`
            The record object describing the `~CollectionType.RUN`
            collection these datasets will be associated with.
        datasets : `~collections.abc.Iterable` of `DatasetRef`
            Datasets to be inserted. Datasets may specify an ``id``
            attribute, which will be used for the inserted datasets. All
            dataset IDs must have the same type (`int` or `uuid.UUID`); if
            the type of the dataset IDs does not match the type supported
            by this class, the IDs will be ignored and new IDs will be
            generated by the backend.
        idGenerationMode : `DatasetIdGenEnum`
            With `UNIQUE` each new dataset is inserted with its own unique
            ID. With a non-`UNIQUE` mode the ID is computed from some
            combination of dataset type, data ID, and run collection name;
            if the same ID is already in the database then the new record
            is not inserted.
        reuseIds : `bool`, optional
            If `True`, force re-use of imported dataset IDs for integer
            IDs, which are normally generated as auto-incremented; an
            exception will be raised if the imported IDs clash with
            existing ones. This option has no effect on globally-unique
            IDs, which are always re-used (or generated if integer IDs are
            being imported).

        Returns
        -------
        datasets : `Iterable` [ `DatasetRef` ]
            References to the inserted or existing datasets.

        Notes
        -----
        The ``datasetType`` and ``run`` attributes of the given datasets
        are expected to be identical across all of them, but this is not
        checked; it should be enforced by higher-level registry code. This
        method does not need to use those attributes; only ``dataId`` and
        ``id`` are relevant.
        """
        raise NotImplementedError()

    @abstractmethod
    def find(
        self,
        collection: CollectionRecord,
        dataId: DataCoordinate,
        timespan: Timespan | None = None,
        storage_class: str | StorageClass | None = None,
    ) -> DatasetRef | None:
        """Search a collection for a dataset with the given data ID.

        Parameters
        ----------
        collection : `CollectionRecord`
            The record object describing the collection to search for the
            dataset. May have any `CollectionType`.
        dataId : `DataCoordinate`
            Complete (but not necessarily expanded) data ID to search with,
            with ``dataId.graph == self.datasetType.dimensions``.
        timespan : `Timespan`, optional
            A timespan that the validity range of the dataset must overlap.
            Required if ``collection.type is CollectionType.CALIBRATION``,
            and ignored otherwise.
        storage_class : `str` or `StorageClass`, optional
            Storage class override to apply to the returned dataset
            references.

        Returns
        -------
        ref : `DatasetRef`
            A resolved `DatasetRef` (without components populated), or
            `None` if no matching dataset was found.
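
        Examples
        --------
        A hedged sketch; ``collection`` and ``data_id`` are assumed to come
        from the collection manager and a complete data ID, respectively:

        >>> ref = storage.find(collection, data_id)  # doctest: +SKIP
        >>> ref is None or ref.dataId == data_id  # doctest: +SKIP
        True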

        """
        raise NotImplementedError()

    @abstractmethod
    def delete(self, datasets: Iterable[DatasetRef]) -> None:
        """Fully delete the given datasets from the registry.

        Parameters
        ----------
        datasets : `Iterable` [ `DatasetRef` ]
            Datasets to be deleted. All datasets must be resolved and have
            the same `DatasetType` as ``self``.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any of the given `DatasetRef` instances is
            unresolved.
        """
        raise NotImplementedError()

    @abstractmethod
    def associate(self, collection: CollectionRecord, datasets: Iterable[DatasetRef]) -> None:
        """Associate one or more datasets with a collection.

        Parameters
        ----------
        collection : `CollectionRecord`
            The record object describing the collection.
            ``collection.type`` must be `~CollectionType.TAGGED`.
        datasets : `Iterable` [ `DatasetRef` ]
            Datasets to be associated. All datasets must be resolved and
            have the same `DatasetType` as ``self``.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any of the given `DatasetRef` instances is
            unresolved.

        Notes
        -----
        Associating a dataset into a collection that already contains a
        different dataset with the same `DatasetType` and data ID will
        remove the existing dataset from that collection.

        Associating the same dataset into a collection multiple times is a
        no-op, but is still not permitted on read-only databases.
        """
        raise NotImplementedError()

    @abstractmethod
    def disassociate(self, collection: CollectionRecord, datasets: Iterable[DatasetRef]) -> None:
        """Remove one or more datasets from a collection.

        Parameters
        ----------
        collection : `CollectionRecord`
            The record object describing the collection.
            ``collection.type`` must be `~CollectionType.TAGGED`.
        datasets : `Iterable` [ `DatasetRef` ]
            Datasets to be disassociated. All datasets must be resolved and
            have the same `DatasetType` as ``self``.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any of the given `DatasetRef` instances is
            unresolved.
        """
        raise NotImplementedError()

    @abstractmethod
    def certify(
        self, collection: CollectionRecord, datasets: Iterable[DatasetRef], timespan: Timespan
    ) -> None:
        """Associate one or more datasets with a calibration collection and
        a validity range within it.

        Parameters
        ----------
        collection : `CollectionRecord`
            The record object describing the collection.
            ``collection.type`` must be `~CollectionType.CALIBRATION`.
        datasets : `Iterable` [ `DatasetRef` ]
            Datasets to be associated. All datasets must be resolved and
            have the same `DatasetType` as ``self``.
        timespan : `Timespan`
            The validity range for these datasets within the collection.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any of the given `DatasetRef` instances is
            unresolved.
        ConflictingDefinitionError
            Raised if the collection already contains a different dataset
            with the same `DatasetType` and data ID and an overlapping
            validity range.
        CollectionTypeError
            Raised if
            ``collection.type is not CollectionType.CALIBRATION`` or if
            ``self.datasetType.isCalibration() is False``.
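
        Examples
        --------
        A hedged sketch of certifying refs for a validity range and later
        clearing part of it with `decertify` (``calib_collection``,
        ``refs``, and the time bounds ``t1``, ``t_mid``, ``t2`` are assumed
        here):

        >>> storage.certify(
        ...     calib_collection, refs, Timespan(begin=t1, end=t2)
        ... )  # doctest: +SKIP
        >>> storage.decertify(
        ...     calib_collection, Timespan(begin=t1, end=t_mid)
        ... )  # doctest: +SKIP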

        """
        raise NotImplementedError()

    @abstractmethod
    def decertify(
        self,
        collection: CollectionRecord,
        timespan: Timespan,
        *,
        dataIds: Iterable[DataCoordinate] | None = None,
    ) -> None:
        """Remove or adjust datasets to clear a validity range within a
        calibration collection.

        Parameters
        ----------
        collection : `CollectionRecord`
            The record object describing the collection.
            ``collection.type`` must be `~CollectionType.CALIBRATION`.
        timespan : `Timespan`
            The validity range to remove datasets from within the
            collection. Datasets that overlap this range but are not
            contained by it will have their validity ranges adjusted to not
            overlap it, which may split a single dataset validity range
            into two.
        dataIds : `Iterable` [ `DataCoordinate` ], optional
            Data IDs that should be decertified within the given validity
            range. If `None`, all data IDs for ``self.datasetType`` will be
            decertified.

        Raises
        ------
        CollectionTypeError
            Raised if ``collection.type is not CollectionType.CALIBRATION``.
        """
        raise NotImplementedError()

    @abstractmethod
    def select(
        self,
        *collections: CollectionRecord,
        dataId: SimpleQuery.Select.Or[DataCoordinate] = SimpleQuery.Select,
        id: SimpleQuery.Select.Or[DatasetId | None] = SimpleQuery.Select,
        run: SimpleQuery.Select.Or[None] = SimpleQuery.Select,
        timespan: SimpleQuery.Select.Or[Timespan | None] = SimpleQuery.Select,
        ingestDate: SimpleQuery.Select.Or[Timespan | None] = None,
        rank: SimpleQuery.Select.Or[None] = None,
    ) -> sqlalchemy.sql.Selectable:
        """Return a SQLAlchemy object that represents a ``SELECT`` query
        for this `DatasetType`.

        All arguments can either be a value that constrains the query or
        the `SimpleQuery.Select` tag object to indicate that the value
        should be returned in the columns in the ``SELECT`` clause. The
        default is `SimpleQuery.Select`.

        Parameters
        ----------
        *collections : `CollectionRecord`
            The record object(s) describing the collection(s) to query.
            May not be of type `CollectionType.CHAINED`. If multiple
            collections are passed, the query will search all of them in an
            unspecified order, and all collections must have the same type.
        dataId : `DataCoordinate` or `Select`
            The data ID to restrict results with, or an instruction to
            return the data ID via columns with names
            ``self.datasetType.dimensions.names``.
        id : `DatasetId`, `Select`, or `None`
            The primary key value for the dataset, an instruction to return
            it via an ``id`` column, or `None` to ignore it entirely.
        run : `None` or `Select`
            If `Select` (default), include the dataset's run key value (as
            a column labeled with the return value of
            ``CollectionManager.getRunForeignKeyName``).
            If `None`, do not include this column (to constrain the run,
            pass a `RunRecord` as the ``collection`` argument instead).
        timespan : `None`, `Select`, or `Timespan`
            If `Select` (default), include the validity range timespan in
            the result columns. If a `Timespan` instance, constrain the
            results to those whose validity ranges overlap that given
            timespan. For collections whose type is not
            `~CollectionType.CALIBRATION`, if `Select` is passed a column
            with a literal ``NULL`` value will be added, and
            ``sqlalchemy.sql.expression.Null`` may be passed to force a
            constraint that the value be null (since `None` is interpreted
            as meaning "do not select or constrain this column").
        ingestDate : `None`, `Select`, or `Timespan`
            If `Select`, include the ingest timestamp in the result
            columns. If a `Timespan` instance, constrain the results to
            those whose ingest times are inside the given timespan and also
            include the timestamp in the result columns. If `None`
            (default), there is no constraint and the timestamp is not
            returned.
        rank : `Select` or `None`
            If `Select`, include a calculated column that is the integer
            rank of the row's collection in the given list of collections,
            starting from zero.

        Returns
        -------
        query : `sqlalchemy.sql.Selectable`
            A SQLAlchemy object representing a simple ``SELECT`` query.
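
        Examples
        --------
        A hedged sketch; ``run_record`` is assumed to be a `RunRecord` for
        an existing RUN collection. The data ID and ``id`` columns are
        requested via the `SimpleQuery.Select` tag, while the run and
        timespan columns are omitted:

        >>> sql = storage.select(
        ...     run_record,
        ...     dataId=SimpleQuery.Select,
        ...     id=SimpleQuery.Select,
        ...     run=None,
        ...     timespan=None,
        ... )  # doctest: +SKIP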

        """
        raise NotImplementedError()

    datasetType: DatasetType
    """Dataset type whose records this object manages (`DatasetType`).
    """


class DatasetRecordStorageManager(VersionedExtension):
    """An interface that manages the tables that describe datasets.

    `DatasetRecordStorageManager` primarily serves as a container and
    factory for `DatasetRecordStorage` instances, which each provide access
    to the records for a different `DatasetType`.
    """

    @classmethod
    @abstractmethod
    def initialize(
        cls,
        db: Database,
        context: StaticTablesContext,
        *,
        collections: CollectionManager,
        dimensions: DimensionRecordStorageManager,
    ) -> DatasetRecordStorageManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`;
            used to declare any tables that should always be present.
        collections : `CollectionManager`
            Manager object for the collections in this `Registry`.
        dimensions : `DimensionRecordStorageManager`
            Manager object for the dimensions in this `Registry`.

        Returns
        -------
        manager : `DatasetRecordStorageManager`
            An instance of a concrete `DatasetRecordStorageManager`
            subclass.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getIdColumnType(cls) -> type:
        """Return the type used for columns storing dataset IDs.

        This type is used for columns storing `DatasetRef.id` values,
        usually a `type` subclass provided by SQLAlchemy.

        Returns
        -------
        dtype : `type`
            Type used for dataset identification in the database.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def supportsIdGenerationMode(cls, mode: DatasetIdGenEnum) -> bool:
        """Test whether the given dataset ID generation mode is supported
        by `insert`.

        Parameters
        ----------
        mode : `DatasetIdGenEnum`
            Enum value for the mode to test.

        Returns
        -------
        supported : `bool`
            Whether the given mode is supported.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addDatasetForeignKey(
        cls,
        tableSpec: ddl.TableSpec,
        *,
        name: str = "dataset",
        constraint: bool = True,
        onDelete: str | None = None,
        **kwargs: Any,
    ) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) referencing the dataset
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the dataset
            table. Will be modified in place.
        name : `str`, optional
            A name to use for the prefix of the new field; the full name is
            ``{name}_id``.
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen
            to the referencing row if the dataset row is deleted. `None`
            indicates that this should be an integrity error.
        constraint : `bool`, optional
            If `False` (`True` is default), add a field that can be joined
            to the dataset primary key, but do not add a foreign key
            constraint.
        **kwargs
            Additional keyword arguments are forwarded to the
            `ddl.FieldSpec` constructor (only the ``name`` and ``dtype``
            arguments are otherwise provided).

        Returns
        -------
        idSpec : `ddl.FieldSpec`
            Specification for the ID field.
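
        Examples
        --------
        A hedged sketch of wiring a table specification to the dataset
        table (``manager`` and the empty `ddl.TableSpec` are illustrative
        only):

        >>> spec = ddl.TableSpec(fields=[])  # doctest: +SKIP
        >>> idSpec = manager.addDatasetForeignKey(
        ...     spec, onDelete="CASCADE"
        ... )  # doctest: +SKIP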

        """
        raise NotImplementedError()

    @abstractmethod
    def refresh(self) -> None:
        """Ensure all other operations on this manager are aware of any
        dataset types that may have been registered by other clients since
        it was initialized or last refreshed.
        """
        raise NotImplementedError()

    def __getitem__(self, name: str) -> DatasetRecordStorage:
        """Return the object that provides access to the records associated
        with the given `DatasetType` name.

        This is simply a convenience wrapper for `find` that raises
        `MissingDatasetTypeError` when the dataset type is not found.

        Returns
        -------
        records : `DatasetRecordStorage`
            The object representing the records for the given dataset type.

        Raises
        ------
        MissingDatasetTypeError
            Raised if there is no dataset type with the given name.

        Notes
        -----
        Dataset types registered by another client of the same repository
        since the last call to `initialize` or `refresh` may not be found.
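
        Examples
        --------
        A hedged sketch ("bias" is a hypothetical dataset type name):

        >>> storage = manager["bias"]  # doctest: +SKIP
        >>> storage.datasetType.name  # doctest: +SKIP
        'bias'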

        """
        result = self.find(name)
        if result is None:
            raise MissingDatasetTypeError(f"Dataset type with name '{name}' not found.")
        return result

    @abstractmethod
    def find(self, name: str) -> DatasetRecordStorage | None:
        """Return an object that provides access to the records associated
        with the given `DatasetType` name, if one exists.

        Parameters
        ----------
        name : `str`
            Name of the dataset type.

        Returns
        -------
        records : `DatasetRecordStorage` or `None`
            The object representing the records for the given dataset type,
            or `None` if there are no records for that dataset type.

        Notes
        -----
        Dataset types registered by another client of the same repository
        since the last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def register(self, datasetType: DatasetType) -> tuple[DatasetRecordStorage, bool]:
        """Ensure that this `Registry` can hold records for the given
        `DatasetType`, creating new tables as necessary.

        Parameters
        ----------
        datasetType : `DatasetType`
            Dataset type for which a table should be created (as necessary)
            and an associated `DatasetRecordStorage` returned.

        Returns
        -------
        records : `DatasetRecordStorage`
            The object representing the records for the given dataset type.
        inserted : `bool`
            `True` if the dataset type did not exist in the registry
            before.

        Notes
        -----
        This operation may not be invoked within a `Database.transaction`
        context.
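
        Examples
        --------
        A hedged sketch; ``datasetType`` is assumed to be a valid
        `DatasetType` instance:

        >>> storage, inserted = manager.register(datasetType)  # doctest: +SKIP
        >>> storage.datasetType == datasetType  # doctest: +SKIP
        True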

        """
        raise NotImplementedError()

    @abstractmethod
    def remove(self, name: str) -> None:
        """Remove the dataset type.

        Parameters
        ----------
        name : `str`
            Name of the dataset type.
        """
        raise NotImplementedError()

    @abstractmethod
    def resolve_wildcard(
        self,
        expression: Any,
        components: bool | None = None,
        missing: list[str] | None = None,
        explicit_only: bool = False,
    ) -> dict[DatasetType, list[str | None]]:
        """Resolve a dataset type wildcard expression.

        Parameters
        ----------
        expression
            Expression to resolve. Will be passed to
            `DatasetTypeWildcard.from_expression`.
        components : `bool`, optional
            If `True`, apply all expression patterns to component dataset
            type names as well. If `False`, never apply patterns to
            components. If `None` (default), apply patterns to components
            only if their parent datasets were not matched by the
            expression. Fully-specified component datasets (`str` or
            `DatasetType` instances) are always included.

            Values other than `False` are deprecated, and only `False` will
            be supported after v26. After v27 this argument will be removed
            entirely.
        missing : `list` of `str`, optional
            String dataset type names that were explicitly given (i.e. not
            regular expression patterns) but not found will be appended to
            this list, if it is provided.
        explicit_only : `bool`, optional
            If `True`, require explicit `DatasetType` instances or `str`
            names, with `re.Pattern` instances deprecated and ``...``
            prohibited.

        Returns
        -------
        dataset_types : `dict` [ `DatasetType`, `list` [ `None`, `str` ] ]
            A mapping with resolved dataset types as keys and lists of
            matched component names as values, where `None` indicates that
            the parent composite dataset type was matched.
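
        Examples
        --------
        A hedged sketch; the pattern and the returned dataset type names
        are illustrative only:

        >>> import re
        >>> resolved = manager.resolve_wildcard(
        ...     re.compile("calexp.*"), components=False
        ... )  # doctest: +SKIP
        >>> sorted(dt.name for dt in resolved)  # doctest: +SKIP
        ['calexp', 'calexpBackground']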

        """
        raise NotImplementedError()

    @abstractmethod
    def getDatasetRef(self, id: DatasetId) -> DatasetRef | None:
        """Return a `DatasetRef` for the given dataset primary key value.

        Parameters
        ----------
        id : `DatasetId`
            Primary key value for the dataset.

        Returns
        -------
        ref : `DatasetRef` or `None`
            Object representing the dataset, or `None` if no dataset with
            the given primary key value exists in this layer.
        """
        raise NotImplementedError()

    @abstractmethod
    def getCollectionSummary(self, collection: CollectionRecord) -> CollectionSummary:
        """Return a summary for the given collection.

        Parameters
        ----------
        collection : `CollectionRecord`
            Record describing the collection for which a summary is to be
            retrieved.

        Returns
        -------
        summary : `CollectionSummary`
            Summary of the dataset types and governor dimension values in
            this collection.
        """
        raise NotImplementedError()