Coverage for python/lsst/daf/butler/registry/interfaces/_datasets.py: 88% (66 statements)
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ("DatasetRecordStorageManager", "DatasetRecordStorage")

from abc import ABC, abstractmethod
from collections.abc import Iterable, Iterator, Set
from typing import TYPE_CHECKING, Any

from lsst.daf.relation import Relation

from ...core import DataCoordinate, DatasetId, DatasetIdGenEnum, DatasetRef, DatasetType, Timespan, ddl
from .._exceptions import MissingDatasetTypeError
from ._versioning import VersionedExtension, VersionTuple

if TYPE_CHECKING:
    from .._collection_summary import CollectionSummary
    from ..queries import SqlQueryContext
    from ._collections import CollectionManager, CollectionRecord, RunRecord
    from ._database import Database, StaticTablesContext
    from ._dimensions import DimensionRecordStorageManager


class DatasetRecordStorage(ABC):
    """An interface that manages the records associated with a particular
    `DatasetType`.

    Parameters
    ----------
    datasetType : `DatasetType`
        Dataset type whose records this object manages.
    """

    def __init__(self, datasetType: DatasetType):
        self.datasetType = datasetType

    @abstractmethod
    def insert(
        self,
        run: RunRecord,
        dataIds: Iterable[DataCoordinate],
        idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE,
    ) -> Iterator[DatasetRef]:
        """Insert one or more dataset entries into the database.

        Parameters
        ----------
        run : `RunRecord`
            The record object describing the `~CollectionType.RUN` collection
            these datasets will be associated with.
        dataIds : `Iterable` [ `DataCoordinate` ]
            Expanded data IDs (`DataCoordinate` instances) for the
            datasets to be added. The dimensions of all data IDs must be the
            same as ``self.datasetType.dimensions``.
        idGenerationMode : `DatasetIdGenEnum`
            With `UNIQUE` each new dataset is inserted with its own unique ID.
            With a non-`UNIQUE` mode the ID is computed from some combination
            of dataset type, data ID, and run collection name; if that ID is
            already in the database the new record is not inserted.

        Returns
        -------
        datasets : `Iterator` [ `DatasetRef` ]
            References to the inserted datasets.
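
        Examples
        --------
        A minimal sketch of typical use; ``storage`` (a concrete
        `DatasetRecordStorage`), ``run_record``, and ``expanded_data_ids``
        are hypothetical names for objects assumed to already exist::

            # Insert one dataset per expanded data ID into the RUN
            # collection described by run_record (hypothetical setup).
            refs = list(storage.insert(run_record, expanded_data_ids))
            for ref in refs:
                print(ref.id, ref.dataId)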
        """
        raise NotImplementedError()

    @abstractmethod
    def import_(
        self,
        run: RunRecord,
        datasets: Iterable[DatasetRef],
        idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE,
        reuseIds: bool = False,
    ) -> Iterator[DatasetRef]:
        """Insert one or more dataset entries into the database.

        Parameters
        ----------
        run : `RunRecord`
            The record object describing the `~CollectionType.RUN` collection
            these datasets will be associated with.
        datasets : `~collections.abc.Iterable` of `DatasetRef`
            Datasets to be inserted. Datasets may specify an ``id`` attribute
            which will be used for the inserted datasets. All dataset IDs must
            have the same type (`int` or `uuid.UUID`); if the type of the
            dataset IDs does not match the type supported by this class, the
            IDs will be ignored and new IDs will be generated by the backend.
        idGenerationMode : `DatasetIdGenEnum`
            With `UNIQUE` each new dataset is inserted with its own unique ID.
            With a non-`UNIQUE` mode the ID is computed from some combination
            of dataset type, data ID, and run collection name; if that ID is
            already in the database the new record is not inserted.
        reuseIds : `bool`, optional
            If `True`, force re-use of imported dataset IDs for integer IDs,
            which are normally generated as auto-incremented; an exception
            will be raised if the imported IDs clash with existing ones. This
            option has no effect on globally-unique IDs, which are always
            re-used (or generated if integer IDs are being imported).

        Returns
        -------
        datasets : `Iterator` [ `DatasetRef` ]
            References to the inserted or existing datasets.

        Notes
        -----
        The ``datasetType`` and ``run`` attributes of the given datasets are
        supposed to be identical across all of them, but this is not checked;
        it should be enforced by higher-level registry code. This method does
        not need to use those attributes; only ``dataId`` and ``id`` are
        relevant.
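
        Examples
        --------
        A minimal sketch; ``storage``, ``run_record``, and ``exported_refs``
        are hypothetical names for objects assumed to already exist::

            # Re-insert previously exported datasets, letting the backend
            # re-use their globally unique IDs (hypothetical setup).
            imported = list(
                storage.import_(
                    run_record,
                    exported_refs,
                    idGenerationMode=DatasetIdGenEnum.UNIQUE,
                )
            )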
        """
        raise NotImplementedError()

    @abstractmethod
    def delete(self, datasets: Iterable[DatasetRef]) -> None:
        """Fully delete the given datasets from the registry.

        Parameters
        ----------
        datasets : `Iterable` [ `DatasetRef` ]
            Datasets to be deleted. All datasets must be resolved and have
            the same `DatasetType` as ``self``.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any of the given `DatasetRef` instances is unresolved.
        """
        raise NotImplementedError()

    @abstractmethod
    def associate(self, collection: CollectionRecord, datasets: Iterable[DatasetRef]) -> None:
        """Associate one or more datasets with a collection.

        Parameters
        ----------
        collection : `CollectionRecord`
            The record object describing the collection. ``collection.type``
            must be `~CollectionType.TAGGED`.
        datasets : `Iterable` [ `DatasetRef` ]
            Datasets to be associated. All datasets must be resolved and have
            the same `DatasetType` as ``self``.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any of the given `DatasetRef` instances is unresolved.

        Notes
        -----
        Associating a dataset into a collection that already contains a
        different dataset with the same `DatasetType` and data ID will remove
        the existing dataset from that collection.

        Associating the same dataset with a collection multiple times is a
        no-op, but is still not permitted on read-only databases.
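
        Examples
        --------
        A minimal sketch; ``storage``, ``tagged_record`` (the record of a
        `~CollectionType.TAGGED` collection), and ``refs`` are hypothetical
        names for objects assumed to already exist::

            # Tag already-resolved datasets into the collection
            # (hypothetical setup).
            storage.associate(tagged_record, refs)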
        """
        raise NotImplementedError()

    @abstractmethod
    def disassociate(self, collection: CollectionRecord, datasets: Iterable[DatasetRef]) -> None:
        """Remove one or more datasets from a collection.

        Parameters
        ----------
        collection : `CollectionRecord`
            The record object describing the collection. ``collection.type``
            must be `~CollectionType.TAGGED`.
        datasets : `Iterable` [ `DatasetRef` ]
            Datasets to be disassociated. All datasets must be resolved and
            have the same `DatasetType` as ``self``.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any of the given `DatasetRef` instances is unresolved.
        """
        raise NotImplementedError()

    @abstractmethod
    def certify(
        self,
        collection: CollectionRecord,
        datasets: Iterable[DatasetRef],
        timespan: Timespan,
        context: SqlQueryContext,
    ) -> None:
        """Associate one or more datasets with a calibration collection and a
        validity range within it.

        Parameters
        ----------
        collection : `CollectionRecord`
            The record object describing the collection. ``collection.type``
            must be `~CollectionType.CALIBRATION`.
        datasets : `Iterable` [ `DatasetRef` ]
            Datasets to be associated. All datasets must be resolved and have
            the same `DatasetType` as ``self``.
        timespan : `Timespan`
            The validity range for these datasets within the collection.
        context : `SqlQueryContext`
            The object that manages database connections, temporary tables
            and relation engines for this query.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any of the given `DatasetRef` instances is unresolved.
        ConflictingDefinitionError
            Raised if the collection already contains a different dataset with
            the same `DatasetType` and data ID and an overlapping validity
            range.
        CollectionTypeError
            Raised if
            ``collection.type is not CollectionType.CALIBRATION`` or if
            ``self.datasetType.isCalibration() is False``.
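
        Examples
        --------
        A minimal sketch; ``storage``, ``calib_record``, ``bias_refs``,
        ``start_time``, ``end_time``, and ``query_context`` are hypothetical
        names for objects assumed to already exist::

            # Declare the bias datasets valid over [start_time, end_time)
            # in the calibration collection (hypothetical setup).
            storage.certify(
                calib_record,
                bias_refs,
                Timespan(begin=start_time, end=end_time),
                context=query_context,
            )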
        """
        raise NotImplementedError()

    @abstractmethod
    def decertify(
        self,
        collection: CollectionRecord,
        timespan: Timespan,
        *,
        dataIds: Iterable[DataCoordinate] | None = None,
        context: SqlQueryContext,
    ) -> None:
        """Remove or adjust datasets to clear a validity range within a
        calibration collection.

        Parameters
        ----------
        collection : `CollectionRecord`
            The record object describing the collection. ``collection.type``
            must be `~CollectionType.CALIBRATION`.
        timespan : `Timespan`
            The validity range to remove datasets from within the collection.
            Datasets that overlap this range but are not contained by it will
            have their validity ranges adjusted to not overlap it, which may
            split a single dataset validity range into two.
        dataIds : `Iterable` [ `DataCoordinate` ], optional
            Data IDs that should be decertified within the given validity
            range. If `None`, all data IDs for ``self.datasetType`` will be
            decertified.
        context : `SqlQueryContext`
            The object that manages database connections, temporary tables
            and relation engines for this query.

        Raises
        ------
        CollectionTypeError
            Raised if ``collection.type is not CollectionType.CALIBRATION``.
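
        Examples
        --------
        A minimal sketch; ``storage``, ``calib_record``, ``detector_ids``
        (data IDs), ``start_time``, ``end_time``, and ``query_context`` are
        hypothetical names for objects assumed to already exist::

            # Clear the validity range for just the given data IDs,
            # splitting overlapping ranges as needed (hypothetical setup).
            storage.decertify(
                calib_record,
                Timespan(begin=start_time, end=end_time),
                dataIds=detector_ids,
                context=query_context,
            )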
        """
        raise NotImplementedError()

    @abstractmethod
    def make_relation(
        self,
        *collections: CollectionRecord,
        columns: Set[str],
        context: SqlQueryContext,
    ) -> Relation:
        """Return a `~lsst.daf.relation.Relation` that represents a query for
        this `DatasetType` in one or more collections.

        Parameters
        ----------
        *collections : `CollectionRecord`
            The record object(s) describing the collection(s) to query. May
            not be of type `CollectionType.CHAINED`. If multiple collections
            are passed, the query will search all of them in an unspecified
            order, and all collections must have the same type. Must include
            at least one collection.
        columns : `~collections.abc.Set` [ `str` ]
            Columns to include in the relation. See `Query.find_datasets` for
            most options, but this method supports one more:

            - ``rank``: a calculated integer column holding the index of the
              collection the dataset was found in, within the ``collections``
              sequence given.
        context : `SqlQueryContext`
            The object that manages database connections, temporary tables and
            relation engines for this query.

        Returns
        -------
        relation : `~lsst.daf.relation.Relation`
            Representation of the query.
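
        Examples
        --------
        A minimal sketch; ``storage``, ``run_record_1``, ``run_record_2``,
        and ``query_context`` are hypothetical names, and the column names
        shown are illustrative rather than exhaustive::

            # Build a relation that also records which collection each
            # dataset was found in (hypothetical setup).
            relation = storage.make_relation(
                run_record_1,
                run_record_2,
                columns={"dataset_id", "rank"},
                context=query_context,
            )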
        """
        raise NotImplementedError()

    datasetType: DatasetType
    """Dataset type whose records this object manages (`DatasetType`)."""


class DatasetRecordStorageManager(VersionedExtension):
    """An interface that manages the tables that describe datasets.

    `DatasetRecordStorageManager` primarily serves as a container and factory
    for `DatasetRecordStorage` instances, which each provide access to the
    records for a different `DatasetType`.
    """

    def __init__(self, *, registry_schema_version: VersionTuple | None = None) -> None:
        super().__init__(registry_schema_version=registry_schema_version)

    @classmethod
    @abstractmethod
    def initialize(
        cls,
        db: Database,
        context: StaticTablesContext,
        *,
        collections: CollectionManager,
        dimensions: DimensionRecordStorageManager,
        registry_schema_version: VersionTuple | None = None,
    ) -> DatasetRecordStorageManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present.
        collections : `CollectionManager`
            Manager object for the collections in this `Registry`.
        dimensions : `DimensionRecordStorageManager`
            Manager object for the dimensions in this `Registry`.
        registry_schema_version : `VersionTuple` or `None`
            Schema version of this extension as defined in the registry.

        Returns
        -------
        manager : `DatasetRecordStorageManager`
            An instance of a concrete `DatasetRecordStorageManager` subclass.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getIdColumnType(cls) -> type:
        """Return the type used for columns storing dataset IDs.

        This type is used for columns storing `DatasetRef.id` values, and is
        usually a `type` subclass provided by SQLAlchemy.

        Returns
        -------
        dtype : `type`
            Type used for dataset identification in the database.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def supportsIdGenerationMode(cls, mode: DatasetIdGenEnum) -> bool:
        """Test whether the given dataset ID generation mode is supported by
        `insert`.

        Parameters
        ----------
        mode : `DatasetIdGenEnum`
            Enum value for the mode to test.

        Returns
        -------
        supported : `bool`
            Whether the given mode is supported.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addDatasetForeignKey(
        cls,
        tableSpec: ddl.TableSpec,
        *,
        name: str = "dataset",
        constraint: bool = True,
        onDelete: str | None = None,
        **kwargs: Any,
    ) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) referencing the dataset
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the dataset
            table. Will be modified in place.
        name : `str`, optional
            A name to use for the prefix of the new field; the full name is
            ``{name}_id``.
        constraint : `bool`, optional
            If `False` (`True` is default), add a field that can be joined to
            the dataset primary key, but do not add a foreign key constraint.
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen to
            the referencing row if the dataset row is deleted. `None`
            indicates that this should be an integrity error.
        **kwargs
            Additional keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        idSpec : `ddl.FieldSpec`
            Specification for the ID field.
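
        Examples
        --------
        A minimal sketch; ``manager`` is a concrete
        `DatasetRecordStorageManager` subclass and ``tableSpec`` an existing
        `ddl.TableSpec`, both hypothetical::

            # Add a non-null "dataset_id" column that cascades on dataset
            # deletion (hypothetical setup; ``nullable`` is forwarded to
            # the ddl.FieldSpec constructor).
            idSpec = manager.addDatasetForeignKey(
                tableSpec, onDelete="CASCADE", nullable=False
            )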
        """
        raise NotImplementedError()

    @abstractmethod
    def refresh(self) -> None:
        """Ensure all other operations on this manager are aware of any
        dataset types that may have been registered by other clients since
        it was initialized or last refreshed.
        """
        raise NotImplementedError()

    def __getitem__(self, name: str) -> DatasetRecordStorage:
        """Return the object that provides access to the records associated
        with the given `DatasetType` name.

        This is simply a convenience wrapper for `find` that raises
        `MissingDatasetTypeError` when the dataset type is not found.

        Parameters
        ----------
        name : `str`
            Name of the dataset type.

        Returns
        -------
        records : `DatasetRecordStorage`
            The object representing the records for the given dataset type.

        Raises
        ------
        MissingDatasetTypeError
            Raised if there is no dataset type with the given name.

        Notes
        -----
        Dataset types registered by another client of the same repository
        since the last call to `initialize` or `refresh` may not be found.
        """
        result = self.find(name)
        if result is None:
            raise MissingDatasetTypeError(f"Dataset type with name '{name}' not found.")
        return result

    @abstractmethod
    def find(self, name: str) -> DatasetRecordStorage | None:
        """Return an object that provides access to the records associated
        with the given `DatasetType` name, if one exists.

        Parameters
        ----------
        name : `str`
            Name of the dataset type.

        Returns
        -------
        records : `DatasetRecordStorage` or `None`
            The object representing the records for the given dataset type, or
            `None` if there are no records for that dataset type.

        Notes
        -----
        Dataset types registered by another client of the same repository
        since the last call to `initialize` or `refresh` may not be found.
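
        Examples
        --------
        A minimal sketch; ``manager`` is a hypothetical concrete
        `DatasetRecordStorageManager` and the dataset type name is
        illustrative::

            # Look up the records object, tolerating absence
            # (hypothetical setup).
            storage = manager.find("calexp")
            if storage is None:
                print("dataset type not registered")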
        """
        raise NotImplementedError()

    @abstractmethod
    def register(self, datasetType: DatasetType) -> tuple[DatasetRecordStorage, bool]:
        """Ensure that this `Registry` can hold records for the given
        `DatasetType`, creating new tables as necessary.

        Parameters
        ----------
        datasetType : `DatasetType`
            Dataset type for which a table should be created (as necessary)
            and an associated `DatasetRecordStorage` returned.

        Returns
        -------
        records : `DatasetRecordStorage`
            The object representing the records for the given dataset type.
        inserted : `bool`
            `True` if the dataset type did not exist in the registry before.

        Notes
        -----
        This operation may not be invoked within a `Database.transaction`
        context.
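
        Examples
        --------
        A minimal sketch; ``manager`` and ``dataset_type`` are hypothetical
        names for objects assumed to already exist::

            # Idempotently ensure the dataset type is registered
            # (hypothetical setup).
            storage, inserted = manager.register(dataset_type)
            if inserted:
                print(f"registered {storage.datasetType.name}")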
        """
        raise NotImplementedError()

    @abstractmethod
    def remove(self, name: str) -> None:
        """Remove the dataset type.

        Parameters
        ----------
        name : `str`
            Name of the dataset type.
        """
        raise NotImplementedError()

    @abstractmethod
    def resolve_wildcard(
        self,
        expression: Any,
        components: bool | None = None,
        missing: list[str] | None = None,
        explicit_only: bool = False,
        components_deprecated: bool = True,
    ) -> dict[DatasetType, list[str | None]]:
        """Resolve a dataset type wildcard expression.

        Parameters
        ----------
        expression : `Any`
            Expression to resolve. Will be passed to
            `DatasetTypeWildcard.from_expression`.
        components : `bool`, optional
            If `True`, apply all expression patterns to component dataset type
            names as well. If `False`, never apply patterns to components. If
            `None` (default), apply patterns to components only if their
            parent datasets were not matched by the expression.
            Fully-specified component datasets (`str` or `DatasetType`
            instances) are always included.
        missing : `list` of `str`, optional
            String dataset type names that were explicitly given (i.e. not
            regular expression patterns) but not found will be appended to
            this list, if it is provided.
        explicit_only : `bool`, optional
            If `True`, require explicit `DatasetType` instances or `str`
            names, with `re.Pattern` instances deprecated and ``...``
            prohibited.
        components_deprecated : `bool`, optional
            If `True`, this is a context in which component dataset support is
            deprecated. This will result in a deprecation warning when
            ``components=True`` or ``components=None`` and a component dataset
            is matched. In the future this will become an error.

        Returns
        -------
        dataset_types : `dict` [ `DatasetType`, `list` [ `str` or `None` ] ]
            A mapping with resolved dataset types as keys and lists of
            matched component names as values, where `None` indicates that
            the parent composite dataset type itself was matched.
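
        Examples
        --------
        A minimal sketch; ``manager`` is hypothetical and the expression
        forms shown are illustrative::

            import re

            # Resolve one explicit name and one pattern; explicit names
            # that are not found are appended to ``missing``
            # (hypothetical setup).
            missing: list[str] = []
            result = manager.resolve_wildcard(
                ["raw", re.compile(r"flat.*")],
                components=False,
                missing=missing,
            )
            for dataset_type, component_names in result.items():
                print(dataset_type.name, component_names)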
        """
        raise NotImplementedError()

    @abstractmethod
    def getDatasetRef(self, id: DatasetId) -> DatasetRef | None:
        """Return a `DatasetRef` for the given dataset primary key value.

        Parameters
        ----------
        id : `DatasetId`
            Primary key value for the dataset.

        Returns
        -------
        ref : `DatasetRef` or `None`
            Object representing the dataset, or `None` if no dataset with the
            given primary key value exists in this layer.
        """
        raise NotImplementedError()

    @abstractmethod
    def getCollectionSummary(self, collection: CollectionRecord) -> CollectionSummary:
        """Return a summary for the given collection.

        Parameters
        ----------
        collection : `CollectionRecord`
            Record describing the collection for which a summary is to be
            retrieved.

        Returns
        -------
        summary : `CollectionSummary`
            Summary of the dataset types and governor dimension values in
            this collection.
        """
        raise NotImplementedError()

    @abstractmethod
    def ingest_date_dtype(self) -> type:
        """Return the type of the ``ingest_date`` column."""
        raise NotImplementedError()