# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ("DatasetRecordStorageManager", "DatasetRecordStorage")

from abc import ABC, abstractmethod
from collections.abc import Iterable, Iterator, Set
from typing import TYPE_CHECKING, Any

from lsst.daf.relation import Relation

from ...core import DataCoordinate, DatasetId, DatasetIdGenEnum, DatasetRef, DatasetType, Timespan, ddl
from .._exceptions import MissingDatasetTypeError
from ._versioning import VersionedExtension, VersionTuple

if TYPE_CHECKING:
    from .._collection_summary import CollectionSummary
    from ..queries import SqlQueryContext
    from ._collections import CollectionManager, CollectionRecord, RunRecord
    from ._database import Database, StaticTablesContext
    from ._dimensions import DimensionRecordStorageManager


class DatasetRecordStorage(ABC):
    """An interface that manages the records associated with a particular
    `DatasetType`.

    Parameters
    ----------
    datasetType : `DatasetType`
        Dataset type whose records this object manages.
    """

    def __init__(self, datasetType: DatasetType):
        self.datasetType = datasetType

    @abstractmethod
    def insert(
        self,
        run: RunRecord,
        dataIds: Iterable[DataCoordinate],
        idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE,
    ) -> Iterator[DatasetRef]:
        """Insert one or more dataset entries into the database.

        Parameters
        ----------
        run : `RunRecord`
            The record object describing the `~CollectionType.RUN` collection
            this dataset will be associated with.
        dataIds : `~collections.abc.Iterable` [ `DataCoordinate` ]
            Expanded data IDs (`DataCoordinate` instances) for the
            datasets to be added. The dimensions of all data IDs must be the
            same as ``self.datasetType.dimensions``.
        idGenerationMode : `DatasetIdGenEnum`
            With `UNIQUE`, each new dataset is inserted with a new unique ID.
            With the non-`UNIQUE` modes, the ID is computed deterministically
            from some combination of the dataset type, data ID, and run
            collection name; if a record with that ID is already in the
            database, the new record is not inserted.

        Returns
        -------
        datasets : `~collections.abc.Iterable` [ `DatasetRef` ]
            References to the inserted datasets.
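
        Examples
        --------
        A sketch of intended usage, assuming ``storage`` is a concrete
        implementation and ``run`` and ``data_id`` were obtained from
        higher-level registry code (hypothetical variables)::

            refs = list(
                storage.insert(
                    run,
                    [data_id],
                    idGenerationMode=DatasetIdGenEnum.UNIQUE,
                )
            )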
        """
        raise NotImplementedError()

    @abstractmethod
    def import_(
        self,
        run: RunRecord,
        datasets: Iterable[DatasetRef],
    ) -> Iterator[DatasetRef]:
        """Insert one or more dataset entries into the database.

        Parameters
        ----------
        run : `RunRecord`
            The record object describing the `~CollectionType.RUN` collection
            this dataset will be associated with.
        datasets : `~collections.abc.Iterable` [ `DatasetRef` ]
            Datasets to be inserted. Each dataset may specify an ``id``
            attribute, which will be used for the inserted dataset. All
            dataset IDs must have the same type (`int` or `uuid.UUID`); if
            that type does not match the type supported by this class, the
            given IDs will be ignored and new IDs will be generated by the
            backend.

        Returns
        -------
        datasets : `~collections.abc.Iterable` [ `DatasetRef` ]
            References to the inserted or existing datasets.

        Notes
        -----
        The ``datasetType`` and ``run`` attributes of the given datasets are
        expected to be identical across all of them, but this is not checked;
        it should be enforced by higher-level registry code. This method does
        not need to use those attributes; only ``dataId`` and ``id`` are
        relevant.
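
        Examples
        --------
        A sketch of intended usage, assuming ``storage`` is a concrete
        implementation and ``run`` and ``refs`` were obtained elsewhere
        (hypothetical variables); any ``id`` values already set on the
        given refs are reused when their type matches the backend::

            imported = list(storage.import_(run, refs))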
        """
        raise NotImplementedError()

    @abstractmethod
    def delete(self, datasets: Iterable[DatasetRef]) -> None:
        """Fully delete the given datasets from the registry.

        Parameters
        ----------
        datasets : `~collections.abc.Iterable` [ `DatasetRef` ]
            Datasets to be deleted. All datasets must be resolved and have
            the same `DatasetType` as ``self``.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any of the given `DatasetRef` instances is unresolved.
        """
        raise NotImplementedError()

    @abstractmethod
    def associate(self, collection: CollectionRecord, datasets: Iterable[DatasetRef]) -> None:
        """Associate one or more datasets with a collection.

        Parameters
        ----------
        collection : `CollectionRecord`
            The record object describing the collection. ``collection.type``
            must be `~CollectionType.TAGGED`.
        datasets : `~collections.abc.Iterable` [ `DatasetRef` ]
            Datasets to be associated. All datasets must be resolved and have
            the same `DatasetType` as ``self``.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any of the given `DatasetRef` instances is unresolved.

        Notes
        -----
        Associating a dataset into a collection that already contains a
        different dataset with the same `DatasetType` and data ID will remove
        the existing dataset from that collection.

        Associating the same dataset into a collection multiple times is a
        no-op, but is still not permitted on read-only databases.
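
        Examples
        --------
        A sketch of intended usage, assuming ``storage`` is a concrete
        implementation, ``tagged`` is a `~CollectionType.TAGGED` collection
        record, and ``refs`` holds resolved `DatasetRef` instances
        (hypothetical variables)::

            storage.associate(tagged, refs)
            # A second call with the same refs is a no-op.
            storage.associate(tagged, refs)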
        """
        raise NotImplementedError()

    @abstractmethod
    def disassociate(self, collection: CollectionRecord, datasets: Iterable[DatasetRef]) -> None:
        """Remove one or more datasets from a collection.

        Parameters
        ----------
        collection : `CollectionRecord`
            The record object describing the collection. ``collection.type``
            must be `~CollectionType.TAGGED`.
        datasets : `~collections.abc.Iterable` [ `DatasetRef` ]
            Datasets to be disassociated. All datasets must be resolved and
            have the same `DatasetType` as ``self``.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any of the given `DatasetRef` instances is unresolved.
        """
        raise NotImplementedError()

    @abstractmethod
    def certify(
        self,
        collection: CollectionRecord,
        datasets: Iterable[DatasetRef],
        timespan: Timespan,
        context: SqlQueryContext,
    ) -> None:
        """Associate one or more datasets with a calibration collection and a
        validity range within it.

        Parameters
        ----------
        collection : `CollectionRecord`
            The record object describing the collection. ``collection.type``
            must be `~CollectionType.CALIBRATION`.
        datasets : `~collections.abc.Iterable` [ `DatasetRef` ]
            Datasets to be associated. All datasets must be resolved and have
            the same `DatasetType` as ``self``.
        timespan : `Timespan`
            The validity range for these datasets within the collection.
        context : `SqlQueryContext`
            The object that manages database connections, temporary tables and
            relation engines for this query.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any of the given `DatasetRef` instances is unresolved.
        ConflictingDefinitionError
            Raised if the collection already contains a different dataset with
            the same `DatasetType` and data ID and an overlapping validity
            range.
        CollectionTypeError
            Raised if
            ``collection.type is not CollectionType.CALIBRATION`` or if
            ``self.datasetType.isCalibration() is False``.
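
        Examples
        --------
        A sketch of intended usage, assuming ``storage`` is a concrete
        implementation and ``calib``, ``refs``, ``begin``, ``end``, and
        ``context`` were obtained elsewhere (hypothetical variables)::

            storage.certify(
                calib,
                refs,
                Timespan(begin=begin, end=end),
                context=context,
            )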
        """
        raise NotImplementedError()

    @abstractmethod
    def decertify(
        self,
        collection: CollectionRecord,
        timespan: Timespan,
        *,
        dataIds: Iterable[DataCoordinate] | None = None,
        context: SqlQueryContext,
    ) -> None:
        """Remove or adjust datasets to clear a validity range within a
        calibration collection.

        Parameters
        ----------
        collection : `CollectionRecord`
            The record object describing the collection. ``collection.type``
            must be `~CollectionType.CALIBRATION`.
        timespan : `Timespan`
            The validity range to remove datasets from within the collection.
            Datasets that overlap this range but are not contained by it will
            have their validity ranges adjusted to not overlap it, which may
            split a single dataset validity range into two.
        dataIds : `~collections.abc.Iterable` [ `DataCoordinate` ], optional
            Data IDs that should be decertified within the given validity
            range. If `None`, all data IDs for ``self.datasetType`` will be
            decertified.
        context : `SqlQueryContext`
            The object that manages database connections, temporary tables and
            relation engines for this query.

        Raises
        ------
        CollectionTypeError
            Raised if ``collection.type is not CollectionType.CALIBRATION``.
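
        Examples
        --------
        A sketch of intended usage, assuming the same hypothetical variables
        as in `certify`; passing no ``dataIds`` clears the given range for
        every data ID of this dataset type::

            storage.decertify(
                calib,
                Timespan(begin=begin, end=end),
                context=context,
            )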
        """
        raise NotImplementedError()

    @abstractmethod
    def make_relation(
        self,
        *collections: CollectionRecord,
        columns: Set[str],
        context: SqlQueryContext,
    ) -> Relation:
        """Return a `~lsst.daf.relation.Relation` that represents a query for
        this `DatasetType` in one or more collections.

        Parameters
        ----------
        *collections : `CollectionRecord`
            The record object(s) describing the collection(s) to query. May
            not be of type `CollectionType.CHAINED`. If multiple collections
            are passed, the query will search all of them in an unspecified
            order, and all collections must have the same type. Must include
            at least one collection.
        columns : `~collections.abc.Set` [ `str` ]
            Columns to include in the relation. See `Query.find_datasets` for
            most options, but this method supports one more:

            - ``rank``: a calculated integer column holding the index of the
              collection the dataset was found in, within the ``collections``
              sequence given.
        context : `SqlQueryContext`
            The object that manages database connections, temporary tables and
            relation engines for this query.

        Returns
        -------
        relation : `~lsst.daf.relation.Relation`
            Representation of the query.
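
        Examples
        --------
        A sketch of intended usage, assuming ``storage`` is a concrete
        implementation, ``run_a`` and ``run_b`` are collection records, and
        ``context`` is a `SqlQueryContext` (hypothetical variables; column
        names other than ``rank`` are illustrative, see
        `Query.find_datasets` for the supported set)::

            relation = storage.make_relation(
                run_a,
                run_b,
                columns={"dataset_id", "run", "rank"},
                context=context,
            )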
        """
        raise NotImplementedError()

    datasetType: DatasetType
    """Dataset type whose records this object manages (`DatasetType`).
    """


class DatasetRecordStorageManager(VersionedExtension):
    """An interface that manages the tables that describe datasets.

    `DatasetRecordStorageManager` primarily serves as a container and factory
    for `DatasetRecordStorage` instances, which each provide access to the
    records for a different `DatasetType`.
    """

    def __init__(self, *, registry_schema_version: VersionTuple | None = None) -> None:
        super().__init__(registry_schema_version=registry_schema_version)

    @classmethod
    @abstractmethod
    def initialize(
        cls,
        db: Database,
        context: StaticTablesContext,
        *,
        collections: CollectionManager,
        dimensions: DimensionRecordStorageManager,
        registry_schema_version: VersionTuple | None = None,
    ) -> DatasetRecordStorageManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present.
        collections : `CollectionManager`
            Manager object for the collections in this `Registry`.
        dimensions : `DimensionRecordStorageManager`
            Manager object for the dimensions in this `Registry`.
        registry_schema_version : `VersionTuple` or `None`
            Schema version of this extension as defined in registry.

        Returns
        -------
        manager : `DatasetRecordStorageManager`
            An instance of a concrete `DatasetRecordStorageManager` subclass.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getIdColumnType(cls) -> type:
        """Return the type used for columns storing dataset IDs.

        This type is used for columns storing `DatasetRef.id` values, and is
        usually a `type` subclass provided by SQLAlchemy.

        Returns
        -------
        dtype : `type`
            Type used for dataset identification in the database.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def supportsIdGenerationMode(cls, mode: DatasetIdGenEnum) -> bool:
        """Test whether the given dataset ID generation mode is supported by
        `insert`.

        Parameters
        ----------
        mode : `DatasetIdGenEnum`
            Enum value for the mode to test.

        Returns
        -------
        supported : `bool`
            Whether the given mode is supported.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addDatasetForeignKey(
        cls,
        tableSpec: ddl.TableSpec,
        *,
        name: str = "dataset",
        constraint: bool = True,
        onDelete: str | None = None,
        **kwargs: Any,
    ) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) referencing the dataset
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the dataset
            table. Will be modified in place.
        name : `str`, optional
            A name to use for the prefix of the new field; the full name is
            ``{name}_id``.
        constraint : `bool`, optional
            If `False` (`True` is default), add a field that can be joined to
            the dataset primary key, but do not add a foreign key constraint.
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen to
            the referencing row if the dataset row is deleted. `None`
            indicates that this should be an integrity error.
        **kwargs
            Additional keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        idSpec : `ddl.FieldSpec`
            Specification for the ID field.
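
        Examples
        --------
        A sketch of intended usage, assuming ``manager`` is a concrete
        manager class and ``tableSpec`` is a `ddl.TableSpec` under
        construction (hypothetical variables)::

            idSpec = manager.addDatasetForeignKey(
                tableSpec,
                name="dataset",
                onDelete="CASCADE",
            )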
        """
        raise NotImplementedError()

    @abstractmethod
    def refresh(self) -> None:
        """Ensure all other operations on this manager are aware of any
        dataset types that may have been registered by other clients since
        it was initialized or last refreshed.
        """
        raise NotImplementedError()

    def __getitem__(self, name: str) -> DatasetRecordStorage:
        """Return the object that provides access to the records associated
        with the given `DatasetType` name.

        This is simply a convenience wrapper for `find` that raises
        `MissingDatasetTypeError` when the dataset type is not found.

        Returns
        -------
        records : `DatasetRecordStorage`
            The object representing the records for the given dataset type.

        Raises
        ------
        MissingDatasetTypeError
            Raised if there is no dataset type with the given name.

        Notes
        -----
        Dataset types registered by another client of the same repository
        since the last call to `initialize` or `refresh` may not be found.
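
        Examples
        --------
        A sketch of intended usage, assuming ``manager`` is a concrete
        implementation and ``"calexp"`` is a registered dataset type name
        (hypothetical variable and name)::

            storage = manager["calexp"]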
        """
        result = self.find(name)
        if result is None:
            raise MissingDatasetTypeError(f"Dataset type with name '{name}' not found.")
        return result

    @abstractmethod
    def find(self, name: str) -> DatasetRecordStorage | None:
        """Return an object that provides access to the records associated
        with the given `DatasetType` name, if one exists.

        Parameters
        ----------
        name : `str`
            Name of the dataset type.

        Returns
        -------
        records : `DatasetRecordStorage` or `None`
            The object representing the records for the given dataset type, or
            `None` if there are no records for that dataset type.

        Notes
        -----
        Dataset types registered by another client of the same repository
        since the last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def register(self, datasetType: DatasetType) -> tuple[DatasetRecordStorage, bool]:
        """Ensure that this `Registry` can hold records for the given
        `DatasetType`, creating new tables as necessary.

        Parameters
        ----------
        datasetType : `DatasetType`
            Dataset type for which a table should be created (as necessary)
            and an associated `DatasetRecordStorage` returned.

        Returns
        -------
        records : `DatasetRecordStorage`
            The object representing the records for the given dataset type.
        inserted : `bool`
            `True` if the dataset type did not exist in the registry before.

        Notes
        -----
        This operation may not be invoked within a `Database.transaction`
        context.
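
        Examples
        --------
        A sketch of intended usage, assuming ``manager`` is a concrete
        implementation and ``datasetType`` was constructed elsewhere
        (hypothetical variables)::

            storage, inserted = manager.register(datasetType)
            if inserted:
                # The dataset type was not previously known to the registry.
                ...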
        """
        raise NotImplementedError()

    @abstractmethod
    def remove(self, name: str) -> None:
        """Remove the dataset type.

        Parameters
        ----------
        name : `str`
            Name of the dataset type.
        """
        raise NotImplementedError()

    @abstractmethod
    def resolve_wildcard(
        self,
        expression: Any,
        components: bool | None = None,
        missing: list[str] | None = None,
        explicit_only: bool = False,
        components_deprecated: bool = True,
    ) -> dict[DatasetType, list[str | None]]:
        """Resolve a dataset type wildcard expression.

        Parameters
        ----------
        expression
            Expression to resolve. Will be passed to
            `DatasetTypeWildcard.from_expression`.
        components : `bool`, optional
            If `True`, apply all expression patterns to component dataset type
            names as well. If `False`, never apply patterns to components. If
            `None` (default), apply patterns to components only if their
            parent datasets were not matched by the expression.
            Fully-specified component datasets (`str` or `DatasetType`
            instances) are always included.
        missing : `list` of `str`, optional
            String dataset type names that were explicitly given (i.e. not
            regular expression patterns) but not found will be appended to
            this list, if it is provided.
        explicit_only : `bool`, optional
            If `True`, require explicit `DatasetType` instances or `str`
            names, with `re.Pattern` instances deprecated and ``...``
            prohibited.
        components_deprecated : `bool`, optional
            If `True`, this is a context in which component dataset support is
            deprecated. This will result in a deprecation warning when
            ``components=True`` or ``components=None`` and a component dataset
            is matched. In the future this will become an error.

        Returns
        -------
        dataset_types : `dict` [ `DatasetType`, `list` [ `str` or `None` ] ]
            A mapping with resolved dataset types as keys and lists of
            matched component names as values, where `None` indicates the
            parent composite dataset type was matched.
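
        Examples
        --------
        A sketch of intended usage, assuming ``manager`` is a concrete
        implementation; a compiled `re.Pattern` is one of the expression
        forms mentioned above (the full set of accepted forms is defined by
        `DatasetTypeWildcard.from_expression`)::

            import re

            missing: list[str] = []
            resolved = manager.resolve_wildcard(
                re.compile(r"raw.*"), missing=missing
            )
            for dataset_type, component_names in resolved.items():
                ...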
        """
        raise NotImplementedError()

    @abstractmethod
    def getDatasetRef(self, id: DatasetId) -> DatasetRef | None:
        """Return a `DatasetRef` for the given dataset primary key
        value.

        Parameters
        ----------
        id : `DatasetId`
            Primary key value for the dataset.

        Returns
        -------
        ref : `DatasetRef` or `None`
            Object representing the dataset, or `None` if no dataset with the
            given primary key value exists in this layer.
        """
        raise NotImplementedError()

    @abstractmethod
    def getCollectionSummary(self, collection: CollectionRecord) -> CollectionSummary:
        """Return a summary for the given collection.

        Parameters
        ----------
        collection : `CollectionRecord`
            Record describing the collection for which a summary is to be
            retrieved.

        Returns
        -------
        summary : `CollectionSummary`
            Summary of the dataset types and governor dimension values in
            this collection.
        """
        raise NotImplementedError()

    @abstractmethod
    def ingest_date_dtype(self) -> type:
        """Return the type of the ``ingest_date`` column."""
        raise NotImplementedError()