Coverage for python/lsst/daf/butler/registry/interfaces/_datasets.py : 56%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ("DatasetRecordStorageManager", "DatasetRecordStorage")
26from abc import ABC, abstractmethod
27from typing import (
28 Any,
29 Iterable,
30 Iterator,
31 Optional,
32 Tuple,
33 TYPE_CHECKING,
34)
36from ...core import (
37 DataCoordinate,
38 DatasetRef,
39 DatasetType,
40 ddl,
41 SimpleQuery,
42 Timespan,
43)
44from ._versioning import VersionedExtension
46if TYPE_CHECKING: 46 ↛ 47line 46 didn't jump to line 47, because the condition on line 46 was never true
47 from ..summaries import CollectionSummary
48 from ._database import Database, StaticTablesContext
49 from ._dimensions import DimensionRecordStorageManager
50 from ._collections import CollectionManager, CollectionRecord, RunRecord
53class DatasetRecordStorage(ABC):
54 """An interface that manages the records associated with a particular
55 `DatasetType`.
57 Parameters
58 ----------
59 datasetType : `DatasetType`
60 Dataset type whose records this object manages.
61 """
62 def __init__(self, datasetType: DatasetType):
63 self.datasetType = datasetType
65 @abstractmethod
66 def insert(self, run: RunRecord, dataIds: Iterable[DataCoordinate]) -> Iterator[DatasetRef]:
67 """Insert one or more dataset entries into the database.
69 Parameters
70 ----------
71 run : `RunRecord`
72 The record object describing the `~CollectionType.RUN` collection
73 this dataset will be associated with.
74 dataIds : `Iterable` [ `DataCoordinate` ]
75 Expanded data IDs (`DataCoordinate` instances) for the
76 datasets to be added. The dimensions of all data IDs must be the
77 same as ``self.datasetType.dimensions``.
79 Returns
80 -------
81 datasets : `Iterable` [ `DatasetRef` ]
82 References to the inserted datasets.
83 """
84 raise NotImplementedError()
86 @abstractmethod
87 def find(self, collection: CollectionRecord, dataId: DataCoordinate,
88 timespan: Optional[Timespan] = None) -> Optional[DatasetRef]:
89 """Search a collection for a dataset with the given data ID.
91 Parameters
92 ----------
93 collection : `CollectionRecord`
94 The record object describing the collection to search for the
95 dataset. May have any `CollectionType`.
96 dataId: `DataCoordinate`
97 Complete (but not necessarily expanded) data ID to search with,
98 with ``dataId.graph == self.datasetType.dimensions``.
99 timespan : `Timespan`, optional
100 A timespan that the validity range of the dataset must overlap.
101 Required if ``collection.type is CollectionType.CALIBRATION``, and
102 ignored otherwise.
104 Returns
105 -------
106 ref : `DatasetRef`
107 A resolved `DatasetRef` (without components populated), or `None`
108 if no matching dataset was found.
109 """
110 raise NotImplementedError()
112 @abstractmethod
113 def delete(self, datasets: Iterable[DatasetRef]) -> None:
114 """Fully delete the given datasets from the registry.
116 Parameters
117 ----------
118 datasets : `Iterable` [ `DatasetRef` ]
119 Datasets to be deleted. All datasets must be resolved and have
120 the same `DatasetType` as ``self``.
122 Raises
123 ------
124 AmbiguousDatasetError
125 Raised if any of the given `DatasetRef` instances is unresolved.
126 """
127 raise NotImplementedError()
129 @abstractmethod
130 def associate(self, collection: CollectionRecord, datasets: Iterable[DatasetRef]) -> None:
131 """Associate one or more datasets with a collection.
133 Parameters
134 ----------
135 collection : `CollectionRecord`
136 The record object describing the collection. ``collection.type``
137 must be `~CollectionType.TAGGED`.
138 datasets : `Iterable` [ `DatasetRef` ]
139 Datasets to be associated. All datasets must be resolved and have
140 the same `DatasetType` as ``self``.
142 Raises
143 ------
144 AmbiguousDatasetError
145 Raised if any of the given `DatasetRef` instances is unresolved.
147 Notes
148 -----
149 Associating a dataset with into collection that already contains a
150 different dataset with the same `DatasetType` and data ID will remove
151 the existing dataset from that collection.
153 Associating the same dataset into a collection multiple times is a
154 no-op, but is still not permitted on read-only databases.
155 """
156 raise NotImplementedError()
158 @abstractmethod
159 def disassociate(self, collection: CollectionRecord, datasets: Iterable[DatasetRef]) -> None:
160 """Remove one or more datasets from a collection.
162 Parameters
163 ----------
164 collection : `CollectionRecord`
165 The record object describing the collection. ``collection.type``
166 must be `~CollectionType.TAGGED`.
167 datasets : `Iterable` [ `DatasetRef` ]
168 Datasets to be disassociated. All datasets must be resolved and
169 have the same `DatasetType` as ``self``.
171 Raises
172 ------
173 AmbiguousDatasetError
174 Raised if any of the given `DatasetRef` instances is unresolved.
175 """
176 raise NotImplementedError()
178 @abstractmethod
179 def certify(self, collection: CollectionRecord, datasets: Iterable[DatasetRef],
180 timespan: Timespan) -> None:
181 """Associate one or more datasets with a calibration collection and a
182 validity range within it.
184 Parameters
185 ----------
186 collection : `CollectionRecord`
187 The record object describing the collection. ``collection.type``
188 must be `~CollectionType.CALIBRATION`.
189 datasets : `Iterable` [ `DatasetRef` ]
190 Datasets to be associated. All datasets must be resolved and have
191 the same `DatasetType` as ``self``.
192 timespan : `Timespan`
193 The validity range for these datasets within the collection.
195 Raises
196 ------
197 AmbiguousDatasetError
198 Raised if any of the given `DatasetRef` instances is unresolved.
199 ConflictingDefinitionError
200 Raised if the collection already contains a different dataset with
201 the same `DatasetType` and data ID and an overlapping validity
202 range.
203 TypeError
204 Raised if
205 ``collection.type is not CollectionType.CALIBRATION`` or if
206 ``self.datasetType.isCalibration() is False``.
207 """
208 raise NotImplementedError()
210 @abstractmethod
211 def decertify(self, collection: CollectionRecord, timespan: Timespan, *,
212 dataIds: Optional[Iterable[DataCoordinate]] = None) -> None:
213 """Remove or adjust datasets to clear a validity range within a
214 calibration collection.
216 Parameters
217 ----------
218 collection : `CollectionRecord`
219 The record object describing the collection. ``collection.type``
220 must be `~CollectionType.CALIBRATION`.
221 timespan : `Timespan`
222 The validity range to remove datasets from within the collection.
223 Datasets that overlap this range but are not contained by it will
224 have their validity ranges adjusted to not overlap it, which may
225 split a single dataset validity range into two.
226 dataIds : `Iterable` [ `DataCoordinate` ], optional
227 Data IDs that should be decertified within the given validity range
228 If `None`, all data IDs for ``self.datasetType`` will be
229 decertified.
231 Raises
232 ------
233 TypeError
234 Raised if ``collection.type is not CollectionType.CALIBRATION``.
235 """
236 raise NotImplementedError()
238 @abstractmethod
239 def select(self, collection: CollectionRecord,
240 dataId: SimpleQuery.Select.Or[DataCoordinate] = SimpleQuery.Select,
241 id: SimpleQuery.Select.Or[Optional[int]] = SimpleQuery.Select,
242 run: SimpleQuery.Select.Or[None] = SimpleQuery.Select,
243 timespan: SimpleQuery.Select.Or[Optional[Timespan]] = SimpleQuery.Select,
244 ingestDate: SimpleQuery.Select.Or[Optional[Timespan]] = None,
245 ) -> Optional[SimpleQuery]:
246 """Return a SQLAlchemy object that represents a ``SELECT`` query for
247 this `DatasetType`.
249 All arguments can either be a value that constrains the query or
250 the `SimpleQuery.Select` tag object to indicate that the value should
251 be returned in the columns in the ``SELECT`` clause. The default is
252 `SimpleQuery.Select`.
254 Parameters
255 ----------
256 collection : `CollectionRecord`
257 The record object describing the collection to query. May not be
258 of type `CollectionType.CHAINED`.
259 dataId : `DataCoordinate` or `Select`
260 The data ID to restrict results with, or an instruction to return
261 the data ID via columns with names
262 ``self.datasetType.dimensions.names``.
263 id : `int`, `Select` or None,
264 The integer primary key value for the dataset, an instruction to
265 return it via a ``id`` column, or `None` to ignore it
266 entirely.
267 run : `None` or `Select`
268 If `Select` (default), include the dataset's run key value (as
269 column labeled with the return value of
270 ``CollectionManager.getRunForiegnKeyName``).
271 If `None`, do not include this column (to constrain the run,
272 pass a `RunRecord` as the ``collection`` argument instead).
273 timespan : `None`, `Select`, or `Timespan`
274 If `Select` (default), include the validity range timespan in the
275 result columns. If a `Timespan` instance, constrain the results to
276 those whose validity ranges overlap that given timespan. Ignored
277 unless ``collection.type is CollectionType.CALIBRATION``.
278 ingestDate : `None`, `Select`, or `Timespan`
279 If `Select` include the ingest timestamp in the result columns.
280 If a `Timespan` instance, constrain the results to those whose
281 ingest times which are inside given timespan and also include
282 timestamp in the result columns. If `None` (default) then there is
283 no constraint and timestamp is not returned.
285 Returns
286 -------
287 query : `SimpleQuery` or `None`
288 A struct containing the SQLAlchemy object that representing a
289 simple ``SELECT`` query, or `None` if it is known that there are
290 no datasets of this `DatasetType` that match the given constraints.
291 """
292 raise NotImplementedError()
294 datasetType: DatasetType
295 """Dataset type whose records this object manages (`DatasetType`).
296 """
299class DatasetRecordStorageManager(VersionedExtension):
300 """An interface that manages the tables that describe datasets.
302 `DatasetRecordStorageManager` primarily serves as a container and factory
303 for `DatasetRecordStorage` instances, which each provide access to the
304 records for a different `DatasetType`.
305 """
307 @classmethod
308 @abstractmethod
309 def initialize(
310 cls,
311 db: Database,
312 context: StaticTablesContext, *,
313 collections: CollectionManager,
314 dimensions: DimensionRecordStorageManager,
315 ) -> DatasetRecordStorageManager:
316 """Construct an instance of the manager.
318 Parameters
319 ----------
320 db : `Database`
321 Interface to the underlying database engine and namespace.
322 context : `StaticTablesContext`
323 Context object obtained from `Database.declareStaticTables`; used
324 to declare any tables that should always be present.
325 collections: `CollectionManager`
326 Manager object for the collections in this `Registry`.
327 dimensions : `DimensionRecordStorageManager`
328 Manager object for the dimensions in this `Registry`.
330 Returns
331 -------
332 manager : `DatasetRecordStorageManager`
333 An instance of a concrete `DatasetRecordStorageManager` subclass.
334 """
335 raise NotImplementedError()
337 @classmethod
338 @abstractmethod
339 def addDatasetForeignKey(cls, tableSpec: ddl.TableSpec, *,
340 name: str = "dataset", constraint: bool = True, onDelete: Optional[str] = None,
341 **kwargs: Any) -> ddl.FieldSpec:
342 """Add a foreign key (field and constraint) referencing the dataset
343 table.
345 Parameters
346 ----------
347 tableSpec : `ddl.TableSpec`
348 Specification for the table that should reference the dataset
349 table. Will be modified in place.
350 name: `str`, optional
351 A name to use for the prefix of the new field; the full name is
352 ``{name}_id``.
353 onDelete: `str`, optional
354 One of "CASCADE" or "SET NULL", indicating what should happen to
355 the referencing row if the collection row is deleted. `None`
356 indicates that this should be an integrity error.
357 constraint: `bool`, optional
358 If `False` (`True` is default), add a field that can be joined to
359 the dataset primary key, but do not add a foreign key constraint.
360 **kwargs
361 Additional keyword arguments are forwarded to the `ddl.FieldSpec`
362 constructor (only the ``name`` and ``dtype`` arguments are
363 otherwise provided).
365 Returns
366 -------
367 idSpec : `ddl.FieldSpec`
368 Specification for the ID field.
369 """
370 raise NotImplementedError()
372 @abstractmethod
373 def refresh(self) -> None:
374 """Ensure all other operations on this manager are aware of any
375 dataset types that may have been registered by other clients since
376 it was initialized or last refreshed.
377 """
378 raise NotImplementedError()
380 def __getitem__(self, name: str) -> DatasetRecordStorage:
381 """Return the object that provides access to the records associated
382 with the given `DatasetType` name.
384 This is simply a convenience wrapper for `find` that raises `KeyError`
385 when the dataset type is not found.
387 Returns
388 -------
389 records : `DatasetRecordStorage`
390 The object representing the records for the given dataset type.
392 Raises
393 ------
394 KeyError
395 Raised if there is no dataset type with the given name.
397 Notes
398 -----
399 Dataset types registered by another client of the same repository since
400 the last call to `initialize` or `refresh` may not be found.
401 """
402 result = self.find(name)
403 if result is None:
404 raise KeyError(f"Dataset type with name '{name}' not found.")
405 return result
407 @abstractmethod
408 def find(self, name: str) -> Optional[DatasetRecordStorage]:
409 """Return an object that provides access to the records associated with
410 the given `DatasetType` name, if one exists.
412 Parameters
413 ----------
414 name : `str`
415 Name of the dataset type.
417 Returns
418 -------
419 records : `DatasetRecordStorage` or `None`
420 The object representing the records for the given dataset type, or
421 `None` if there are no records for that dataset type.
423 Notes
424 -----
425 Dataset types registered by another client of the same repository since
426 the last call to `initialize` or `refresh` may not be found.
427 """
428 raise NotImplementedError()
430 @abstractmethod
431 def register(self, datasetType: DatasetType) -> Tuple[DatasetRecordStorage, bool]:
432 """Ensure that this `Registry` can hold records for the given
433 `DatasetType`, creating new tables as necessary.
435 Parameters
436 ----------
437 datasetType : `DatasetType`
438 Dataset type for which a table should created (as necessary) and
439 an associated `DatasetRecordStorage` returned.
441 Returns
442 -------
443 records : `DatasetRecordStorage`
444 The object representing the records for the given dataset type.
445 inserted : `bool`
446 `True` if the dataset type did not exist in the registry before.
448 Notes
449 -----
450 This operation may not be invoked within a `Database.transaction`
451 context.
452 """
453 raise NotImplementedError()
455 @abstractmethod
456 def remove(self, name: str) -> None:
457 """Remove the dataset type.
459 Parameters
460 ----------
461 name : `str`
462 Name of the dataset type.
463 """
464 raise NotImplementedError()
466 @abstractmethod
467 def __iter__(self) -> Iterator[DatasetType]:
468 """Return an iterator over the the dataset types present in this layer.
470 Notes
471 -----
472 Dataset types registered by another client of the same layer since
473 the last call to `initialize` or `refresh` may not be included.
474 """
475 raise NotImplementedError()
477 @abstractmethod
478 def getDatasetRef(self, id: int) -> Optional[DatasetRef]:
479 """Return a `DatasetRef` for the given dataset primary key
480 value.
482 Parameters
483 ----------
484 id : `int`
485 Autoincrement primary key value for the dataset.
487 Returns
488 -------
489 ref : `DatasetRef` or `None`
490 Object representing the dataset, or `None` if no dataset with the
491 given primary key values exists in this layer.
492 """
493 raise NotImplementedError()
495 @abstractmethod
496 def getCollectionSummary(self, collection: CollectionRecord) -> CollectionSummary:
497 """Return a summary for the given collection.
499 Parameters
500 ----------
501 collection : `CollectionRecord`
502 Record describing the collection for which a summary is to be
503 retrieved.
505 Returns
506 -------
507 summary : `CollectionSummary`
508 Summary of the dataset types and governor dimension values in
509 this collection.
510 """
511 raise NotImplementedError()