Coverage for python/lsst/daf/butler/registry/interfaces/_datasets.py : 57%

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ("DatasetRecordStorageManager", "DatasetRecordStorage")

from abc import ABC, abstractmethod
from typing import (
    Any,
    Iterable,
    Iterator,
    Optional,
    Tuple,
    TYPE_CHECKING,
)

from ...core import (
    DataCoordinate,
    DatasetRef,
    DatasetType,
    ddl,
    SimpleQuery,
    Timespan,
)
from ._versioning import VersionedExtension

if TYPE_CHECKING:
    from ._database import Database, StaticTablesContext
    from ._dimensions import DimensionRecordStorageManager
    from ._collections import CollectionManager, CollectionRecord, RunRecord


class DatasetRecordStorage(ABC):
    """An interface that manages the records associated with a particular
    `DatasetType`.

    Parameters
    ----------
    datasetType : `DatasetType`
        Dataset type whose records this object manages.
    """
    def __init__(self, datasetType: DatasetType):
        self.datasetType = datasetType

    @abstractmethod
    def insert(self, run: RunRecord, dataIds: Iterable[DataCoordinate]) -> Iterator[DatasetRef]:
        """Insert one or more dataset entries into the database.

        Parameters
        ----------
        run : `RunRecord`
            The record object describing the `~CollectionType.RUN` collection
            these datasets will be associated with.
        dataIds : `Iterable` [ `DataCoordinate` ]
            Expanded data IDs (`DataCoordinate` instances) for the
            datasets to be added. The dimensions of all data IDs must be the
            same as ``self.datasetType.dimensions``.

        Returns
        -------
        datasets : `Iterable` [ `DatasetRef` ]
            References to the inserted datasets.
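
        Examples
        --------
        A minimal usage sketch; ``storage`` (a concrete implementation of
        this interface), ``run``, and ``dataId`` are hypothetical objects
        obtained elsewhere, not defined in this module::

            refs = list(storage.insert(run, [dataId]))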
82 """
83 raise NotImplementedError()

    @abstractmethod
    def find(self, collection: CollectionRecord, dataId: DataCoordinate,
             timespan: Optional[Timespan] = None) -> Optional[DatasetRef]:
        """Search a collection for a dataset with the given data ID.

        Parameters
        ----------
        collection : `CollectionRecord`
            The record object describing the collection to search for the
            dataset. May have any `CollectionType`.
        dataId : `DataCoordinate`
            Complete (but not necessarily expanded) data ID to search with,
            with ``dataId.graph == self.datasetType.dimensions``.
        timespan : `Timespan`, optional
            A timespan that the validity range of the dataset must overlap.
            Required if ``collection.type is CollectionType.CALIBRATION``, and
            ignored otherwise.

        Returns
        -------
        ref : `DatasetRef` or `None`
            A resolved `DatasetRef` (without components populated), or `None`
            if no matching dataset was found.
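
        Examples
        --------
        A sketch of a calibration-collection lookup; ``storage``,
        ``calibCollection``, ``dataId``, and ``obsTimespan`` are
        hypothetical::

            ref = storage.find(calibCollection, dataId, timespan=obsTimespan)
            if ref is None:
                ...  # no dataset certified for this data ID and timespan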
108 """
109 raise NotImplementedError()

    @abstractmethod
    def delete(self, datasets: Iterable[DatasetRef]) -> None:
        """Fully delete the given datasets from the registry.

        Parameters
        ----------
        datasets : `Iterable` [ `DatasetRef` ]
            Datasets to be deleted. All datasets must be resolved and have
            the same `DatasetType` as ``self``.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any of the given `DatasetRef` instances is unresolved.
        """
        raise NotImplementedError()

    @abstractmethod
    def associate(self, collection: CollectionRecord, datasets: Iterable[DatasetRef]) -> None:
        """Associate one or more datasets with a collection.

        Parameters
        ----------
        collection : `CollectionRecord`
            The record object describing the collection. ``collection.type``
            must be `~CollectionType.TAGGED`.
        datasets : `Iterable` [ `DatasetRef` ]
            Datasets to be associated. All datasets must be resolved and have
            the same `DatasetType` as ``self``.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any of the given `DatasetRef` instances is unresolved.

        Notes
        -----
        Associating a dataset into a collection that already contains a
        different dataset with the same `DatasetType` and data ID will remove
        the existing dataset from that collection.

        Associating the same dataset into a collection multiple times is a
        no-op, but is still not permitted on read-only databases.
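
        Examples
        --------
        A sketch of tagging previously-inserted datasets; ``storage``,
        ``taggedCollection``, and ``refs`` are hypothetical::

            storage.associate(taggedCollection, refs)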
154 """
155 raise NotImplementedError()

    @abstractmethod
    def disassociate(self, collection: CollectionRecord, datasets: Iterable[DatasetRef]) -> None:
        """Remove one or more datasets from a collection.

        Parameters
        ----------
        collection : `CollectionRecord`
            The record object describing the collection. ``collection.type``
            must be `~CollectionType.TAGGED`.
        datasets : `Iterable` [ `DatasetRef` ]
            Datasets to be disassociated. All datasets must be resolved and
            have the same `DatasetType` as ``self``.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any of the given `DatasetRef` instances is unresolved.
        """
        raise NotImplementedError()

    @abstractmethod
    def certify(self, collection: CollectionRecord, datasets: Iterable[DatasetRef],
                timespan: Timespan) -> None:
        """Associate one or more datasets with a calibration collection and a
        validity range within it.

        Parameters
        ----------
        collection : `CollectionRecord`
            The record object describing the collection. ``collection.type``
            must be `~CollectionType.CALIBRATION`.
        datasets : `Iterable` [ `DatasetRef` ]
            Datasets to be associated. All datasets must be resolved and have
            the same `DatasetType` as ``self``.
        timespan : `Timespan`
            The validity range for these datasets within the collection.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any of the given `DatasetRef` instances is unresolved.
        ConflictingDefinitionError
            Raised if the collection already contains a different dataset with
            the same `DatasetType` and data ID and an overlapping validity
            range.
        TypeError
            Raised if
            ``collection.type is not CollectionType.CALIBRATION`` or if
            ``self.datasetType.isCalibration() is False``.
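
        Examples
        --------
        A sketch of certifying datasets over a validity range; ``storage``,
        ``calibCollection``, ``refs``, ``begin``, and ``end`` are
        hypothetical::

            storage.certify(calibCollection, refs,
                            Timespan(begin=begin, end=end))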
206 """
207 raise NotImplementedError()

    @abstractmethod
    def decertify(self, collection: CollectionRecord, timespan: Timespan, *,
                  dataIds: Optional[Iterable[DataCoordinate]] = None) -> None:
        """Remove or adjust datasets to clear a validity range within a
        calibration collection.

        Parameters
        ----------
        collection : `CollectionRecord`
            The record object describing the collection. ``collection.type``
            must be `~CollectionType.CALIBRATION`.
        timespan : `Timespan`
            The validity range to remove datasets from within the collection.
            Datasets that overlap this range but are not contained by it will
            have their validity ranges adjusted to not overlap it, which may
            split a single dataset validity range into two.
        dataIds : `Iterable` [ `DataCoordinate` ], optional
            Data IDs that should be decertified within the given validity
            range. If `None`, all data IDs for ``self.datasetType`` will be
            decertified.

        Raises
        ------
        TypeError
            Raised if ``collection.type is not CollectionType.CALIBRATION``.
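
        Examples
        --------
        A sketch of clearing one data ID from part of a validity range;
        ``storage``, ``calibCollection``, ``dataId``, ``begin``, and ``end``
        are hypothetical::

            storage.decertify(calibCollection, Timespan(begin=begin, end=end),
                              dataIds=[dataId])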
234 """
235 raise NotImplementedError()

    @abstractmethod
    def select(self, collection: CollectionRecord,
               dataId: SimpleQuery.Select.Or[DataCoordinate] = SimpleQuery.Select,
               id: SimpleQuery.Select.Or[Optional[int]] = SimpleQuery.Select,
               run: SimpleQuery.Select.Or[None] = SimpleQuery.Select,
               timespan: SimpleQuery.Select.Or[Optional[Timespan]] = SimpleQuery.Select,
               ) -> Optional[SimpleQuery]:
        """Return a SQLAlchemy object that represents a ``SELECT`` query for
        this `DatasetType`.

        All arguments can either be a value that constrains the query or
        the `SimpleQuery.Select` tag object to indicate that the value should
        be returned in the columns in the ``SELECT`` clause. The default is
        `SimpleQuery.Select`.

        Parameters
        ----------
        collection : `CollectionRecord`
            The record object describing the collection to query. May not be
            of type `CollectionType.CHAINED`.
        dataId : `DataCoordinate` or `Select`
            The data ID to restrict results with, or an instruction to return
            the data ID via columns with names
            ``self.datasetType.dimensions.names``.
        id : `int`, `Select`, or `None`
            The integer primary key value for the dataset, an instruction to
            return it via an ``id`` column, or `None` to ignore it
            entirely.
        run : `None` or `Select`
            If `Select` (default), include the dataset's run key value (as a
            column labeled with the return value of
            ``CollectionManager.getRunForeignKeyName``).
            If `None`, do not include this column (to constrain the run,
            pass a `RunRecord` as the ``collection`` argument instead).
        timespan : `None`, `Select`, or `Timespan`
            If `Select` (default), include the validity range timespan in the
            result columns. If a `Timespan` instance, constrain the results to
            those whose validity ranges overlap that given timespan. Ignored
            unless ``collection.type is CollectionType.CALIBRATION``.

        Returns
        -------
        query : `SimpleQuery` or `None`
            A struct containing the SQLAlchemy object representing a
            simple ``SELECT`` query, or `None` if it is known that there are
            no datasets of this `DatasetType` that match the given constraints.
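
        Examples
        --------
        A sketch of selecting dataset IDs within a run, with the run column
        suppressed; ``storage`` and ``runRecord`` are hypothetical, and it is
        assumed that `SimpleQuery.combine` produces the final executable
        statement::

            query = storage.select(runRecord, id=SimpleQuery.Select, run=None)
            if query is not None:
                sql = query.combine()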
283 """
284 raise NotImplementedError()

    datasetType: DatasetType
    """Dataset type whose records this object manages (`DatasetType`).
    """


class DatasetRecordStorageManager(VersionedExtension):
    """An interface that manages the tables that describe datasets.

    `DatasetRecordStorageManager` primarily serves as a container and factory
    for `DatasetRecordStorage` instances, which each provide access to the
    records for a different `DatasetType`.
    """

    @classmethod
    @abstractmethod
    def initialize(
        cls,
        db: Database,
        context: StaticTablesContext, *,
        collections: CollectionManager,
        dimensions: DimensionRecordStorageManager,
    ) -> DatasetRecordStorageManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present.
        collections : `CollectionManager`
            Manager object for the collections in this `Registry`.
        dimensions : `DimensionRecordStorageManager`
            Manager object for the dimensions in this `Registry`.

        Returns
        -------
        manager : `DatasetRecordStorageManager`
            An instance of a concrete `DatasetRecordStorageManager` subclass.
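
        Examples
        --------
        A sketch of how a concrete subclass is typically constructed during
        schema declaration; ``db``, ``collections``, and ``dimensions`` are
        hypothetical, and ``SomeDatasetRecordStorageManager`` stands in for
        any concrete subclass::

            with db.declareStaticTables(create=True) as context:
                manager = SomeDatasetRecordStorageManager.initialize(
                    db, context, collections=collections, dimensions=dimensions,
                )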
326 """
327 raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addDatasetForeignKey(cls, tableSpec: ddl.TableSpec, *,
                             name: str = "dataset", constraint: bool = True, onDelete: Optional[str] = None,
                             **kwargs: Any) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) referencing the dataset
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the dataset
            table. Will be modified in place.
        name : `str`, optional
            A name to use for the prefix of the new field; the full name is
            ``{name}_id``.
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen to
            the referencing row if the dataset row is deleted. `None`
            indicates that this should be an integrity error.
        constraint : `bool`, optional
            If `False` (`True` is default), add a field that can be joined to
            the dataset primary key, but do not add a foreign key constraint.
        **kwargs
            Additional keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        idSpec : `ddl.FieldSpec`
            Specification for the ID field.
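
        Examples
        --------
        A sketch of adding a dataset foreign key to a new table's
        specification; ``SomeDatasetRecordStorageManager`` stands in for any
        concrete subclass, and the empty table spec is purely illustrative::

            tableSpec = ddl.TableSpec(fields=[])
            idSpec = SomeDatasetRecordStorageManager.addDatasetForeignKey(
                tableSpec, onDelete="CASCADE",
            )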
361 """
362 raise NotImplementedError()

    @abstractmethod
    def refresh(self) -> None:
        """Ensure all other operations on this manager are aware of any
        dataset types that may have been registered by other clients since
        it was initialized or last refreshed.
        """
        raise NotImplementedError()

    def __getitem__(self, name: str) -> DatasetRecordStorage:
        """Return the object that provides access to the records associated
        with the given `DatasetType` name.

        This is simply a convenience wrapper for `find` that raises `KeyError`
        when the dataset type is not found.

        Returns
        -------
        records : `DatasetRecordStorage`
            The object representing the records for the given dataset type.

        Raises
        ------
        KeyError
            Raised if there is no dataset type with the given name.

        Notes
        -----
        Dataset types registered by another client of the same repository
        since the last call to `initialize` or `refresh` may not be found.
        """
        result = self.find(name)
        if result is None:
            raise KeyError(f"Dataset type with name '{name}' not found.")
        return result

    @abstractmethod
    def find(self, name: str) -> Optional[DatasetRecordStorage]:
        """Return an object that provides access to the records associated
        with the given `DatasetType` name, if one exists.

        Parameters
        ----------
        name : `str`
            Name of the dataset type.

        Returns
        -------
        records : `DatasetRecordStorage` or `None`
            The object representing the records for the given dataset type, or
            `None` if there are no records for that dataset type.

        Notes
        -----
        Dataset types registered by another client of the same repository
        since the last call to `initialize` or `refresh` may not be found.
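
        Examples
        --------
        A sketch of looking up per-dataset-type storage, refreshing once if
        another client may have registered the type; ``manager`` is a
        hypothetical concrete instance and ``"calexp"`` an illustrative
        dataset type name::

            storage = manager.find("calexp")
            if storage is None:
                manager.refresh()  # pick up types registered elsewhere
                storage = manager.find("calexp")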
419 """
420 raise NotImplementedError()

    @abstractmethod
    def register(self, datasetType: DatasetType) -> Tuple[DatasetRecordStorage, bool]:
        """Ensure that this `Registry` can hold records for the given
        `DatasetType`, creating new tables as necessary.

        Parameters
        ----------
        datasetType : `DatasetType`
            Dataset type for which a table should be created (as necessary)
            and an associated `DatasetRecordStorage` returned.

        Returns
        -------
        records : `DatasetRecordStorage`
            The object representing the records for the given dataset type.
        inserted : `bool`
            `True` if the dataset type did not exist in the registry before.

        Notes
        -----
        This operation may not be invoked within a `Database.transaction`
        context.
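
        Examples
        --------
        A sketch of idempotent registration; ``manager`` and ``universe`` are
        hypothetical, and the dataset type definition is purely
        illustrative::

            datasetType = DatasetType("deepCoadd", dimensions=["tract", "patch"],
                                      storageClass="ExposureF", universe=universe)
            storage, inserted = manager.register(datasetType)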
444 """
445 raise NotImplementedError()

    @abstractmethod
    def remove(self, name: str) -> None:
        """Remove the dataset type.

        Parameters
        ----------
        name : `str`
            Name of the dataset type.
        """
        raise NotImplementedError()

    @abstractmethod
    def __iter__(self) -> Iterator[DatasetType]:
        """Return an iterator over the dataset types present in this layer.

        Notes
        -----
        Dataset types registered by another client of the same layer since
        the last call to `initialize` or `refresh` may not be included.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDatasetRef(self, id: int) -> Optional[DatasetRef]:
        """Return a `DatasetRef` for the given dataset primary key
        value.

        Parameters
        ----------
        id : `int`
            Autoincrement primary key value for the dataset.

        Returns
        -------
        ref : `DatasetRef` or `None`
            Object representing the dataset, or `None` if no dataset with the
            given primary key value exists in this layer.
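
        Examples
        --------
        A sketch of resolving a stored integer ID back to a ref; ``manager``
        is a hypothetical concrete instance::

            ref = manager.getDatasetRef(42)
            if ref is not None:
                print(ref.datasetType.name, ref.dataId)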
484 """
485 raise NotImplementedError()