Coverage for python/lsst/daf/butler/registry/interfaces/_datasets.py : 62%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ("DatasetRecordStorageManager", "DatasetRecordStorage")
26from abc import ABC, abstractmethod
27from typing import (
28 Any,
29 Iterable,
30 Iterator,
31 Optional,
32 Tuple,
33 TYPE_CHECKING,
34)
36from ...core import (
37 DataCoordinate,
38 DatasetRef,
39 DatasetType,
40 ddl,
41 ExpandedDataCoordinate,
42 Quantum,
43)
44from ..simpleQuery import Select
46if TYPE_CHECKING: 46 ↛ 47line 46 didn't jump to line 47, because the condition on line 46 was never true
47 from ...core import DimensionUniverse
48 from ._database import Database, StaticTablesContext
49 from ._collections import CollectionManager, CollectionRecord, RunRecord
50 from ..simpleQuery import SimpleQuery
class DatasetRecordStorage(ABC):
    """An interface that manages the records associated with a particular
    `DatasetType`.

    Parameters
    ----------
    datasetType : `DatasetType`
        Dataset type whose records this object manages.
    """
    def __init__(self, datasetType: DatasetType):
        self.datasetType = datasetType

    @abstractmethod
    def insert(self, run: RunRecord, dataIds: Iterable[ExpandedDataCoordinate], *,
               quantum: Optional[Quantum] = None) -> Iterator[DatasetRef]:
        """Insert one or more dataset entries into the database.

        Parameters
        ----------
        run : `RunRecord`
            The record object describing the `~CollectionType.RUN` collection
            this dataset will be associated with.
        dataIds : `Iterable` [ `ExpandedDataCoordinate` ]
            Expanded data IDs (`ExpandedDataCoordinate` instances) for the
            datasets to be added.  The dimensions of all data IDs must be the
            same as ``self.datasetType.dimensions``.
        quantum : `Quantum`, optional
            The `Quantum` instance that should be recorded as responsible for
            producing this dataset.

        Returns
        -------
        datasets : `Iterable` [ `DatasetRef` ]
            References to the inserted datasets.
        """
        raise NotImplementedError()

    @abstractmethod
    def find(self, collection: CollectionRecord, dataId: DataCoordinate) -> Optional[DatasetRef]:
        """Search a collection for a dataset with the given data ID.

        Parameters
        ----------
        collection : `CollectionRecord`
            The record object describing the collection to search for the
            dataset.  May have any `CollectionType`.
        dataId : `DataCoordinate`
            Complete (but not necessarily expanded) data ID to search with,
            with ``dataId.graph == self.datasetType.dimensions``.

        Returns
        -------
        ref : `DatasetRef` or `None`
            A resolved `DatasetRef` (without components populated), or `None`
            if no matching dataset was found.
        """
        raise NotImplementedError()

    @abstractmethod
    def associate(self, collection: CollectionRecord, datasets: Iterable[DatasetRef]) -> None:
        """Associate one or more datasets with a collection.

        Parameters
        ----------
        collection : `CollectionRecord`
            The record object describing the collection.  ``collection.type``
            must be `~CollectionType.TAGGED`.
        datasets : `Iterable` [ `DatasetRef` ]
            Datasets to be associated.  All datasets must be resolved and have
            the same `DatasetType` as ``self``.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any of the given `DatasetRef` instances is unresolved.

        Notes
        -----
        Associating a dataset into a collection that already contains a
        different dataset with the same `DatasetType` and data ID will remove
        the existing dataset from that collection.

        Associating the same dataset into a collection multiple times is a
        no-op, but is still not permitted on read-only databases.
        """
        raise NotImplementedError()

    @abstractmethod
    def disassociate(self, collection: CollectionRecord, datasets: Iterable[DatasetRef]) -> None:
        """Remove one or more datasets from a collection.

        Parameters
        ----------
        collection : `CollectionRecord`
            The record object describing the collection.  ``collection.type``
            must be `~CollectionType.TAGGED`.
        datasets : `Iterable` [ `DatasetRef` ]
            Datasets to be disassociated.  All datasets must be resolved and
            have the same `DatasetType` as ``self``.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any of the given `DatasetRef` instances is unresolved.
        """
        raise NotImplementedError()

    @abstractmethod
    def select(self, collection: CollectionRecord,
               dataId: Select.Or[DataCoordinate] = Select,
               id: Select.Or[Optional[int]] = Select,
               run: Select.Or[None] = Select,
               ) -> Optional[SimpleQuery]:
        """Return a SQLAlchemy object that represents a ``SELECT`` query for
        this `DatasetType`.

        All arguments can either be a value that constrains the query or
        the `Select` tag object to indicate that the value should be returned
        in the columns in the ``SELECT`` clause.  The default is `Select`.

        Parameters
        ----------
        collection : `CollectionRecord`
            The record object describing the collection to query.  May not be
            of type `CollectionType.CHAINED`.
        dataId : `DataCoordinate` or `Select`
            The data ID to restrict results with, or an instruction to return
            the data ID via columns with names
            ``self.datasetType.dimensions.names``.
        id : `int`, `Select`, or `None`
            The integer primary key value for the dataset, an instruction to
            return it via a ``id`` column, or `None` to ignore it
            entirely.
        run : `None` or `Select`
            If `Select` (default), include the dataset's run key value (as
            column labeled with the return value of
            ``CollectionManager.getRunForeignKeyName``).
            If `None`, do not include this column (to constrain the run,
            pass a `RunRecord` as the ``collection`` argument instead.)

        Returns
        -------
        query : `SimpleQuery` or `None`
            A struct containing the SQLAlchemy object that represents a
            simple ``SELECT`` query, or `None` if it is known that there are
            no datasets of this `DatasetType` that match the given constraints.
        """
        raise NotImplementedError()

    datasetType: DatasetType
    """Dataset type whose records this object manages (`DatasetType`).
    """
class DatasetRecordStorageManager(ABC):
    """An interface that manages the tables that describe datasets.

    `DatasetRecordStorageManager` primarily serves as a container and factory
    for `DatasetRecordStorage` instances, which each provide access to the
    records for a different `DatasetType`.
    """

    @classmethod
    @abstractmethod
    def initialize(cls, db: Database, context: StaticTablesContext, *, collections: CollectionManager,
                   universe: DimensionUniverse) -> DatasetRecordStorageManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present.
        collections : `CollectionManager`
            Manager object for the collections in this `Registry`.
        universe : `DimensionUniverse`
            Universe graph containing all dimensions known to this `Registry`.

        Returns
        -------
        manager : `DatasetRecordStorageManager`
            An instance of a concrete `DatasetRecordStorageManager` subclass.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addDatasetForeignKey(cls, tableSpec: ddl.TableSpec, *,
                             name: str = "dataset", constraint: bool = True, onDelete: Optional[str] = None,
                             **kwargs: Any) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) referencing the dataset
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the dataset
            table.  Will be modified in place.
        name : `str`, optional
            A name to use for the prefix of the new field; the full name is
            ``{name}_id``.
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen to
            the referencing row if the collection row is deleted.  `None`
            indicates that this should be an integrity error.
        constraint : `bool`, optional
            If `False` (`True` is default), add a field that can be joined to
            the dataset primary key, but do not add a foreign key constraint.
        **kwargs
            Additional keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        idSpec : `ddl.FieldSpec`
            Specification for the ID field.
        """
        raise NotImplementedError()

    @abstractmethod
    def refresh(self, *, universe: DimensionUniverse) -> None:
        """Ensure all other operations on this manager are aware of any
        dataset types that may have been registered by other clients since
        it was initialized or last refreshed.

        Parameters
        ----------
        universe : `DimensionUniverse`
            Universe graph containing all dimensions known to this `Registry`.
        """
        raise NotImplementedError()

    @abstractmethod
    def find(self, name: str) -> Optional[DatasetRecordStorage]:
        """Return an object that provides access to the records associated with
        the given `DatasetType`, if one exists.

        Parameters
        ----------
        name : `str`
            Name of the dataset type.

        Returns
        -------
        records : `DatasetRecordStorage` or `None`
            The object representing the records for the given dataset type, or
            `None` if there are no records for that dataset type.

        Notes
        -----
        Dataset types registered by another client of the same repository since
        the last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def register(self, datasetType: DatasetType) -> Tuple[DatasetRecordStorage, bool]:
        """Ensure that this `Registry` can hold records for the given
        `DatasetType`, creating new tables as necessary.

        Parameters
        ----------
        datasetType : `DatasetType`
            Dataset type for which a table should be created (as necessary) and
            an associated `DatasetRecordStorage` returned.

        Returns
        -------
        records : `DatasetRecordStorage`
            The object representing the records for the given dataset type.
        inserted : `bool`
            `True` if the dataset type did not exist in the registry before.

        Notes
        -----
        This operation may not be invoked within a `Database.transaction`
        context.
        """
        raise NotImplementedError()

    @abstractmethod
    def __iter__(self) -> Iterator[DatasetType]:
        """Return an iterator over the dataset types present in this layer.

        Notes
        -----
        Dataset types registered by another client of the same layer since
        the last call to `initialize` or `refresh` may not be included.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDatasetRef(self, id: int) -> Optional[DatasetRef]:
        """Return a `DatasetRef` for the given dataset primary key
        value.

        Parameters
        ----------
        id : `int`
            Autoincrement primary key value for the dataset.

        Returns
        -------
        ref : `DatasetRef` or `None`
            Object representing the dataset, or `None` if no dataset with the
            given primary key value exists in this layer.
        """
        raise NotImplementedError()