# Coverage-report header (coverage.py HTML export, 62% for
# python/lsst/daf/butler/registry/interfaces/_datasets.py); not part of the source.
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ("DatasetRecordStorageManager", "DatasetRecordStorage")
26from abc import ABC, abstractmethod
27from typing import (
28 Any,
29 Iterable,
30 Iterator,
31 Optional,
32 Tuple,
33 TYPE_CHECKING,
34)
36from ...core import (
37 DataCoordinate,
38 DatasetRef,
39 DatasetType,
40 ddl,
41 SimpleQuery,
42)
if TYPE_CHECKING:
45 from ...core import DimensionUniverse
46 from ._database import Database, StaticTablesContext
47 from ._collections import CollectionManager, CollectionRecord, RunRecord
class DatasetRecordStorage(ABC):
    """An interface that manages the records associated with a particular
    `DatasetType`.

    Parameters
    ----------
    datasetType : `DatasetType`
        Dataset type whose records this object manages.
    """
    def __init__(self, datasetType: DatasetType):
        self.datasetType = datasetType

    @abstractmethod
    def insert(self, run: RunRecord, dataIds: Iterable[DataCoordinate]) -> Iterator[DatasetRef]:
        """Insert one or more dataset entries into the database.

        Parameters
        ----------
        run : `RunRecord`
            The record object describing the `~CollectionType.RUN` collection
            this dataset will be associated with.
        dataIds : `Iterable` [ `DataCoordinate` ]
            Expanded data IDs (`DataCoordinate` instances) for the
            datasets to be added.  The dimensions of all data IDs must be the
            same as ``self.datasetType.dimensions``.

        Returns
        -------
        datasets : `Iterable` [ `DatasetRef` ]
            References to the inserted datasets.
        """
        raise NotImplementedError()

    @abstractmethod
    def find(self, collection: CollectionRecord, dataId: DataCoordinate) -> Optional[DatasetRef]:
        """Search a collection for a dataset with the given data ID.

        Parameters
        ----------
        collection : `CollectionRecord`
            The record object describing the collection to search for the
            dataset.  May have any `CollectionType`.
        dataId : `DataCoordinate`
            Complete (but not necessarily expanded) data ID to search with,
            with ``dataId.graph == self.datasetType.dimensions``.

        Returns
        -------
        ref : `DatasetRef`
            A resolved `DatasetRef` (without components populated), or `None`
            if no matching dataset was found.
        """
        raise NotImplementedError()

    @abstractmethod
    def delete(self, datasets: Iterable[DatasetRef]) -> None:
        """Fully delete the given datasets from the registry.

        Parameters
        ----------
        datasets : `Iterable` [ `DatasetRef` ]
            Datasets to be deleted.  All datasets must be resolved and have
            the same `DatasetType` as ``self``.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any of the given `DatasetRef` instances is unresolved.
        """
        raise NotImplementedError()

    @abstractmethod
    def associate(self, collection: CollectionRecord, datasets: Iterable[DatasetRef]) -> None:
        """Associate one or more datasets with a collection.

        Parameters
        ----------
        collection : `CollectionRecord`
            The record object describing the collection.  ``collection.type``
            must be `~CollectionType.TAGGED`.
        datasets : `Iterable` [ `DatasetRef` ]
            Datasets to be associated.  All datasets must be resolved and have
            the same `DatasetType` as ``self``.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any of the given `DatasetRef` instances is unresolved.

        Notes
        -----
        Associating a dataset into a collection that already contains a
        different dataset with the same `DatasetType` and data ID will remove
        the existing dataset from that collection.

        Associating the same dataset into a collection multiple times is a
        no-op, but is still not permitted on read-only databases.
        """
        raise NotImplementedError()

    @abstractmethod
    def disassociate(self, collection: CollectionRecord, datasets: Iterable[DatasetRef]) -> None:
        """Remove one or more datasets from a collection.

        Parameters
        ----------
        collection : `CollectionRecord`
            The record object describing the collection.  ``collection.type``
            must be `~CollectionType.TAGGED`.
        datasets : `Iterable` [ `DatasetRef` ]
            Datasets to be disassociated.  All datasets must be resolved and
            have the same `DatasetType` as ``self``.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any of the given `DatasetRef` instances is unresolved.
        """
        raise NotImplementedError()

    @abstractmethod
    def select(self, collection: CollectionRecord,
               dataId: SimpleQuery.Select.Or[DataCoordinate] = SimpleQuery.Select,
               id: SimpleQuery.Select.Or[Optional[int]] = SimpleQuery.Select,
               run: SimpleQuery.Select.Or[None] = SimpleQuery.Select,
               ) -> Optional[SimpleQuery]:
        """Return a SQLAlchemy object that represents a ``SELECT`` query for
        this `DatasetType`.

        All arguments can either be a value that constrains the query or
        the `SimpleQuery.Select` tag object to indicate that the value should
        be returned in the columns in the ``SELECT`` clause.  The default is
        `SimpleQuery.Select`.

        Parameters
        ----------
        collection : `CollectionRecord`
            The record object describing the collection to query.  May not be
            of type `CollectionType.CHAINED`.
        dataId : `DataCoordinate` or `Select`
            The data ID to restrict results with, or an instruction to return
            the data ID via columns with names
            ``self.datasetType.dimensions.names``.
        id : `int`, `Select` or `None`
            The integer primary key value for the dataset, an instruction to
            return it via an ``id`` column, or `None` to ignore it entirely.
        run : `None` or `Select`
            If `Select` (default), include the dataset's run key value (as a
            column labeled with the return value of
            ``CollectionManager.getRunForeignKeyName``).
            If `None`, do not include this column (to constrain the run,
            pass a `RunRecord` as the ``collection`` argument instead).

        Returns
        -------
        query : `SimpleQuery` or `None`
            A struct containing the SQLAlchemy object representing a
            simple ``SELECT`` query, or `None` if it is known that there are
            no datasets of this `DatasetType` that match the given constraints.
        """
        raise NotImplementedError()

    datasetType: DatasetType
    """Dataset type whose records this object manages (`DatasetType`).
    """
class DatasetRecordStorageManager(ABC):
    """An interface that manages the tables that describe datasets.

    `DatasetRecordStorageManager` primarily serves as a container and factory
    for `DatasetRecordStorage` instances, which each provide access to the
    records for a different `DatasetType`.
    """

    @classmethod
    @abstractmethod
    def initialize(cls, db: Database, context: StaticTablesContext, *, collections: CollectionManager,
                   universe: DimensionUniverse) -> DatasetRecordStorageManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present.
        collections : `CollectionManager`
            Manager object for the collections in this `Registry`.
        universe : `DimensionUniverse`
            Universe graph containing all dimensions known to this `Registry`.

        Returns
        -------
        manager : `DatasetRecordStorageManager`
            An instance of a concrete `DatasetRecordStorageManager` subclass.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addDatasetForeignKey(cls, tableSpec: ddl.TableSpec, *,
                             name: str = "dataset", constraint: bool = True, onDelete: Optional[str] = None,
                             **kwargs: Any) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) referencing the dataset
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the dataset
            table.  Will be modified in place.
        name : `str`, optional
            A name to use for the prefix of the new field; the full name is
            ``{name}_id``.
        constraint : `bool`, optional
            If `False` (`True` is default), add a field that can be joined to
            the dataset primary key, but do not add a foreign key constraint.
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen to
            the referencing row if the collection row is deleted.  `None`
            indicates that this should be an integrity error.
        **kwargs
            Additional keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        idSpec : `ddl.FieldSpec`
            Specification for the ID field.
        """
        raise NotImplementedError()

    @abstractmethod
    def refresh(self, *, universe: DimensionUniverse) -> None:
        """Ensure all other operations on this manager are aware of any
        dataset types that may have been registered by other clients since
        it was initialized or last refreshed.

        Parameters
        ----------
        universe : `DimensionUniverse`
            Universe graph containing all dimensions known to this `Registry`.
        """
        raise NotImplementedError()

    @abstractmethod
    def find(self, name: str) -> Optional[DatasetRecordStorage]:
        """Return an object that provides access to the records associated
        with the given `DatasetType`, if one exists.

        Parameters
        ----------
        name : `str`
            Name of the dataset type.

        Returns
        -------
        records : `DatasetRecordStorage` or `None`
            The object representing the records for the given dataset type, or
            `None` if there are no records for that dataset type.

        Notes
        -----
        Dataset types registered by another client of the same repository
        since the last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def register(self, datasetType: DatasetType) -> Tuple[DatasetRecordStorage, bool]:
        """Ensure that this `Registry` can hold records for the given
        `DatasetType`, creating new tables as necessary.

        Parameters
        ----------
        datasetType : `DatasetType`
            Dataset type for which a table should be created (as necessary)
            and an associated `DatasetRecordStorage` returned.

        Returns
        -------
        records : `DatasetRecordStorage`
            The object representing the records for the given dataset type.
        inserted : `bool`
            `True` if the dataset type did not exist in the registry before.

        Notes
        -----
        This operation may not be invoked within a `Database.transaction`
        context.
        """
        raise NotImplementedError()

    @abstractmethod
    def __iter__(self) -> Iterator[DatasetType]:
        """Return an iterator over the dataset types present in this layer.

        Notes
        -----
        Dataset types registered by another client of the same layer since
        the last call to `initialize` or `refresh` may not be included.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDatasetRef(self, id: int, *, universe: DimensionUniverse) -> Optional[DatasetRef]:
        """Return a `DatasetRef` for the given dataset primary key value.

        Parameters
        ----------
        id : `int`
            Autoincrement primary key value for the dataset.
        universe : `DimensionUniverse`
            All known dimensions.

        Returns
        -------
        ref : `DatasetRef` or `None`
            Object representing the dataset, or `None` if no dataset with the
            given primary key values exists in this layer.
        """
        raise NotImplementedError()