Coverage for python/lsst/daf/butler/registry/interfaces/_datasets.py : 61%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ("DatasetRecordStorageManager", "DatasetRecordStorage")

from abc import ABC, abstractmethod
from typing import (
    Any,
    Iterable,
    Iterator,
    Optional,
    Tuple,
    TYPE_CHECKING,
)

from ...core import (
    DataCoordinate,
    DatasetRef,
    DatasetType,
    ddl,
    ExpandedDataCoordinate,
)
from ..simpleQuery import Select

if TYPE_CHECKING:
    # Imports needed only for type annotations; guarded to avoid circular
    # imports at runtime (annotations are lazy via ``from __future__``).
    from ...core import DimensionUniverse
    from ._database import Database, StaticTablesContext
    from ._collections import CollectionManager, CollectionRecord, RunRecord
    from ..simpleQuery import SimpleQuery
class DatasetRecordStorage(ABC):
    """An interface that manages the records associated with a particular
    `DatasetType`.

    Parameters
    ----------
    datasetType : `DatasetType`
        Dataset type whose records this object manages.
    """
    def __init__(self, datasetType: DatasetType):
        self.datasetType = datasetType

    @abstractmethod
    def insert(self, run: RunRecord, dataIds: Iterable[ExpandedDataCoordinate]) -> Iterator[DatasetRef]:
        """Insert one or more dataset entries into the database.

        Parameters
        ----------
        run : `RunRecord`
            The record object describing the `~CollectionType.RUN` collection
            this dataset will be associated with.
        dataIds : `Iterable` [ `ExpandedDataCoordinate` ]
            Expanded data IDs (`ExpandedDataCoordinate` instances) for the
            datasets to be added.  The dimensions of all data IDs must be the
            same as ``self.datasetType.dimensions``.

        Returns
        -------
        datasets : `Iterator` [ `DatasetRef` ]
            References to the inserted datasets.
        """
        raise NotImplementedError()

    @abstractmethod
    def find(self, collection: CollectionRecord, dataId: DataCoordinate) -> Optional[DatasetRef]:
        """Search a collection for a dataset with the given data ID.

        Parameters
        ----------
        collection : `CollectionRecord`
            The record object describing the collection to search for the
            dataset.  May have any `CollectionType`.
        dataId : `DataCoordinate`
            Complete (but not necessarily expanded) data ID to search with,
            with ``dataId.graph == self.datasetType.dimensions``.

        Returns
        -------
        ref : `DatasetRef` or `None`
            A resolved `DatasetRef` (without components populated), or `None`
            if no matching dataset was found.
        """
        raise NotImplementedError()

    @abstractmethod
    def delete(self, datasets: Iterable[DatasetRef]) -> None:
        """Fully delete the given datasets from the registry.

        Parameters
        ----------
        datasets : `Iterable` [ `DatasetRef` ]
            Datasets to be deleted.  All datasets must be resolved and have
            the same `DatasetType` as ``self``.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any of the given `DatasetRef` instances is unresolved.
        """
        raise NotImplementedError()

    @abstractmethod
    def associate(self, collection: CollectionRecord, datasets: Iterable[DatasetRef]) -> None:
        """Associate one or more datasets with a collection.

        Parameters
        ----------
        collection : `CollectionRecord`
            The record object describing the collection.  ``collection.type``
            must be `~CollectionType.TAGGED`.
        datasets : `Iterable` [ `DatasetRef` ]
            Datasets to be associated.  All datasets must be resolved and have
            the same `DatasetType` as ``self``.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any of the given `DatasetRef` instances is unresolved.

        Notes
        -----
        Associating a dataset into a collection that already contains a
        different dataset with the same `DatasetType` and data ID will remove
        the existing dataset from that collection.

        Associating the same dataset into a collection multiple times is a
        no-op, but is still not permitted on read-only databases.
        """
        raise NotImplementedError()

    @abstractmethod
    def disassociate(self, collection: CollectionRecord, datasets: Iterable[DatasetRef]) -> None:
        """Remove one or more datasets from a collection.

        Parameters
        ----------
        collection : `CollectionRecord`
            The record object describing the collection.  ``collection.type``
            must be `~CollectionType.TAGGED`.
        datasets : `Iterable` [ `DatasetRef` ]
            Datasets to be disassociated.  All datasets must be resolved and
            have the same `DatasetType` as ``self``.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any of the given `DatasetRef` instances is unresolved.
        """
        raise NotImplementedError()

    @abstractmethod
    def select(self, collection: CollectionRecord,
               dataId: Select.Or[DataCoordinate] = Select,
               id: Select.Or[Optional[int]] = Select,
               run: Select.Or[None] = Select,
               ) -> Optional[SimpleQuery]:
        """Return a SQLAlchemy object that represents a ``SELECT`` query for
        this `DatasetType`.

        All arguments can either be a value that constrains the query or
        the `Select` tag object to indicate that the value should be returned
        in the columns in the ``SELECT`` clause.  The default is `Select`.

        Parameters
        ----------
        collection : `CollectionRecord`
            The record object describing the collection to query.  May not be
            of type `CollectionType.CHAINED`.
        dataId : `DataCoordinate` or `Select`
            The data ID to restrict results with, or an instruction to return
            the data ID via columns with names
            ``self.datasetType.dimensions.names``.
        id : `int`, `Select` or `None`
            The integer primary key value for the dataset, an instruction to
            return it via a ``id`` column, or `None` to ignore it entirely.
        run : `None` or `Select`
            If `Select` (default), include the dataset's run key value (as
            a column labeled with the return value of
            ``CollectionManager.getRunForeignKeyName``).
            If `None`, do not include this column (to constrain the run,
            pass a `RunRecord` as the ``collection`` argument instead).

        Returns
        -------
        query : `SimpleQuery` or `None`
            A struct containing the SQLAlchemy object representing a
            simple ``SELECT`` query, or `None` if it is known that there are
            no datasets of this `DatasetType` that match the given
            constraints.
        """
        raise NotImplementedError()

    datasetType: DatasetType
    """Dataset type whose records this object manages (`DatasetType`).
    """
class DatasetRecordStorageManager(ABC):
    """An interface that manages the tables that describe datasets.

    `DatasetRecordStorageManager` primarily serves as a container and factory
    for `DatasetRecordStorage` instances, which each provide access to the
    records for a different `DatasetType`.
    """

    @classmethod
    @abstractmethod
    def initialize(cls, db: Database, context: StaticTablesContext, *, collections: CollectionManager,
                   universe: DimensionUniverse) -> DatasetRecordStorageManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present.
        collections : `CollectionManager`
            Manager object for the collections in this `Registry`.
        universe : `DimensionUniverse`
            Universe graph containing all dimensions known to this `Registry`.

        Returns
        -------
        manager : `DatasetRecordStorageManager`
            An instance of a concrete `DatasetRecordStorageManager` subclass.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addDatasetForeignKey(cls, tableSpec: ddl.TableSpec, *,
                             name: str = "dataset", constraint: bool = True, onDelete: Optional[str] = None,
                             **kwargs: Any) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) referencing the dataset
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the dataset
            table.  Will be modified in place.
        name : `str`, optional
            A name to use for the prefix of the new field; the full name is
            ``{name}_id``.
        constraint : `bool`, optional
            If `False` (`True` is default), add a field that can be joined to
            the dataset primary key, but do not add a foreign key constraint.
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen to
            the referencing row if the collection row is deleted.  `None`
            indicates that this should be an integrity error.
        **kwargs
            Additional keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        idSpec : `ddl.FieldSpec`
            Specification for the ID field.
        """
        raise NotImplementedError()

    @abstractmethod
    def refresh(self, *, universe: DimensionUniverse) -> None:
        """Ensure all other operations on this manager are aware of any
        dataset types that may have been registered by other clients since
        it was initialized or last refreshed.

        Parameters
        ----------
        universe : `DimensionUniverse`
            Universe graph containing all dimensions known to this `Registry`.
        """
        raise NotImplementedError()

    @abstractmethod
    def find(self, name: str) -> Optional[DatasetRecordStorage]:
        """Return an object that provides access to the records associated
        with the given `DatasetType`, if one exists.

        Parameters
        ----------
        name : `str`
            Name of the dataset type.

        Returns
        -------
        records : `DatasetRecordStorage` or `None`
            The object representing the records for the given dataset type, or
            `None` if there are no records for that dataset type.

        Notes
        -----
        Dataset types registered by another client of the same repository
        since the last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def register(self, datasetType: DatasetType) -> Tuple[DatasetRecordStorage, bool]:
        """Ensure that this `Registry` can hold records for the given
        `DatasetType`, creating new tables as necessary.

        Parameters
        ----------
        datasetType : `DatasetType`
            Dataset type for which a table should be created (as necessary)
            and an associated `DatasetRecordStorage` returned.

        Returns
        -------
        records : `DatasetRecordStorage`
            The object representing the records for the given dataset type.
        inserted : `bool`
            `True` if the dataset type did not exist in the registry before.

        Notes
        -----
        This operation may not be invoked within a `Database.transaction`
        context.
        """
        raise NotImplementedError()

    @abstractmethod
    def __iter__(self) -> Iterator[DatasetType]:
        """Return an iterator over the dataset types present in this layer.

        Notes
        -----
        Dataset types registered by another client of the same layer since
        the last call to `initialize` or `refresh` may not be included.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDatasetRef(self, id: int, *, universe: DimensionUniverse) -> Optional[DatasetRef]:
        """Return a `DatasetRef` for the given dataset primary key value.

        Parameters
        ----------
        id : `int`
            Autoincrement primary key value for the dataset.
        universe : `DimensionUniverse`
            All known dimensions.

        Returns
        -------
        ref : `DatasetRef` or `None`
            Object representing the dataset, or `None` if no dataset with the
            given primary key values exists in this layer.
        """
        raise NotImplementedError()