Coverage for python/lsst/daf/butler/registry/interfaces/_datasets.py : 62%

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ("DatasetRecordStorageManager", "DatasetRecordStorage")

from abc import ABC, abstractmethod
from typing import (
    Any,
    Dict,
    Iterable,
    Iterator,
    Optional,
    Tuple,
    TYPE_CHECKING,
)

from ...core import (
    DataCoordinate,
    DatasetRef,
    DatasetType,
    ddl,
    ExpandedDataCoordinate,
    Quantum,
)
from ..simpleQuery import Select

if TYPE_CHECKING:
    from ...core import DimensionUniverse
    from ._database import Database, StaticTablesContext
    from ._collections import CollectionManager, CollectionRecord, RunRecord
    from ..simpleQuery import SimpleQuery


class DatasetRecordStorage(ABC):
    """An interface that manages the records associated with a particular
    `DatasetType`.

    Parameters
    ----------
    datasetType : `DatasetType`
        Dataset type whose records this object manages.
    """

    def __init__(self, datasetType: DatasetType):
        self.datasetType = datasetType

    @abstractmethod
    def insert(self, run: RunRecord, dataIds: Iterable[ExpandedDataCoordinate], *,
               quantum: Optional[Quantum] = None) -> Iterator[DatasetRef]:
        """Insert one or more dataset entries into the database.

        Parameters
        ----------
        run : `RunRecord`
            The record object describing the `~CollectionType.RUN` collection
            this dataset will be associated with.
        dataIds : `Iterable` [ `ExpandedDataCoordinate` ]
            Expanded data IDs (`ExpandedDataCoordinate` instances) for the
            datasets to be added.  The dimensions of all data IDs must be the
            same as ``self.datasetType.dimensions``.
        quantum : `Quantum`, optional
            The `Quantum` instance that should be recorded as responsible for
            producing this dataset.

        Returns
        -------
        datasets : `Iterator` [ `DatasetRef` ]
            References to the inserted datasets.

        Notes
        -----
        This method does not insert component datasets recursively, as those
        have a different `DatasetType` than their parent and hence are managed
        by a different `DatasetRecordStorage` instance.
        """
        raise NotImplementedError()
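
    # A minimal usage sketch (hypothetical names: ``storage`` is assumed to be
    # a concrete DatasetRecordStorage instance, and ``run``/``dataId`` to come
    # from the enclosing Registry)::
    #
    #     refs = list(storage.insert(run, [dataId]))
    #     # Every returned ref is resolved, i.e. has an integer ``id``.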

    @abstractmethod
    def find(self, collection: CollectionRecord, dataId: DataCoordinate) -> Optional[DatasetRef]:
        """Search a collection for a dataset with the given data ID.

        Parameters
        ----------
        collection : `CollectionRecord`
            The record object describing the collection to search for the
            dataset.  May have any `CollectionType`.
        dataId : `DataCoordinate`
            Complete (but not necessarily expanded) data ID to search with,
            with ``dataId.graph == self.datasetType.dimensions``.

        Returns
        -------
        ref : `DatasetRef` or `None`
            A resolved `DatasetRef` (without components populated), or `None`
            if no matching dataset was found.
        """
        raise NotImplementedError()
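
    # A minimal usage sketch (hypothetical names; ``collection`` is assumed to
    # be a CollectionRecord obtained from a CollectionManager)::
    #
    #     ref = storage.find(collection, dataId)
    #     if ref is None:
    #         ...  # no dataset of this type with this data ID in the collection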

    @abstractmethod
    def associate(self, collection: CollectionRecord, datasets: Iterable[DatasetRef]) -> None:
        """Associate one or more datasets with a collection.

        Parameters
        ----------
        collection : `CollectionRecord`
            The record object describing the collection.  ``collection.type``
            must be `~CollectionType.TAGGED`.
        datasets : `Iterable` [ `DatasetRef` ]
            Datasets to be associated.  All datasets must be resolved and have
            the same `DatasetType` as ``self``.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any of the given `DatasetRef` instances is unresolved.

        Notes
        -----
        Associating a dataset into a collection that already contains a
        different dataset with the same `DatasetType` and data ID will remove
        the existing dataset from that collection.

        Associating the same dataset into a collection multiple times is a
        no-op, but is still not permitted on read-only databases.
        """
        raise NotImplementedError()

    @abstractmethod
    def disassociate(self, collection: CollectionRecord, datasets: Iterable[DatasetRef]) -> None:
        """Remove one or more datasets from a collection.

        Parameters
        ----------
        collection : `CollectionRecord`
            The record object describing the collection.  ``collection.type``
            must be `~CollectionType.TAGGED`.
        datasets : `Iterable` [ `DatasetRef` ]
            Datasets to be disassociated.  All datasets must be resolved and
            have the same `DatasetType` as ``self``.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any of the given `DatasetRef` instances is unresolved.
        """
        raise NotImplementedError()
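
    # A minimal usage sketch covering associate/disassociate (hypothetical
    # names; ``tagged`` is assumed to be a TAGGED CollectionRecord and
    # ``refs`` a list of resolved DatasetRef instances)::
    #
    #     storage.associate(tagged, refs)     # tag the datasets
    #     storage.disassociate(tagged, refs)  # and remove the tags again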

    @abstractmethod
    def select(self, collection: CollectionRecord,
               dataId: Select.Or[DataCoordinate] = Select,
               id: Select.Or[Optional[int]] = Select,
               run: Select.Or[None] = Select,
               ) -> Optional[SimpleQuery]:
        """Return a SQLAlchemy object that represents a ``SELECT`` query for
        this `DatasetType`.

        All arguments can either be a value that constrains the query or
        the `Select` tag object to indicate that the value should be returned
        in the columns in the ``SELECT`` clause.  The default is `Select`.

        Parameters
        ----------
        collection : `CollectionRecord`
            The record object describing the collection to query.  May not be
            of type `CollectionType.CHAINED`.
        dataId : `DataCoordinate` or `Select`
            The data ID to restrict results with, or an instruction to return
            the data ID via columns with names
            ``self.datasetType.dimensions.names``.
        id : `int`, `Select`, or `None`
            The integer primary key value for the dataset, an instruction to
            return it via an ``id`` column, or `None` to ignore it entirely.
        run : `None` or `Select`
            If `Select` (default), include the dataset's run key value (as a
            column labeled with the return value of
            ``CollectionManager.getRunForeignKeyName``).
            If `None`, do not include this column (to constrain the run,
            pass a `RunRecord` as the ``collection`` argument instead).

        Returns
        -------
        query : `SimpleQuery` or `None`
            A struct containing the SQLAlchemy object representing a
            simple ``SELECT`` query, or `None` if it is known that there are
            no datasets of this `DatasetType` that match the given constraints.
        """
        raise NotImplementedError()
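
    # A minimal usage sketch (hypothetical names): constrain on a data ID and
    # ask only for the dataset ``id`` column back.  How the resulting
    # SimpleQuery is turned into executable SQL is an assumption here, not
    # something defined in this module::
    #
    #     query = storage.select(collection, dataId=dataId, id=Select, run=None)
    #     if query is None:
    #         ...  # no matching datasets can exist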

    datasetType: DatasetType
    """Dataset type whose records this object manages (`DatasetType`).
    """


class DatasetRecordStorageManager(ABC):
    """An interface that manages the tables that describe datasets.

    `DatasetRecordStorageManager` primarily serves as a container and factory
    for `DatasetRecordStorage` instances, which each provide access to the
    records for a different `DatasetType`.
    """

    @classmethod
    @abstractmethod
    def initialize(cls, db: Database, context: StaticTablesContext, *, collections: CollectionManager,
                   universe: DimensionUniverse) -> DatasetRecordStorageManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present.
        collections : `CollectionManager`
            Manager object for the collections in this `Registry`.
        universe : `DimensionUniverse`
            Universe graph containing all dimensions known to this `Registry`.

        Returns
        -------
        manager : `DatasetRecordStorageManager`
            An instance of a concrete `DatasetRecordStorageManager` subclass.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addDatasetForeignKey(cls, tableSpec: ddl.TableSpec, *,
                             name: str = "dataset", constraint: bool = True, onDelete: Optional[str] = None,
                             **kwargs: Any) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) referencing the dataset
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the dataset
            table.  Will be modified in place.
        name : `str`, optional
            A name to use for the prefix of the new field; the full name is
            ``{name}_id``.
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen to
            the referencing row if the dataset row is deleted.  `None`
            indicates that this should be an integrity error.
        constraint : `bool`, optional
            If `False` (`True` is default), add a field that can be joined to
            the dataset primary key, but do not add a foreign key constraint.
        **kwargs
            Additional keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        idSpec : `ddl.FieldSpec`
            Specification for the ID field.
        """
        raise NotImplementedError()
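
    # A minimal usage sketch (hypothetical names; the ddl.TableSpec
    # construction shown is an assumption, not defined in this module)::
    #
    #     spec = ddl.TableSpec(fields=[])
    #     idSpec = manager.addDatasetForeignKey(spec, onDelete="CASCADE")
    #     # ``spec`` now has a ``dataset_id`` field, plus a foreign key
    #     # constraint on it because ``constraint`` defaulted to True.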

    @abstractmethod
    def refresh(self, *, universe: DimensionUniverse) -> None:
        """Ensure all other operations on this manager are aware of any
        dataset types that may have been registered by other clients since
        it was initialized or last refreshed.
        """
        raise NotImplementedError()

    @abstractmethod
    def find(self, name: str) -> Optional[DatasetRecordStorage]:
        """Return an object that provides access to the records associated
        with the given `DatasetType`, if one exists.

        Parameters
        ----------
        name : `str`
            Name of the dataset type.

        Returns
        -------
        records : `DatasetRecordStorage` or `None`
            The object representing the records for the given dataset type, or
            `None` if there are no records for that dataset type.

        Notes
        -----
        Dataset types registered by another client of the same repository
        since the last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()
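
    # A minimal usage sketch (hypothetical names; "calexp" is an illustrative
    # dataset type name)::
    #
    #     storage = manager.find("calexp")
    #     if storage is None:
    #         ...  # not registered, or registered after the last refresh()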

    @abstractmethod
    def register(self, datasetType: DatasetType) -> Tuple[DatasetRecordStorage, bool]:
        """Ensure that this `Registry` can hold records for the given
        `DatasetType`, creating new tables as necessary.

        Parameters
        ----------
        datasetType : `DatasetType`
            Dataset type for which a table should be created (as necessary)
            and an associated `DatasetRecordStorage` returned.

        Returns
        -------
        records : `DatasetRecordStorage`
            The object representing the records for the given dataset type.
        inserted : `bool`
            `True` if the dataset type did not exist in the registry before.

        Notes
        -----
        This operation may not be invoked within a `Database.transaction`
        context.
        """
        raise NotImplementedError()
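
    # A minimal usage sketch (hypothetical names; ``datasetType`` is assumed
    # to be a DatasetType constructed elsewhere)::
    #
    #     storage, inserted = manager.register(datasetType)
    #     # ``storage`` is usable either way; ``inserted`` reports whether the
    #     # dataset type was new to this registry.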

    @abstractmethod
    def __iter__(self) -> Iterator[DatasetType]:
        """Return an iterator over the dataset types present in this layer.

        Notes
        -----
        Dataset types registered by another client of the same layer since
        the last call to `initialize` or `refresh` may not be included.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDatasetRef(self, id: int) -> Optional[DatasetRef]:
        """Return a `DatasetRef` for the given dataset primary key value.

        Parameters
        ----------
        id : `int`
            Autoincrement primary key value for the dataset.

        Returns
        -------
        ref : `DatasetRef` or `None`
            Object representing the dataset, or `None` if no dataset with the
            given primary key value exists in this layer.
        """
        raise NotImplementedError()
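
    # A minimal usage sketch (the key value is illustrative only)::
    #
    #     ref = manager.getDatasetRef(42)
    #     if ref is not None:
    #         assert ref.id == 42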

    @abstractmethod
    def attachComponents(self, composites: Iterable[Tuple[DatasetRef, Dict[str, DatasetRef]]]
                         ) -> Iterator[DatasetRef]:
        """Attach components to one or more datasets.

        Parameters
        ----------
        composites : `Iterable` [ `tuple` [ `DatasetRef`, `dict` ] ]
            Iterable over parents and dictionaries of components.  Both parent
            and child `DatasetRef` instances must be resolved, and dict keys
            are assumed (not necessarily checked) to match the component names
            in the parent's storage class.

        Yields
        ------
        parent : `DatasetRef`
            Parent `DatasetRef` instances with `DatasetRef.components`
            dictionaries updated to include new components.
        """
        raise NotImplementedError()
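
    # A minimal usage sketch (hypothetical names; ``parent`` and ``wcsRef``
    # are assumed to be resolved DatasetRef instances, with "wcs" a component
    # of the parent's storage class)::
    #
    #     parent, = manager.attachComponents([(parent, {"wcs": wcsRef})])
    #     # ``parent.components["wcs"]`` is now ``wcsRef``.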

    @abstractmethod
    def fetchComponents(self, ref: DatasetRef) -> DatasetRef:
        """Load references for all components and attach them to a
        `DatasetRef`.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the parent dataset.  If this dataset is not a
            composite it will be returned unmodified.

        Returns
        -------
        parent : `DatasetRef`
            Version of ``ref`` with components attached.

        Raises
        ------
        AmbiguousDatasetError
            Raised if the given `DatasetRef` is unresolved.
        """
        raise NotImplementedError()
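
    # A minimal usage sketch (``ref`` must already be resolved, or
    # AmbiguousDatasetError is raised)::
    #
    #     ref = manager.fetchComponents(ref)
    #     # ``ref.components`` now maps component names to resolved refs.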