Coverage for python/lsst/daf/butler/registry/interfaces/_dimensions.py: 96%
33 statements
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-16 10:44 +0000
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-16 10:44 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
27from __future__ import annotations
29__all__ = ("DimensionRecordStorageManager",)
31from abc import abstractmethod
32from collections.abc import Set
33from typing import TYPE_CHECKING, Any
35from lsst.daf.relation import Join, Relation
37from ...dimensions import (
38 DataCoordinate,
39 DimensionElement,
40 DimensionGroup,
41 DimensionRecord,
42 DimensionRecordSet,
43 DimensionUniverse,
44)
45from ...dimensions.record_cache import DimensionRecordCache
46from ._versioning import VersionedExtension, VersionTuple
48if TYPE_CHECKING:
49 from .. import queries
50 from ._database import Database, StaticTablesContext
class DimensionRecordStorageManager(VersionedExtension):
    """An interface for managing the dimension records in a `Registry`.

    `DimensionRecordStorageManager` primarily serves as a container and factory
    for `DimensionRecordStorage` instances, which each provide access to the
    records for a different `DimensionElement`.

    Parameters
    ----------
    universe : `DimensionUniverse`
        Universe of all dimensions and dimension elements known to the
        `Registry`.
    registry_schema_version : `VersionTuple` or `None`, optional
        Version of registry schema.

    Notes
    -----
    In a multi-layer `Registry`, many dimension elements will only have
    records in one layer (often the base layer).  The union of the records
    across all layers forms the logical table for the full `Registry`.

    Both constructor arguments are keyword-only.
    """

    # NOTE(review): the docstring above refers to `DimensionRecordStorage`
    # instances, but no method visible on this interface returns such an
    # object; the per-element operations below take the element (or its name)
    # directly.  Confirm whether that paragraph is stale.

    def __init__(self, *, universe: DimensionUniverse, registry_schema_version: VersionTuple | None = None) -> None:
        # The schema version is handled entirely by the `VersionedExtension`
        # base class; this class only keeps hold of the universe.
        super().__init__(registry_schema_version=registry_schema_version)
        self.universe = universe

    @classmethod
    @abstractmethod
    def initialize(
        cls,
        db: Database,
        context: StaticTablesContext,
        *,
        universe: DimensionUniverse,
        registry_schema_version: VersionTuple | None = None,
    ) -> DimensionRecordStorageManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present in a layer
            implemented with this manager.
        universe : `DimensionUniverse`
            Universe graph containing dimensions known to this `Registry`.
            Keyword-only.
        registry_schema_version : `VersionTuple` or `None`, optional
            Schema version of this extension as defined in registry.
            Keyword-only.

        Returns
        -------
        manager : `DimensionRecordStorageManager`
            An instance of a concrete `DimensionRecordStorageManager` subclass.
        """
        raise NotImplementedError()

    def fetch_cache_dict(self) -> dict[str, DimensionRecordSet]:
        """Return a `dict` of `DimensionRecordSet` objects for use by a
        `DimensionRecordCache`.

        This method is intended as the ``fetch`` callback argument to
        `DimensionRecordCache`, in contexts where direct SQL queries are
        possible.

        Returns
        -------
        cache_dict : `dict` [ `str`, `DimensionRecordSet` ]
            Record sets keyed by `str` (presumably the dimension element
            name; to be confirmed against a concrete implementation).

        Notes
        -----
        Unlike the other methods of this interface, this one is not declared
        abstract; this base implementation always raises
        `NotImplementedError`.
        """
        # NOTE(review): presumably left non-abstract on purpose so that
        # managers without direct SQL access need not implement it - confirm.
        raise NotImplementedError()

    @abstractmethod
    def insert(
        self,
        element: DimensionElement,
        *records: DimensionRecord,
        cache: DimensionRecordCache,
        replace: bool = False,
        skip_existing: bool = False,
    ) -> None:
        """Insert one or more records into storage.

        Parameters
        ----------
        element : `DimensionElement`
            Dimension element that provides the definition for records.
        *records : `DimensionRecord`
            One or more instances of the `DimensionRecord` subclass for
            ``element``.
        cache : `DimensionRecordCache`
            Cache of dimension records to update along with the database.
            Keyword-only.
        replace : `bool`, optional
            If `True` (`False` is default), replace existing records in the
            database if there is a conflict.  Keyword-only.
        skip_existing : `bool`, optional
            If `True` (`False` is default), skip insertion if a record with
            the same primary key values already exists.  Keyword-only.

        Raises
        ------
        TypeError
            Raised if the element does not support record insertion.
        sqlalchemy.exc.IntegrityError
            Raised if one or more records violate database integrity
            constraints.
        """
        raise NotImplementedError()

    @abstractmethod
    def sync(
        self, record: DimensionRecord, cache: DimensionRecordCache, update: bool = False
    ) -> bool | dict[str, Any]:
        """Synchronize a record with the database, inserting it only if it does
        not exist and comparing values if it does.

        Parameters
        ----------
        record : `DimensionRecord`
            The record to synchronize; an instance of a `DimensionRecord`
            subclass.
        cache : `DimensionRecordCache`
            Cache of dimension records to update along with the database.
        update : `bool`, optional
            If `True` (`False` is default), update the existing record in the
            database if there is a conflict.

        Returns
        -------
        inserted_or_updated : `bool` or `dict`
            `True` if a new row was inserted, `False` if no changes were
            needed, or a `dict` mapping updated column names to their old
            values if an update was performed (only possible if
            ``update=True``).

        Raises
        ------
        DatabaseConflictError
            Raised if the record exists in the database (according to primary
            key lookup) but is inconsistent with the given one.
        TypeError
            Raised if the element does not support record synchronization.
        sqlalchemy.exc.IntegrityError
            Raised if one or more records violate database integrity
            constraints.
        """
        raise NotImplementedError()

    @abstractmethod
    def fetch_one(
        self,
        element_name: str,
        data_id: DataCoordinate,
        cache: DimensionRecordCache,
    ) -> DimensionRecord | None:
        """Retrieve a single record from storage.

        Parameters
        ----------
        element_name : `str`
            Name of the dimension element for the record to fetch.
        data_id : `DataCoordinate`
            Data ID of the record to fetch.  Implied dimensions do not need to
            be present.
        cache : `DimensionRecordCache`
            Cache to look in first.

        Returns
        -------
        record : `DimensionRecord` or `None`
            Fetched record, or *possibly* `None` if there was no match for the
            given data ID.
        """
        raise NotImplementedError()

    @abstractmethod
    def save_dimension_group(self, group: DimensionGroup) -> int:
        """Save a `DimensionGroup` definition to the database, allowing it to
        be retrieved later via the returned key.

        Parameters
        ----------
        group : `DimensionGroup`
            Set of dimensions to save.

        Returns
        -------
        key : `int`
            Integer used as the unique key for this `DimensionGroup` in the
            database.

        Raises
        ------
        TransactionInterruption
            Raised if this operation is invoked within a `Database.transaction`
            context.
        """
        raise NotImplementedError()

    @abstractmethod
    def load_dimension_group(self, key: int) -> DimensionGroup:
        """Retrieve a `DimensionGroup` that was previously saved in the
        database.

        Parameters
        ----------
        key : `int`
            Integer used as the unique key for this `DimensionGroup` in the
            database.

        Returns
        -------
        dimensions : `DimensionGroup`
            Retrieved dimensions.

        Raises
        ------
        KeyError
            Raised if the given key cannot be found in the database.
        """
        raise NotImplementedError()

    @abstractmethod
    def join(
        self,
        element_name: str,
        target: Relation,
        join: Join,
        context: queries.SqlQueryContext,
    ) -> Relation:
        """Join a dimension element's records to a relation.

        Parameters
        ----------
        element_name : `str`
            Name of the dimension element whose relation should be joined in.
        target : `~lsst.daf.relation.Relation`
            Existing relation to join to.  Implementations may require that
            this relation already include dimension key columns for this
            dimension element and assume that dataset or spatial join relations
            that might provide these will be included in the relation tree
            first.
        join : `~lsst.daf.relation.Join`
            Join operation to use when the implementation is an actual join.
            When a true join is being simulated by other relation operations,
            this object's `~lsst.daf.relation.Join.min_columns` and
            `~lsst.daf.relation.Join.max_columns` should still be respected.
        context : `.queries.SqlQueryContext`
            Object that manages relation engines and database-side state (e.g.
            temporary tables) for the query.

        Returns
        -------
        joined : `~lsst.daf.relation.Relation`
            New relation that includes the element's dimension key and record
            columns, as well as all columns in ``target``, with rows
            constrained to those for which this element's dimension key values
            exist in the registry and rows already exist in ``target``.
        """
        raise NotImplementedError()

    @abstractmethod
    def make_spatial_join_relation(
        self,
        element1: str,
        element2: str,
        context: queries.SqlQueryContext,
        existing_relationships: Set[frozenset[str]] = frozenset(),
    ) -> tuple[Relation, bool]:
        """Create a relation that represents the spatial join between two
        dimension elements.

        Parameters
        ----------
        element1 : `str`
            Name of one of the elements participating in the join.
        element2 : `str`
            Name of the other element participating in the join.
        context : `.queries.SqlQueryContext`
            Object that manages relation engines and database-side state
            (e.g. temporary tables) for the query.
        existing_relationships : `~collections.abc.Set` [ `frozenset` [ `str` \
                ] ], optional
            Relationships between dimensions that are already present in the
            relation the result will be joined to.  Spatial join relations
            that duplicate these relationships will not be included in the
            result, which may cause an identity relation to be returned if
            a spatial relationship has already been established.  Defaults to
            an empty `frozenset`.

        Returns
        -------
        relation : `lsst.daf.relation.Relation`
            New relation that represents a spatial join between the two given
            elements.  Guaranteed to have key columns for all required
            dimensions of both elements.
        needs_refinement : `bool`
            Whether the returned relation represents a conservative join that
            needs refinement via native-iteration predicate.
        """
        raise NotImplementedError()

    # Class-level annotation for the attribute assigned in `__init__`; the
    # string that follows is its attribute docstring.
    universe: DimensionUniverse
    """Universe of all dimensions and dimension elements known to the
    `Registry` (`DimensionUniverse`).
    """