Coverage for python/lsst/daf/butler/registry/interfaces/_dimensions.py: 97%
39 statements
« prev ^ index » next coverage.py v7.4.4, created at 2024-04-19 03:44 -0700
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
27from __future__ import annotations
29__all__ = ("DimensionRecordStorageManager",)
31from abc import abstractmethod
32from collections.abc import Iterable, Set
33from typing import TYPE_CHECKING, Any
35from lsst.daf.relation import Join, Relation
37from ...dimensions import (
38 DataCoordinate,
39 DimensionElement,
40 DimensionGroup,
41 DimensionRecord,
42 DimensionRecordSet,
43 DimensionUniverse,
44)
45from ...dimensions.record_cache import DimensionRecordCache
46from ._versioning import VersionedExtension, VersionTuple
48if TYPE_CHECKING:
49 from ...direct_query_driver import QueryBuilder, QueryJoiner # Future query system (direct,server).
50 from ...queries.tree import Predicate # Future query system (direct,client,server).
51 from .. import queries # Old Registry.query* system.
52 from ._database import Database, StaticTablesContext
class DimensionRecordStorageManager(VersionedExtension):
    """An interface for managing the dimension records in a `Registry`.

    `DimensionRecordStorageManager` primarily serves as a container and factory
    for `DimensionRecordStorage` instances, which each provide access to the
    records for a different `DimensionElement`.

    Parameters
    ----------
    universe : `DimensionUniverse`
        Universe of all dimensions and dimension elements known to the
        `Registry`.
    registry_schema_version : `VersionTuple` or `None`, optional
        Version of registry schema.

    Notes
    -----
    In a multi-layer `Registry`, many dimension elements will only have
    records in one layer (often the base layer).  The union of the records
    across all layers forms the logical table for the full `Registry`.
    """

    def __init__(self, *, universe: DimensionUniverse, registry_schema_version: VersionTuple | None = None):
        super().__init__(registry_schema_version=registry_schema_version)
        self.universe = universe

    @abstractmethod
    def clone(self, db: Database) -> DimensionRecordStorageManager:
        """Make an independent copy of this manager instance bound to a new
        `Database` instance.

        Parameters
        ----------
        db : `Database`
            New `Database` object to use when instantiating the manager.

        Returns
        -------
        instance : `DimensionRecordStorageManager`
            New manager instance with the same configuration as this instance,
            but bound to a new Database object.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def initialize(
        cls,
        db: Database,
        context: StaticTablesContext,
        *,
        universe: DimensionUniverse,
        registry_schema_version: VersionTuple | None = None,
    ) -> DimensionRecordStorageManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present in a layer
            implemented with this manager.
        universe : `DimensionUniverse`
            Universe graph containing dimensions known to this `Registry`.
        registry_schema_version : `VersionTuple` or `None`
            Schema version of this extension as defined in registry.

        Returns
        -------
        manager : `DimensionRecordStorageManager`
            An instance of a concrete `DimensionRecordStorageManager` subclass.
        """
        raise NotImplementedError()

    # NOTE: unlike the other stubs this method is not decorated with
    # @abstractmethod; the base implementation raises, so only subclasses
    # that can run direct SQL queries need to override it.
    def fetch_cache_dict(self) -> dict[str, DimensionRecordSet]:
        """Return a `dict` that can back a `DimensionRecordCache`.

        This method is intended as the ``fetch`` callback argument to
        `DimensionRecordCache`, in contexts where direct SQL queries are
        possible.
        """
        raise NotImplementedError()

    @abstractmethod
    def insert(
        self,
        element: DimensionElement,
        *records: DimensionRecord,
        replace: bool = False,
        skip_existing: bool = False,
    ) -> None:
        """Insert one or more records into storage.

        Parameters
        ----------
        element : `DimensionElement`
            Dimension element that provides the definition for records.
        *records : `DimensionRecord`
            One or more instances of the `DimensionRecord` subclass for the
            element this storage is associated with.
        replace : `bool`, optional
            If `True` (`False` is default), replace existing records in the
            database if there is a conflict.
        skip_existing : `bool`, optional
            If `True` (`False` is default), skip insertion if a record with
            the same primary key values already exists.

        Raises
        ------
        TypeError
            Raised if the element does not support record insertion.
        sqlalchemy.exc.IntegrityError
            Raised if one or more records violate database integrity
            constraints.
        """
        raise NotImplementedError()

    @abstractmethod
    def sync(self, record: DimensionRecord, update: bool = False) -> bool | dict[str, Any]:
        """Synchronize a record with the database, inserting it only if it does
        not exist and comparing values if it does.

        Parameters
        ----------
        record : `DimensionRecord`
            An instance of the `DimensionRecord` subclass for the
            element this storage is associated with.
        update : `bool`, optional
            If `True` (`False` is default), update the existing record in the
            database if there is a conflict.

        Returns
        -------
        inserted_or_updated : `bool` or `dict`
            `True` if a new row was inserted, `False` if no changes were
            needed, or a `dict` mapping updated column names to their old
            values if an update was performed (only possible if
            ``update=True``).

        Raises
        ------
        DatabaseConflictError
            Raised if the record exists in the database (according to primary
            key lookup) but is inconsistent with the given one.
        TypeError
            Raised if the element does not support record synchronization.
        sqlalchemy.exc.IntegrityError
            Raised if one or more records violate database integrity
            constraints.
        """
        raise NotImplementedError()

    @abstractmethod
    def fetch_one(
        self,
        element_name: str,
        data_id: DataCoordinate,
        cache: DimensionRecordCache,
    ) -> DimensionRecord | None:
        """Retrieve a single record from storage.

        Parameters
        ----------
        element_name : `str`
            Name of the dimension element for the record to fetch.
        data_id : `DataCoordinate`
            Data ID of the record to fetch.  Implied dimensions do not need to
            be present.
        cache : `DimensionRecordCache`
            Cache to look in first.

        Returns
        -------
        record : `DimensionRecord` or `None`
            Fetched record, or *possibly* `None` if there was no match for the
            given data ID.
        """
        raise NotImplementedError()

    @abstractmethod
    def save_dimension_group(self, group: DimensionGroup) -> int:
        """Save a `DimensionGroup` definition to the database, allowing it to
        be retrieved later via the returned key.

        Parameters
        ----------
        group : `DimensionGroup`
            Set of dimensions to save.

        Returns
        -------
        key : `int`
            Integer used as the unique key for this `DimensionGroup` in the
            database.

        Raises
        ------
        TransactionInterruption
            Raised if this operation is invoked within a `Database.transaction`
            context.
        """
        raise NotImplementedError()

    @abstractmethod
    def load_dimension_group(self, key: int) -> DimensionGroup:
        """Retrieve a `DimensionGroup` that was previously saved in the
        database.

        Parameters
        ----------
        key : `int`
            Integer used as the unique key for this `DimensionGroup` in the
            database.

        Returns
        -------
        dimensions : `DimensionGroup`
            Retrieved dimensions.

        Raises
        ------
        KeyError
            Raised if the given key cannot be found in the database.
        """
        raise NotImplementedError()

    @abstractmethod
    def join(
        self,
        element_name: str,
        target: Relation,
        join: Join,
        context: queries.SqlQueryContext,
    ) -> Relation:
        """Join this dimension element's records to a relation.

        Parameters
        ----------
        element_name : `str`
            Name of the dimension element whose relation should be joined in.
        target : `~lsst.daf.relation.Relation`
            Existing relation to join to.  Implementations may require that
            this relation already include dimension key columns for this
            dimension element and assume that dataset or spatial join relations
            that might provide these will be included in the relation tree
            first.
        join : `~lsst.daf.relation.Join`
            Join operation to use when the implementation is an actual join.
            When a true join is being simulated by other relation operations,
            this objects `~lsst.daf.relation.Join.min_columns` and
            `~lsst.daf.relation.Join.max_columns` should still be respected.
        context : `.queries.SqlQueryContext`
            Object that manages relation engines and database-side state (e.g.
            temporary tables) for the query.

        Returns
        -------
        joined : `~lsst.daf.relation.Relation`
            New relation that includes this relation's dimension key and record
            columns, as well as all columns in ``target``, with rows
            constrained to those for which this element's dimension key values
            exist in the registry and rows already exist in ``target``.
        """
        raise NotImplementedError()

    @abstractmethod
    def make_spatial_join_relation(
        self,
        element1: str,
        element2: str,
        context: queries.SqlQueryContext,
        existing_relationships: Set[frozenset[str]] = frozenset(),
    ) -> tuple[Relation, bool]:
        """Create a relation that represents the spatial join between two
        dimension elements.

        Parameters
        ----------
        element1 : `str`
            Name of one of the elements participating in the join.
        element2 : `str`
            Name of the other element participating in the join.
        context : `.queries.SqlQueryContext`
            Object that manages relation engines and database-side state
            (e.g. temporary tables) for the query.
        existing_relationships : `~collections.abc.Set` [ `frozenset` [ `str` \
                ] ], optional
            Relationships between dimensions that are already present in the
            relation the result will be joined to.  Spatial join relations
            that duplicate these relationships will not be included in the
            result, which may cause an identity relation to be returned if
            a spatial relationship has already been established.

        Returns
        -------
        relation : `lsst.daf.relation.Relation`
            New relation that represents a spatial join between the two given
            elements.  Guaranteed to have key columns for all required
            dimensions of both elements.
        needs_refinement : `bool`
            Whether the returned relation represents a conservative join that
            needs refinement via native-iteration predicate.
        """
        raise NotImplementedError()

    @abstractmethod
    def make_query_joiner(self, element: DimensionElement, fields: Set[str]) -> QueryJoiner:
        """Make a `..direct_query_driver.QueryJoiner` that represents a
        dimension element table.

        Parameters
        ----------
        element : `DimensionElement`
            Dimension element the table corresponds to.
        fields : `~collections.abc.Set` [ `str` ]
            Names of fields to make available in the joiner.  These can be any
            metadata or alternate key field in the element's schema, including
            the special ``region`` and ``timespan`` fields.  Dimension keys in
            the element's schema are always included.

        Returns
        -------
        joiner : `..direct_query_driver.QueryJoiner`
            A query-construction object representing a table or subquery.  This
            is guaranteed to have rows that are unique over dimension keys and
            all possible key values for this dimension, so joining in a
            dimension element table:

            - never introduces duplicates into the query's result rows;
            - never restricts the query's rows *except* to ensure
              required-implied relationships are followed.
        """
        raise NotImplementedError()

    @abstractmethod
    def process_query_overlaps(
        self,
        dimensions: DimensionGroup,
        predicate: Predicate,
        join_operands: Iterable[DimensionGroup],
    ) -> tuple[Predicate, QueryBuilder]:
        """Process a query's WHERE predicate and dimensions to handle spatial
        and temporal overlaps.

        Parameters
        ----------
        dimensions : `..dimensions.DimensionGroup`
            Full dimensions of all tables to be joined into the query (even if
            they are not included in the query results).
        predicate : `..queries.tree.Predicate`
            Boolean column expression that may contain user-provided spatial
            and/or temporal overlaps intermixed with other constraints.
        join_operands : `~collections.abc.Iterable` [ \
                `..dimensions.DimensionGroup` ]
            Dimensions of tables or subqueries that are already going to be
            joined into the query that may establish their own spatial or
            temporal relationships (e.g. a dataset search with both ``visit``
            and ``patch`` dimensions).

        Returns
        -------
        predicate : `..queries.tree.Predicate`
            A version of the given predicate that preserves the overall
            behavior of the filter while possibly rewriting overlap expressions
            that have been partially moved into ``builder`` as some combination
            of new nested predicates, joins, and postprocessing.
        builder : `..direct_query_driver.QueryBuilder`
            A query-construction helper object that includes any initial joins
            and postprocessing needed to handle overlap expression extracted
            from the original predicate.

        Notes
        -----
        Implementations must delegate to `.queries.overlaps.OverlapsVisitor`
        (possibly by subclassing it) to ensure "automatic" spatial and temporal
        joins are added consistently by all query-construction implementations.
        """
        raise NotImplementedError()

    universe: DimensionUniverse
    """Universe of all dimensions and dimension elements known to the
    `Registry` (`DimensionUniverse`).
    """