Coverage for python/lsst/daf/butler/registry/interfaces/_dimensions.py: 94%
34 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-28 08:36 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
27from __future__ import annotations
29__all__ = ("DimensionRecordStorageManager",)
31from abc import abstractmethod
32from collections.abc import Iterable, Mapping, Set
33from typing import TYPE_CHECKING, Any
35from ...dimensions import (
36 DataCoordinate,
37 DataIdValue,
38 DimensionElement,
39 DimensionGroup,
40 DimensionRecord,
41 DimensionRecordSet,
42 DimensionUniverse,
43)
44from ...dimensions.record_cache import DimensionRecordCache
45from ._versioning import VersionedExtension, VersionTuple
47if TYPE_CHECKING:
48 from ...direct_query_driver import ( # Future query system (direct,server).
49 Postprocessing,
50 SqlJoinsBuilder,
51 SqlSelectBuilder,
52 )
53 from ...queries.tree import AnyDatasetType, Predicate # Future query system (direct,client,server).
54 from ._database import Database, StaticTablesContext
class DimensionRecordStorageManager(VersionedExtension):
    """An interface for managing the dimension records in a `Registry`.

    `DimensionRecordStorageManager` primarily serves as a container and factory
    for `DimensionRecordStorage` instances, which each provide access to the
    records for a different `DimensionElement`.

    Parameters
    ----------
    universe : `DimensionUniverse`
        Universe of all dimensions and dimension elements known to the
        `Registry`.
    registry_schema_version : `VersionTuple` or `None`, optional
        Version of registry schema.

    Notes
    -----
    In a multi-layer `Registry`, many dimension elements will only have
    records in one layer (often the base layer). The union of the records
    across all layers forms the logical table for the full `Registry`.
    """

    def __init__(self, *, universe: DimensionUniverse, registry_schema_version: VersionTuple | None = None):
        super().__init__(registry_schema_version=registry_schema_version)
        self.universe = universe

    @abstractmethod
    def clone(self, db: Database) -> DimensionRecordStorageManager:
        """Make an independent copy of this manager instance bound to a new
        `Database` instance.

        Parameters
        ----------
        db : `Database`
            New `Database` object to use when instantiating the manager.

        Returns
        -------
        instance : `DimensionRecordStorageManager`
            New manager instance with the same configuration as this instance,
            but bound to a new Database object.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def initialize(
        cls,
        db: Database,
        context: StaticTablesContext,
        *,
        universe: DimensionUniverse,
        registry_schema_version: VersionTuple | None = None,
    ) -> DimensionRecordStorageManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present in a layer
            implemented with this manager.
        universe : `DimensionUniverse`
            Universe graph containing dimensions known to this `Registry`.
        registry_schema_version : `VersionTuple` or `None`
            Schema version of this extension as defined in registry.

        Returns
        -------
        manager : `DimensionRecordStorageManager`
            An instance of a concrete `DimensionRecordStorageManager` subclass.
        """
        raise NotImplementedError()

    def fetch_cache_dict(self) -> dict[str, DimensionRecordSet]:
        """Return a `dict` that can back a `DimensionRecordCache`.

        This method is intended as the ``fetch`` callback argument to
        `DimensionRecordCache`, in contexts where direct SQL queries are
        possible.

        Returns
        -------
        cache_dict : `dict` [ `str`, `DimensionRecordSet` ]
            Dictionary of dimension record sets, presumably keyed by
            dimension element name (to be confirmed against
            `DimensionRecordCache`).

        Notes
        -----
        Unlike the other unimplemented methods on this class, this one is
        deliberately not abstract: the default implementation raises
        `NotImplementedError`, so implementations for which direct SQL
        queries are not possible can leave it unimplemented.
        """
        raise NotImplementedError()

    @abstractmethod
    def insert(
        self,
        element: DimensionElement,
        *records: DimensionRecord,
        replace: bool = False,
        skip_existing: bool = False,
    ) -> None:
        """Insert one or more records into storage.

        Parameters
        ----------
        element : `DimensionElement`
            Dimension element that provides the definition for records.
        *records : `DimensionRecord`
            One or more instances of the `DimensionRecord` subclass for the
            element this storage is associated with.
        replace : `bool`, optional
            If `True` (`False` is default), replace existing records in the
            database if there is a conflict.
        skip_existing : `bool`, optional
            If `True` (`False` is default), skip insertion if a record with
            the same primary key values already exists.

        Raises
        ------
        TypeError
            Raised if the element does not support record insertion.
        sqlalchemy.exc.IntegrityError
            Raised if one or more records violate database integrity
            constraints.
        """
        raise NotImplementedError()

    @abstractmethod
    def sync(self, record: DimensionRecord, update: bool = False) -> bool | dict[str, Any]:
        """Synchronize a record with the database, inserting it only if it does
        not exist and comparing values if it does.

        Parameters
        ----------
        record : `DimensionRecord`
            An instance of the `DimensionRecord` subclass for the
            element this storage is associated with.
        update : `bool`, optional
            If `True` (`False` is default), update the existing record in the
            database if there is a conflict.

        Returns
        -------
        inserted_or_updated : `bool` or `dict`
            `True` if a new row was inserted, `False` if no changes were
            needed, or a `dict` mapping updated column names to their old
            values if an update was performed (only possible if
            ``update=True``).

        Raises
        ------
        DatabaseConflictError
            Raised if the record exists in the database (according to primary
            key lookup) but is inconsistent with the given one.
        TypeError
            Raised if the element does not support record synchronization.
        sqlalchemy.exc.IntegrityError
            Raised if one or more records violate database integrity
            constraints.
        """
        raise NotImplementedError()

    @abstractmethod
    def fetch_one(
        self,
        element_name: str,
        data_id: DataCoordinate,
        cache: DimensionRecordCache,
    ) -> DimensionRecord | None:
        """Retrieve a single record from storage.

        Parameters
        ----------
        element_name : `str`
            Name of the dimension element for the record to fetch.
        data_id : `DataCoordinate`
            Data ID of the record to fetch.  Implied dimensions do not need to
            be present.
        cache : `DimensionRecordCache`
            Cache to look in first.

        Returns
        -------
        record : `DimensionRecord` or `None`
            Fetched record, or *possibly* `None` if there was no match for the
            given data ID.
        """
        raise NotImplementedError()

    @abstractmethod
    def save_dimension_group(self, group: DimensionGroup) -> int:
        """Save a `DimensionGroup` definition to the database, allowing it to
        be retrieved later via the returned key.

        If this dimension group has already been saved, this method just
        returns the key already associated with it.

        Parameters
        ----------
        group : `DimensionGroup`
            Set of dimensions to save.

        Returns
        -------
        key : `int`
            Integer used as the unique key for this `DimensionGroup` in the
            database.

        Raises
        ------
        TransactionInterruption
            Raised if this operation is invoked within a `Database.transaction`
            context.
        """
        raise NotImplementedError()

    @abstractmethod
    def load_dimension_group(self, key: int) -> DimensionGroup:
        """Retrieve a `DimensionGroup` that was previously saved in the
        database.

        Parameters
        ----------
        key : `int`
            Integer used as the unique key for this `DimensionGroup` in the
            database.

        Returns
        -------
        dimensions : `DimensionGroup`
            Retrieved dimensions.

        Raises
        ------
        KeyError
            Raised if the given key cannot be found in the database.
        """
        raise NotImplementedError()

    @abstractmethod
    def make_joins_builder(self, element: DimensionElement, fields: Set[str]) -> SqlJoinsBuilder:
        """Make a `~lsst.daf.butler.direct_query_driver.SqlJoinsBuilder` that
        represents a dimension element table.

        Parameters
        ----------
        element : `DimensionElement`
            Dimension element the table corresponds to.
        fields : `~collections.abc.Set` [ `str` ]
            Names of fields to make available in the builder.  These can be any
            metadata or alternate key field in the element's schema, including
            the special ``region`` and ``timespan`` fields.  Dimension keys in
            the element's schema are always included.

        Returns
        -------
        builder : `~lsst.daf.butler.direct_query_driver.SqlJoinsBuilder`
            A query-construction object representing a table or subquery.  This
            is guaranteed to have rows that are unique over dimension keys and
            all possible key values for this dimension, so joining in a
            dimension element table:

            - never introduces duplicates into the query's result rows;
            - never restricts the query's rows *except* to ensure
              required-implied relationships are followed.
        """
        raise NotImplementedError()

    @abstractmethod
    def process_query_overlaps(
        self,
        dimensions: DimensionGroup,
        predicate: Predicate,
        join_operands: Iterable[DimensionGroup],
        calibration_dataset_types: Set[str | AnyDatasetType],
        allow_duplicates: bool,
        constraint_data_id: Mapping[str, DataIdValue],
    ) -> tuple[Predicate, SqlSelectBuilder, Postprocessing]:
        """Process a query's WHERE predicate and dimensions to handle spatial
        and temporal overlaps.

        Parameters
        ----------
        dimensions : `~lsst.daf.butler.dimensions.DimensionGroup`
            Full dimensions of all tables to be joined into the query (even if
            they are not included in the query results).
        predicate : `~lsst.daf.butler.queries.tree.Predicate`
            Boolean column expression that may contain user-provided spatial
            and/or temporal overlaps intermixed with other constraints.
        join_operands : `~collections.abc.Iterable` [ \
                `~lsst.daf.butler.dimensions.DimensionGroup` ]
            Dimensions of tables or subqueries that are already going to be
            joined into the query that may establish their own spatial or
            temporal relationships (e.g. a dataset search with both ``visit``
            and ``patch`` dimensions).
        calibration_dataset_types : `~collections.abc.Set` [ `str` or \
                `~lsst.daf.butler.queries.tree.AnyDatasetType` ]
            The names of dataset types that have been joined into the query via
            a search that includes at least one calibration collection.
        allow_duplicates : `bool`
            If set to `True` then query will be allowed to return non-distinct
            rows.
        constraint_data_id : `~collections.abc.Mapping` [`str`, `int` | `str`]
            Dimension values that are known to be common to all rows in the
            query result set.

        Returns
        -------
        predicate : `lsst.daf.butler.queries.tree.Predicate`
            A version of the given predicate that preserves the overall
            behavior of the filter while possibly rewriting overlap expressions
            that have been partially moved into ``builder`` as some combination
            of new nested predicates, joins, and postprocessing.
        builder : `~lsst.daf.butler.direct_query_driver.SqlSelectBuilder`
            A query-construction helper object that includes any initial joins
            and postprocessing needed to handle overlap expression extracted
            from the original predicate.
        postprocessing : `Postprocessing`
            Struct representing post-query processing to be done in Python.

        Notes
        -----
        Implementations must delegate to `.queries.overlaps.OverlapsVisitor`
        (possibly by subclassing it) to ensure "automatic" spatial and temporal
        joins are added consistently by all query-construction implementations.
        """
        raise NotImplementedError()

    universe: DimensionUniverse
    """Universe of all dimensions and dimension elements known to the
    `Registry` (`DimensionUniverse`).
    """