Coverage for python/lsst/daf/butler/registry/dimensions/static.py: 93%
144 statements
« prev ^ index » next coverage.py v6.5.0, created at 2023-03-31 02:41 -0700
« prev ^ index » next coverage.py v6.5.0, created at 2023-03-31 02:41 -0700
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23import itertools
24from collections import defaultdict
25from collections.abc import Mapping, Set
26from typing import TYPE_CHECKING
28import sqlalchemy
29from lsst.daf.relation import Relation
31from ...core import (
32 DatabaseDimensionElement,
33 DatabaseTopologicalFamily,
34 DimensionElement,
35 DimensionGraph,
36 DimensionUniverse,
37 GovernorDimension,
38 NamedKeyDict,
39 SkyPixDimension,
40 ddl,
41)
42from .._exceptions import MissingSpatialOverlapError
43from ..interfaces import (
44 Database,
45 DatabaseDimensionOverlapStorage,
46 DatabaseDimensionRecordStorage,
47 DimensionRecordStorage,
48 DimensionRecordStorageManager,
49 GovernorDimensionRecordStorage,
50 StaticTablesContext,
51 VersionTuple,
52)
54if TYPE_CHECKING:
55 from .. import queries
# Version reported by `StaticDimensionRecordStorageManager.currentVersions`
# for the tables declared in this module.
# This has to be updated on every schema change.
_VERSION = VersionTuple(6, 0, 2)
62class StaticDimensionRecordStorageManager(DimensionRecordStorageManager):
63 """An implementation of `DimensionRecordStorageManager` for single-layer
64 `Registry` and the base layers of multi-layer `Registry`.
66 This manager creates `DimensionRecordStorage` instances for all elements
67 in the `DimensionUniverse` in its own `initialize` method, as part of
68 static table creation, so it never needs to manage any dynamic registry
69 tables.
71 Parameters
72 ----------
73 db : `Database`
74 Interface to the underlying database engine and namespace.
75 records : `NamedKeyDict`
76 Mapping from `DimensionElement` to `DimensionRecordStorage` for that
77 element.
78 overlaps : `list` [ `DatabaseDimensionOverlapStorage` ]
79 Objects that manage materialized overlaps between database-backed
80 dimensions.
81 dimensionGraphStorage : `_DimensionGraphStorage`
82 Object that manages saved `DimensionGraph` definitions.
83 universe : `DimensionUniverse`
84 All known dimensions.
85 """
87 def __init__(
88 self,
89 db: Database,
90 *,
91 records: NamedKeyDict[DimensionElement, DimensionRecordStorage],
92 overlaps: dict[
93 tuple[DatabaseDimensionElement, DatabaseDimensionElement], DatabaseDimensionOverlapStorage
94 ],
95 dimensionGraphStorage: _DimensionGraphStorage,
96 universe: DimensionUniverse,
97 registry_schema_version: VersionTuple | None = None,
98 ):
99 super().__init__(universe=universe, registry_schema_version=registry_schema_version)
100 self._db = db
101 self._records = records
102 self._overlaps = overlaps
103 self._dimensionGraphStorage = dimensionGraphStorage
105 @classmethod
106 def initialize(
107 cls,
108 db: Database,
109 context: StaticTablesContext,
110 *,
111 universe: DimensionUniverse,
112 registry_schema_version: VersionTuple | None = None,
113 ) -> DimensionRecordStorageManager:
114 # Docstring inherited from DimensionRecordStorageManager.
115 # Start by initializing governor dimensions; those go both in the main
116 # 'records' mapping we'll pass to init, and a local dictionary that we
117 # can pass in when initializing storage for DatabaseDimensionElements.
118 governors = NamedKeyDict[GovernorDimension, GovernorDimensionRecordStorage]()
119 records = NamedKeyDict[DimensionElement, DimensionRecordStorage]()
120 for dimension in universe.getGovernorDimensions():
121 governorStorage = dimension.makeStorage(db, context=context)
122 governors[dimension] = governorStorage
123 records[dimension] = governorStorage
124 # Next we initialize storage for DatabaseDimensionElements. Some
125 # elements' storage may be views into anothers; we'll do a first pass
126 # to gather a mapping from the names of those targets back to their
127 # views.
128 view_targets = {
129 element.viewOf: element
130 for element in universe.getDatabaseElements()
131 if element.viewOf is not None
132 }
133 # We remember the spatial ones (grouped by family) so we can go back
134 # and initialize overlap storage for them later.
135 spatial = NamedKeyDict[DatabaseTopologicalFamily, list[DatabaseDimensionRecordStorage]]()
136 for element in universe.getDatabaseElements():
137 if element.viewOf is not None:
138 # We'll initialize this storage when the view's target is
139 # initialized.
140 continue
141 elementStorage = element.makeStorage(db, context=context, governors=governors)
142 records[element] = elementStorage
143 if element.spatial is not None:
144 spatial.setdefault(element.spatial, []).append(elementStorage)
145 if (view_element := view_targets.get(element.name)) is not None:
146 view_element_storage = view_element.makeStorage(
147 db,
148 context=context,
149 governors=governors,
150 view_target=elementStorage,
151 )
152 records[view_element] = view_element_storage
153 if view_element.spatial is not None: 153 ↛ 154line 153 didn't jump to line 154, because the condition on line 153 was never true
154 spatial.setdefault(view_element.spatial, []).append(view_element_storage)
156 # Finally we initialize overlap storage. The implementation class for
157 # this is currently hard-coded (it's not obvious there will ever be
158 # others). Note that overlaps between database-backed dimensions and
159 # skypix dimensions is internal to `DatabaseDimensionRecordStorage`,
160 # and hence is not included here.
161 from ..dimensions.overlaps import CrossFamilyDimensionOverlapStorage
163 overlaps: dict[
164 tuple[DatabaseDimensionElement, DatabaseDimensionElement], DatabaseDimensionOverlapStorage
165 ] = {}
166 for (family1, storages1), (family2, storages2) in itertools.combinations(spatial.items(), 2):
167 for elementStoragePair in itertools.product(storages1, storages2):
168 governorStoragePair = (governors[family1.governor], governors[family2.governor])
169 if elementStoragePair[0].element > elementStoragePair[1].element: 169 ↛ 170line 169 didn't jump to line 170, because the condition on line 169 was never true
170 elementStoragePair = (elementStoragePair[1], elementStoragePair[0])
171 governorStoragePair = (governorStoragePair[1], governorStoragePair[1])
172 overlapStorage = CrossFamilyDimensionOverlapStorage.initialize(
173 db,
174 elementStoragePair,
175 governorStoragePair,
176 context=context,
177 )
178 elementStoragePair[0].connect(overlapStorage)
179 elementStoragePair[1].connect(overlapStorage)
180 overlaps[overlapStorage.elements] = overlapStorage
181 # Create table that stores DimensionGraph definitions.
182 dimensionGraphStorage = _DimensionGraphStorage.initialize(db, context, universe=universe)
183 return cls(
184 db=db,
185 records=records,
186 universe=universe,
187 overlaps=overlaps,
188 dimensionGraphStorage=dimensionGraphStorage,
189 registry_schema_version=registry_schema_version,
190 )
192 def get(self, element: DimensionElement | str) -> DimensionRecordStorage | None:
193 # Docstring inherited from DimensionRecordStorageManager.
194 r = self._records.get(element)
195 if r is None:
196 if isinstance(element, str):
197 element = self.universe[element]
198 if isinstance(element, SkyPixDimension): 198 ↛ 200line 198 didn't jump to line 200, because the condition on line 198 was never false
199 return self.universe.skypix[element.system][element.level].makeStorage()
200 return r
202 def register(self, element: DimensionElement) -> DimensionRecordStorage:
203 # Docstring inherited from DimensionRecordStorageManager.
204 result = self.get(element)
205 assert result, "All records instances should be created in initialize()."
206 return result
208 def saveDimensionGraph(self, graph: DimensionGraph) -> int:
209 # Docstring inherited from DimensionRecordStorageManager.
210 return self._dimensionGraphStorage.save(graph)
212 def loadDimensionGraph(self, key: int) -> DimensionGraph:
213 # Docstring inherited from DimensionRecordStorageManager.
214 return self._dimensionGraphStorage.load(key)
216 def clearCaches(self) -> None:
217 # Docstring inherited from DimensionRecordStorageManager.
218 for storage in self._records.values():
219 storage.clearCaches()
221 def make_spatial_join_relation(
222 self,
223 element1: str,
224 element2: str,
225 context: queries.SqlQueryContext,
226 governor_constraints: Mapping[str, Set[str]],
227 ) -> tuple[Relation, bool]:
228 # Docstring inherited.
229 storage1 = self[element1]
230 storage2 = self[element2]
231 overlaps: Relation | None = None
232 needs_refinement: bool = False
233 match (storage1, storage2):
234 case [
235 DatabaseDimensionRecordStorage() as db_storage1,
236 DatabaseDimensionRecordStorage() as db_storage2,
237 ]:
238 # Construction guarantees that we only need to try this in one
239 # direction; either both storage objects know about the other
240 # or neither do.
241 overlaps = db_storage1.make_spatial_join_relation(
242 db_storage2.element, context, governor_constraints
243 )
244 if overlaps is None: 244 ↛ 262line 244 didn't jump to line 262, because the condition on line 244 was never false
245 # No direct materialized overlaps; use commonSkyPix as an
246 # intermediary.
247 common_skypix_overlap1 = db_storage1.make_spatial_join_relation(
248 self.universe.commonSkyPix, context, governor_constraints
249 )
250 common_skypix_overlap2 = db_storage2.make_spatial_join_relation(
251 self.universe.commonSkyPix, context, governor_constraints
252 )
253 assert (
254 common_skypix_overlap1 is not None and common_skypix_overlap2 is not None
255 ), "Overlaps with the common skypix dimension should always be available,"
256 overlaps = common_skypix_overlap1.join(common_skypix_overlap2)
257 needs_refinement = True
258 case [DatabaseDimensionRecordStorage() as db_storage, other]: 258 ↛ 259line 258 didn't jump to line 259, because the pattern on line 258 never matched
259 overlaps = db_storage.make_spatial_join_relation(other.element, context, governor_constraints)
260 case [other, DatabaseDimensionRecordStorage() as db_storage]: 260 ↛ 262line 260 didn't jump to line 262, because the pattern on line 260 never matched
261 overlaps = db_storage.make_spatial_join_relation(other.element, context, governor_constraints)
262 if overlaps is None:
263 # In the future, there's a lot more we could try here:
264 #
265 # - for skypix dimensions, looking for materialized overlaps at
266 # smaller spatial scales (higher-levels) and using bit-shifting;
267 #
268 # - for non-skypix dimensions, looking for materialized overlaps
269 # for more finer-grained members of the same family, and then
270 # doing SELECT DISTINCT (or even tolerating duplicates) on the
271 # columns we care about (e.g. use patch overlaps to satisfy a
272 # request for tract overlaps).
273 #
274 # It's not obvious that's better than just telling the user to
275 # materialize more overlaps, though.
276 raise MissingSpatialOverlapError(
277 f"No materialized overlaps for spatial join between {element1!r} and {element2!r}."
278 )
279 return overlaps, needs_refinement
281 @classmethod
282 def currentVersions(cls) -> list[VersionTuple]:
283 # Docstring inherited from VersionedExtension.
284 return [_VERSION]
class _DimensionGraphStorage:
    """Persist `DimensionGraph` definitions and cache them in memory.

    Instances are normally obtained from `initialize` rather than by calling
    the constructor directly.

    Parameters
    ----------
    db : `Database`
        Interface to the underlying database engine and namespace.
    idTable : `sqlalchemy.schema.Table`
        Table that just holds unique IDs for dimension graphs.
    definitionTable : `sqlalchemy.schema.Table`
        Table that maps dimension names to the IDs of the dimension graphs to
        which they belong.
    universe : `DimensionUniverse`
        All known dimensions.
    """

    def __init__(
        self,
        db: Database,
        idTable: sqlalchemy.schema.Table,
        definitionTable: sqlalchemy.schema.Table,
        universe: DimensionUniverse,
    ):
        self._db = db
        self._idTable = idTable
        self._definitionTable = definitionTable
        self._universe = universe
        # The empty graph is always associated with key 0 in both caches.
        empty_graph = universe.empty
        self._keysByGraph: dict[DimensionGraph, int] = {empty_graph: 0}
        self._graphsByKey: dict[int, DimensionGraph] = {0: empty_graph}

    @classmethod
    def initialize(
        cls,
        db: Database,
        context: StaticTablesContext,
        *,
        universe: DimensionUniverse,
    ) -> _DimensionGraphStorage:
        """Declare the backing tables and build a new instance.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present.
        universe : `DimensionUniverse`
            All known dimensions.

        Returns
        -------
        storage : `_DimensionGraphStorage`
            New instance of this class.
        """
        # Two tables are needed so that one of them has the autoincrement key
        # as its only primary key column, as is required by (at least) SQLite.
        # In other databases, we might be able to use a Sequence directly.
        key_field = ddl.FieldSpec(
            name="id",
            dtype=sqlalchemy.BigInteger,
            autoincrement=True,
            primaryKey=True,
        )
        key_table = context.addTable("dimension_graph_key", ddl.TableSpec(fields=[key_field]))
        # Each row of the definition table links one dimension name to the
        # key of a graph; rows are removed with the key they point at.
        definition_spec = ddl.TableSpec(
            fields=[
                ddl.FieldSpec(name="dimension_graph_id", dtype=sqlalchemy.BigInteger, primaryKey=True),
                ddl.FieldSpec(name="dimension_name", dtype=sqlalchemy.Text, primaryKey=True),
            ],
            foreignKeys=[
                ddl.ForeignKeySpec(
                    "dimension_graph_key",
                    source=("dimension_graph_id",),
                    target=("id",),
                    onDelete="CASCADE",
                ),
            ],
        )
        definition_table = context.addTable("dimension_graph_definition", definition_spec)
        return cls(db, key_table, definition_table, universe=universe)

    def refresh(self) -> None:
        """Rebuild the in-memory caches from the definition table.

        This is invoked internally by `save` and `load` when needed, but it
        can also be called explicitly.
        """
        table = self._definitionTable
        with self._db.query(table.select()) as sql_result:
            fetched = sql_result.mappings().fetchall()
        # Gather the dimension names recorded under each graph key.
        names_for_key: dict[int, set[str]] = defaultdict(set)
        if fetched:
            id_column = table.columns.dimension_graph_id
            name_column = table.columns.dimension_name
            for record in fetched:
                names_for_key[record[id_column]].add(record[name_column])
        # Build fresh caches, seeded with the empty graph, and swap them in
        # only once they are complete.
        empty_graph = self._universe.empty
        by_graph: dict[DimensionGraph, int] = {empty_graph: 0}
        by_key: dict[int, DimensionGraph] = {0: empty_graph}
        for graph_key, names in names_for_key.items():
            rebuilt = DimensionGraph(self._universe, names=names)
            by_graph[rebuilt] = graph_key
            by_key[graph_key] = rebuilt
        self._graphsByKey = by_key
        self._keysByGraph = by_graph

    def save(self, graph: DimensionGraph) -> int:
        """Store a `DimensionGraph` definition and return its key.

        Parameters
        ----------
        graph : `DimensionGraph`
            Set of dimensions to save.

        Returns
        -------
        key : `int`
            Integer used as the unique key for this `DimensionGraph` in the
            database.
        """
        known = self._keysByGraph.get(graph)
        if known is not None:
            return known
        # Lock tables and then refresh to guard against races where some other
        # process is trying to register the exact same dimension graph.  This
        # is probably not the most efficient way to do it, but it should be a
        # rare operation, especially since the short-circuit above will usually
        # work in long-lived data repositories.
        with self._db.transaction(lock=[self._idTable, self._definitionTable]):
            self.refresh()
            key = self._keysByGraph.get(graph)
            if key is None:
                (key,) = self._db.insert(self._idTable, {}, returnIds=True)  # type: ignore
                # Only the required dimension names are written; one row each.
                definition_rows = [
                    {"dimension_graph_id": key, "dimension_name": name} for name in graph.required.names
                ]
                self._db.insert(self._definitionTable, *definition_rows)
            self._keysByGraph[graph] = key
            self._graphsByKey[key] = graph
        return key

    def load(self, key: int) -> DimensionGraph:
        """Return the `DimensionGraph` previously saved under ``key``.

        Parameters
        ----------
        key : `int`
            Integer used as the unique key for this `DimensionGraph` in the
            database.

        Returns
        -------
        graph : `DimensionGraph`
            Retrieved graph.
        """
        # On a cache miss, reload from the database once before looking up;
        # a key that is still unknown after that raises `KeyError`.
        if key not in self._graphsByKey:
            self.refresh()
        return self._graphsByKey[key]