Coverage for python/lsst/daf/butler/registry/dimensions/static.py: 92%
153 statements
coverage.py v6.5.0, created at 2023-02-01 10:03 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23import itertools
24from collections import defaultdict
25from collections.abc import Mapping, Set
26from typing import TYPE_CHECKING
28import sqlalchemy
29from lsst.daf.relation import Relation
31from ...core import (
32 DatabaseDimensionElement,
33 DatabaseTopologicalFamily,
34 DimensionElement,
35 DimensionGraph,
36 DimensionUniverse,
37 GovernorDimension,
38 NamedKeyDict,
39 SkyPixDimension,
40 ddl,
41)
42from .._exceptions import MissingSpatialOverlapError
43from ..interfaces import (
44 Database,
45 DatabaseDimensionOverlapStorage,
46 DatabaseDimensionRecordStorage,
47 DimensionRecordStorage,
48 DimensionRecordStorageManager,
49 GovernorDimensionRecordStorage,
50 StaticTablesContext,
51 VersionTuple,
52)
54if TYPE_CHECKING:  [54 ↛ 55: condition on line 54 was never true]
55 from .. import queries
58# This has to be updated on every schema change
59_VERSION = VersionTuple(6, 0, 2)
62class StaticDimensionRecordStorageManager(DimensionRecordStorageManager):
63 """An implementation of `DimensionRecordStorageManager` for single-layer
64 `Registry` and the base layers of multi-layer `Registry`.
66 This manager creates `DimensionRecordStorage` instances for all elements
67 in the `DimensionUniverse` in its own `initialize` method, as part of
68 static table creation, so it never needs to manage any dynamic registry
69 tables.
71 Parameters
72 ----------
73 db : `Database`
74 Interface to the underlying database engine and namespace.
75 records : `NamedKeyDict`
76 Mapping from `DimensionElement` to `DimensionRecordStorage` for that
77 element.
78 overlaps : `list` [ `DatabaseDimensionOverlapStorage` ]
79 Objects that manage materialized overlaps between database-backed
80 dimensions.
81 dimensionGraphStorage : `_DimensionGraphStorage`
82 Object that manages saved `DimensionGraph` definitions.
83 universe : `DimensionUniverse`
84 All known dimensions.
85 """
87 def __init__(
88 self,
89 db: Database,
90 *,
91 records: NamedKeyDict[DimensionElement, DimensionRecordStorage],
92 overlaps: dict[
93 tuple[DatabaseDimensionElement, DatabaseDimensionElement], DatabaseDimensionOverlapStorage
94 ],
95 dimensionGraphStorage: _DimensionGraphStorage,
96 universe: DimensionUniverse,
97 ):
98 super().__init__(universe=universe)
99 self._db = db
100 self._records = records
101 self._overlaps = overlaps
102 self._dimensionGraphStorage = dimensionGraphStorage
104 @classmethod
105 def initialize(
106 cls, db: Database, context: StaticTablesContext, *, universe: DimensionUniverse
107 ) -> DimensionRecordStorageManager:
108 # Docstring inherited from DimensionRecordStorageManager.
109 # Start by initializing governor dimensions; those go both in the main
110 # 'records' mapping we'll pass to init, and a local dictionary that we
111 # can pass in when initializing storage for DatabaseDimensionElements.
112 governors = NamedKeyDict[GovernorDimension, GovernorDimensionRecordStorage]()
113 records = NamedKeyDict[DimensionElement, DimensionRecordStorage]()
114 for dimension in universe.getGovernorDimensions():
115 governorStorage = dimension.makeStorage(db, context=context)
116 governors[dimension] = governorStorage
117 records[dimension] = governorStorage
118 # Next we initialize storage for DatabaseDimensionElements. Some
119 # elements' storage may be views of other elements' storage; we'll do a first pass
120 # to gather a mapping from the names of those targets back to their
121 # views.
122 view_targets = {
123 element.viewOf: element
124 for element in universe.getDatabaseElements()
125 if element.viewOf is not None
126 }
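# A view element has no table of its own; its records are read from its target
# element's table (for example, in the default dimension universe a "band"
# dimension is typically defined as a view of "physical_filter").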
127 # We remember the spatial ones (grouped by family) so we can go back
128 # and initialize overlap storage for them later.
129 spatial = NamedKeyDict[DatabaseTopologicalFamily, list[DatabaseDimensionRecordStorage]]()
130 for element in universe.getDatabaseElements():
131 if element.viewOf is not None:
132 # We'll initialize this storage when the view's target is
133 # initialized.
134 continue
135 elementStorage = element.makeStorage(db, context=context, governors=governors)
136 records[element] = elementStorage
137 if element.spatial is not None:
138 spatial.setdefault(element.spatial, []).append(elementStorage)
139 if (view_element := view_targets.get(element.name)) is not None:
140 view_element_storage = view_element.makeStorage(
141 db,
142 context=context,
143 governors=governors,
144 view_target=elementStorage,
145 )
146 records[view_element] = view_element_storage
147 if view_element.spatial is not None:  [147 ↛ 148: condition on line 147 was never true]
148 spatial.setdefault(view_element.spatial, []).append(view_element_storage)
150 # Finally we initialize overlap storage. The implementation class for
151 # this is currently hard-coded (it's not obvious there will ever be
152 # others). Note that overlaps between database-backed dimensions and
153 # skypix dimensions are handled internally by `DatabaseDimensionRecordStorage`,
154 # and hence are not included here.
155 from ..dimensions.overlaps import CrossFamilyDimensionOverlapStorage
157 overlaps: dict[
158 tuple[DatabaseDimensionElement, DatabaseDimensionElement], DatabaseDimensionOverlapStorage
159 ] = {}
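# itertools.combinations yields each unordered pair of spatial families once;
# itertools.product then pairs every storage object in one family with every
# storage object in the other, so overlap storage is set up for each cross-family
# combination.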
160 for (family1, storages1), (family2, storages2) in itertools.combinations(spatial.items(), 2):
161 for elementStoragePair in itertools.product(storages1, storages2):
162 governorStoragePair = (governors[family1.governor], governors[family2.governor])
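# Put the pair into a deterministic order (sorted by element), swapping the
# governor storage pair to match, so the overlap table and its key do not
# depend on family iteration order.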
163 if elementStoragePair[0].element > elementStoragePair[1].element:  [163 ↛ 164: condition on line 163 was never true]
164 elementStoragePair = (elementStoragePair[1], elementStoragePair[0])
165 governorStoragePair = (governorStoragePair[1], governorStoragePair[0])
166 overlapStorage = CrossFamilyDimensionOverlapStorage.initialize(
167 db,
168 elementStoragePair,
169 governorStoragePair,
170 context=context,
171 )
172 elementStoragePair[0].connect(overlapStorage)
173 elementStoragePair[1].connect(overlapStorage)
174 overlaps[overlapStorage.elements] = overlapStorage
175 # Create table that stores DimensionGraph definitions.
176 dimensionGraphStorage = _DimensionGraphStorage.initialize(db, context, universe=universe)
177 return cls(
178 db=db,
179 records=records,
180 universe=universe,
181 overlaps=overlaps,
182 dimensionGraphStorage=dimensionGraphStorage,
183 )
185 def get(self, element: DimensionElement | str) -> DimensionRecordStorage | None:
186 # Docstring inherited from DimensionRecordStorageManager.
187 r = self._records.get(element)
188 if r is None:
189 if isinstance(element, str):
190 element = self.universe[element]
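# SkyPix dimensions are not backed by database tables; their records are
# computed from the pixelization, so storage for them is constructed on demand
# here rather than in initialize().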
191 if isinstance(element, SkyPixDimension):  [191 ↛ 193: condition on line 191 was never false]
192 return self.universe.skypix[element.system][element.level].makeStorage()
193 return r
195 def register(self, element: DimensionElement) -> DimensionRecordStorage:
196 # Docstring inherited from DimensionRecordStorageManager.
197 result = self.get(element)
198 assert result, "All record storage instances should be created in initialize()."
199 return result
201 def saveDimensionGraph(self, graph: DimensionGraph) -> int:
202 # Docstring inherited from DimensionRecordStorageManager.
203 return self._dimensionGraphStorage.save(graph)
205 def loadDimensionGraph(self, key: int) -> DimensionGraph:
206 # Docstring inherited from DimensionRecordStorageManager.
207 return self._dimensionGraphStorage.load(key)
209 def clearCaches(self) -> None:
210 # Docstring inherited from DimensionRecordStorageManager.
211 for storage in self._records.values():
212 storage.clearCaches()
214 def make_spatial_join_relation(
215 self,
216 element1: str,
217 element2: str,
218 context: queries.SqlQueryContext,
219 governor_constraints: Mapping[str, Set[str]],
220 ) -> tuple[Relation, bool]:
221 # Docstring inherited.
222 storage1 = self[element1]
223 storage2 = self[element2]
224 overlaps: Relation | None = None
225 needs_refinement: bool = False
226 match (storage1, storage2):
227 case [
228 DatabaseDimensionRecordStorage() as db_storage1,
229 DatabaseDimensionRecordStorage() as db_storage2,
230 ]:
231 # Construction guarantees that we only need to try this in one
232 # direction; either both storage objects know about the other
233 # or neither does.
234 overlaps = db_storage1.make_spatial_join_relation(
235 db_storage2.element, context, governor_constraints
236 )
237 if overlaps is None:  [237 ↛ 255: condition on line 237 was never false]
238 # No direct materialized overlaps; use commonSkyPix as an
239 # intermediary.
240 common_skypix_overlap1 = db_storage1.make_spatial_join_relation(
241 self.universe.commonSkyPix, context, governor_constraints
242 )
243 common_skypix_overlap2 = db_storage2.make_spatial_join_relation(
244 self.universe.commonSkyPix, context, governor_constraints
245 )
246 assert (
247 common_skypix_overlap1 is not None and common_skypix_overlap2 is not None
248 ), "Overlaps with the common skypix dimension should always be available."
249 overlaps = common_skypix_overlap1.join(common_skypix_overlap2)
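# A join through the common skypix dimension is only approximate: two regions
# that overlap the same skypix pixel need not overlap each other, so flag the
# result for refinement against the actual regions.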
250 needs_refinement = True
251 case [DatabaseDimensionRecordStorage() as db_storage, other]:
252 overlaps = db_storage.make_spatial_join_relation(other.element, context, governor_constraints)
253 case [other, DatabaseDimensionRecordStorage() as db_storage]:  [253 ↛ 255: pattern on line 253 never matched]
254 overlaps = db_storage.make_spatial_join_relation(other.element, context, governor_constraints)
255 if overlaps is None:  [255 ↛ 269: condition on line 255 was never true]
256 # In the future, there's a lot more we could try here:
257 #
258 # - for skypix dimensions, looking for materialized overlaps at
259 # smaller spatial scales (higher levels) and using bit-shifting;
260 #
261 # - for non-skypix dimensions, looking for materialized overlaps
262 # for finer-grained members of the same family, and then
263 # doing SELECT DISTINCT (or even tolerating duplicates) on the
264 # columns we care about (e.g. use patch overlaps to satisfy a
265 # request for tract overlaps).
266 #
267 # It's not obvious that's better than just telling the user to
268 # materialize more overlaps, though.
269 raise MissingSpatialOverlapError(
270 f"No materialized overlaps for spatial join between {element1!r} and {element2!r}."
271 )
272 return overlaps, needs_refinement
274 @classmethod
275 def currentVersion(cls) -> VersionTuple | None:
276 # Docstring inherited from VersionedExtension.
277 return _VERSION
279 def schemaDigest(self) -> str | None:
280 # Docstring inherited from VersionedExtension.
281 tables: list[sqlalchemy.schema.Table] = []
282 for recStorage in self._records.values():
283 tables += recStorage.digestTables()
284 for overlapStorage in self._overlaps.values():
285 tables += overlapStorage.digestTables()
286 return self._defaultSchemaDigest(tables, self._db.dialect)
289class _DimensionGraphStorage:
290 """Helper object that manages saved DimensionGraph definitions.
292 Should generally be constructed by calling `initialize` instead of invoking
293 the constructor directly.
295 Parameters
296 ----------
297 db : `Database`
298 Interface to the underlying database engine and namespace.
299 idTable : `sqlalchemy.schema.Table`
300 Table that just holds unique IDs for dimension graphs.
301 definitionTable : `sqlalchemy.schema.Table`
302 Table that maps dimension names to the IDs of the dimension graphs to
303 which they belong.
304 universe : `DimensionUniverse`
305 All known dimensions.
306 """
308 def __init__(
309 self,
310 db: Database,
311 idTable: sqlalchemy.schema.Table,
312 definitionTable: sqlalchemy.schema.Table,
313 universe: DimensionUniverse,
314 ):
315 self._db = db
316 self._idTable = idTable
317 self._definitionTable = definitionTable
318 self._universe = universe
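# Key 0 is reserved for the empty dimension graph and is never written to the
# database, so seed both caches with it up front (refresh() does the same).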
319 self._keysByGraph: dict[DimensionGraph, int] = {universe.empty: 0}
320 self._graphsByKey: dict[int, DimensionGraph] = {0: universe.empty}
322 @classmethod
323 def initialize(
324 cls,
325 db: Database,
326 context: StaticTablesContext,
327 *,
328 universe: DimensionUniverse,
329 ) -> _DimensionGraphStorage:
330 """Construct a new instance, including creating tables if necessary.
332 Parameters
333 ----------
334 db : `Database`
335 Interface to the underlying database engine and namespace.
336 context : `StaticTablesContext`
337 Context object obtained from `Database.declareStaticTables`; used
338 to declare any tables that should always be present.
339 universe : `DimensionUniverse`
340 All known dimensions.
342 Returns
343 -------
344 storage : `_DimensionGraphStorage`
345 New instance of this class.
346 """
347 # We need two tables just so we have one where the autoincrement key is
348 # the only primary key column, as is required by (at least) SQLite. In
349 # other databases, we might be able to use a Sequence directly.
350 idTable = context.addTable(
351 "dimension_graph_key",
352 ddl.TableSpec(
353 fields=[
354 ddl.FieldSpec(
355 name="id",
356 dtype=sqlalchemy.BigInteger,
357 autoincrement=True,
358 primaryKey=True,
359 ),
360 ],
361 ),
362 )
363 definitionTable = context.addTable(
364 "dimension_graph_definition",
365 ddl.TableSpec(
366 fields=[
367 ddl.FieldSpec(name="dimension_graph_id", dtype=sqlalchemy.BigInteger, primaryKey=True),
368 ddl.FieldSpec(name="dimension_name", dtype=sqlalchemy.Text, primaryKey=True),
369 ],
370 foreignKeys=[
371 ddl.ForeignKeySpec(
372 "dimension_graph_key",
373 source=("dimension_graph_id",),
374 target=("id",),
375 onDelete="CASCADE",
376 ),
377 ],
378 ),
379 )
380 return cls(db, idTable, definitionTable, universe=universe)
382 def refresh(self) -> None:
383 """Refresh the in-memory cache of saved DimensionGraph definitions.
385 This should be done automatically whenever needed, but it can also
386 be called explicitly.
387 """
388 dimensionNamesByKey: dict[int, set[str]] = defaultdict(set)
389 with self._db.query(self._definitionTable.select()) as sql_result:
390 sql_rows = sql_result.mappings().fetchall()
391 for row in sql_rows:
392 key = row[self._definitionTable.columns.dimension_graph_id]
393 dimensionNamesByKey[key].add(row[self._definitionTable.columns.dimension_name])
394 keysByGraph: dict[DimensionGraph, int] = {self._universe.empty: 0}
395 graphsByKey: dict[int, DimensionGraph] = {0: self._universe.empty}
396 for key, dimensionNames in dimensionNamesByKey.items():
397 graph = DimensionGraph(self._universe, names=dimensionNames)
398 keysByGraph[graph] = key
399 graphsByKey[key] = graph
400 self._graphsByKey = graphsByKey
401 self._keysByGraph = keysByGraph
403 def save(self, graph: DimensionGraph) -> int:
404 """Save a `DimensionGraph` definition to the database, allowing it to
405 be retrieved later via the returned key.
407 Parameters
408 ----------
409 graph : `DimensionGraph`
410 Set of dimensions to save.
412 Returns
413 -------
414 key : `int`
415 Integer used as the unique key for this `DimensionGraph` in the
416 database.
417 """
418 key = self._keysByGraph.get(graph)
419 if key is not None:
420 return key
421 # Lock tables and then refresh to guard against races where some other
422 # process is trying to register the exact same dimension graph. This
423 # is probably not the most efficient way to do it, but it should be a
424 # rare operation, especially since the short-circuit above will usually
425 # work in long-lived data repositories.
426 with self._db.transaction(lock=[self._idTable, self._definitionTable]):
427 self.refresh()
428 key = self._keysByGraph.get(graph)
429 if key is None:  [429 ↛ 435: condition on line 429 was never false]
430 (key,) = self._db.insert(self._idTable, {}, returnIds=True) # type: ignore
431 self._db.insert(
432 self._definitionTable,
433 *[{"dimension_graph_id": key, "dimension_name": name} for name in graph.required.names],
434 )
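# Update the in-memory caches whether we inserted a new key above or picked up
# one registered concurrently (found by the refresh() call).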
435 self._keysByGraph[graph] = key
436 self._graphsByKey[key] = graph
437 return key
439 def load(self, key: int) -> DimensionGraph:
440 """Retrieve a `DimensionGraph` that was previously saved in the
441 database.
443 Parameters
444 ----------
445 key : `int`
446 Integer used as the unique key for this `DimensionGraph` in the
447 database.
449 Returns
450 -------
451 graph : `DimensionGraph`
452 Retrieved graph.
453 """
454 graph = self._graphsByKey.get(key)
455 if graph is None:
456 self.refresh()
457 graph = self._graphsByKey[key]
458 return graph
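# Illustrative round trip (a sketch with hypothetical names: ``storage`` is an
# instance returned by _DimensionGraphStorage.initialize and ``graph`` is an
# existing DimensionGraph):
#
#     key = storage.save(graph)   # returns the existing key if already saved
#     assert storage.load(key) == graph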