Coverage for python/lsst/daf/butler/registry/dimensions/static.py: 96%
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23import itertools
24from collections import defaultdict
25from typing import Dict, List, Optional, Set, Tuple
27import sqlalchemy
29from ...core import (
30 DatabaseDimensionElement,
31 DatabaseTopologicalFamily,
32 DimensionElement,
33 DimensionGraph,
34 DimensionUniverse,
35 GovernorDimension,
36 NamedKeyDict,
37 SkyPixDimension,
38 ddl,
39)
40from ..interfaces import (
41 Database,
42 DatabaseDimensionOverlapStorage,
43 DatabaseDimensionRecordStorage,
44 DimensionRecordStorage,
45 DimensionRecordStorageManager,
46 GovernorDimensionRecordStorage,
47 StaticTablesContext,
48 VersionTuple,
49)
# Version returned by `StaticDimensionRecordStorageManager.currentVersion`.
# It must be bumped by hand whenever the schema changes.
_VERSION = VersionTuple(6, 0, 1)
class StaticDimensionRecordStorageManager(DimensionRecordStorageManager):
    """An implementation of `DimensionRecordStorageManager` for single-layer
    `Registry` and the base layers of multi-layer `Registry`.

    This manager creates `DimensionRecordStorage` instances for all elements
    in the `DimensionUniverse` in its own `initialize` method, as part of
    static table creation, so it never needs to manage any dynamic registry
    tables.

    Parameters
    ----------
    db : `Database`
        Interface to the underlying database engine and namespace.
    records : `NamedKeyDict`
        Mapping from `DimensionElement` to `DimensionRecordStorage` for that
        element.
    overlaps : `dict` [ `tuple`, `DatabaseDimensionOverlapStorage` ]
        Objects that manage materialized overlaps between database-backed
        dimensions, keyed by the pair of `DatabaseDimensionElement`
        instances each one relates.
    dimensionGraphStorage : `_DimensionGraphStorage`
        Object that manages saved `DimensionGraph` definitions.
    universe : `DimensionUniverse`
        All known dimensions.
    """

    def __init__(
        self,
        db: Database,
        *,
        records: NamedKeyDict[DimensionElement, DimensionRecordStorage],
        overlaps: Dict[
            Tuple[DatabaseDimensionElement, DatabaseDimensionElement], DatabaseDimensionOverlapStorage
        ],
        dimensionGraphStorage: _DimensionGraphStorage,
        universe: DimensionUniverse,
    ):
        super().__init__(universe=universe)
        self._db = db
        self._records = records
        self._overlaps = overlaps
        self._dimensionGraphStorage = dimensionGraphStorage

    @classmethod
    def initialize(
        cls, db: Database, context: StaticTablesContext, *, universe: DimensionUniverse
    ) -> DimensionRecordStorageManager:
        # Docstring inherited from DimensionRecordStorageManager.
        # Start by initializing governor dimensions; those go both in the main
        # 'records' mapping we'll pass to init, and a local dictionary that we
        # can pass in when initializing storage for DatabaseDimensionElements.
        governors = NamedKeyDict[GovernorDimension, GovernorDimensionRecordStorage]()
        records = NamedKeyDict[DimensionElement, DimensionRecordStorage]()
        for dimension in universe.getGovernorDimensions():
            governorStorage = dimension.makeStorage(db, context=context)
            governors[dimension] = governorStorage
            records[dimension] = governorStorage
        # Next we initialize storage for DatabaseDimensionElements.
        # We remember the spatial ones (grouped by family) so we can go back
        # and initialize overlap storage for them later.
        spatial = NamedKeyDict[DatabaseTopologicalFamily, List[DatabaseDimensionRecordStorage]]()
        for element in universe.getDatabaseElements():
            elementStorage = element.makeStorage(db, context=context, governors=governors)
            records[element] = elementStorage
            if element.spatial is not None:
                spatial.setdefault(element.spatial, []).append(elementStorage)
        # Finally we initialize overlap storage.  The implementation class for
        # this is currently hard-coded (it's not obvious there will ever be
        # others).  Note that overlaps between database-backed dimensions and
        # skypix dimensions are internal to `DatabaseDimensionRecordStorage`,
        # and hence are not included here.
        from ..dimensions.overlaps import CrossFamilyDimensionOverlapStorage

        overlaps: Dict[
            Tuple[DatabaseDimensionElement, DatabaseDimensionElement], DatabaseDimensionOverlapStorage
        ] = {}
        for (family1, storages1), (family2, storages2) in itertools.combinations(spatial.items(), 2):
            for elementStoragePair in itertools.product(storages1, storages2):
                governorStoragePair = (governors[family1.governor], governors[family2.governor])
                if elementStoragePair[0].element > elementStoragePair[1].element:
                    # Put the element storages in sorted order, and swap the
                    # governor storages in lockstep so that each governor
                    # stays paired with the element of its own family.
                    elementStoragePair = (elementStoragePair[1], elementStoragePair[0])
                    governorStoragePair = (governorStoragePair[1], governorStoragePair[0])
                overlapStorage = CrossFamilyDimensionOverlapStorage.initialize(
                    db,
                    elementStoragePair,
                    governorStoragePair,
                    context=context,
                )
                # Both element storages are told about the overlap storage
                # that relates them.
                elementStoragePair[0].connect(overlapStorage)
                elementStoragePair[1].connect(overlapStorage)
                overlaps[overlapStorage.elements] = overlapStorage
        # Create table that stores DimensionGraph definitions.
        dimensionGraphStorage = _DimensionGraphStorage.initialize(db, context, universe=universe)
        return cls(
            db=db,
            records=records,
            universe=universe,
            overlaps=overlaps,
            dimensionGraphStorage=dimensionGraphStorage,
        )

    def refresh(self) -> None:
        # Docstring inherited from DimensionRecordStorageManager.
        # Only the governor dimension storages are refreshed here.
        for dimension in self.universe.getGovernorDimensions():
            storage = self._records[dimension]
            assert isinstance(storage, GovernorDimensionRecordStorage)
            storage.refresh()

    def get(self, element: DimensionElement) -> Optional[DimensionRecordStorage]:
        # Docstring inherited from DimensionRecordStorageManager.
        r = self._records.get(element)
        if r is None and isinstance(element, SkyPixDimension):
            # Skypix dimensions have no entry in the records mapping; their
            # storage is made on demand from the universe's own skypix
            # dimension with the same system and level.
            return self.universe.skypix[element.system][element.level].makeStorage()
        return r

    def register(self, element: DimensionElement) -> DimensionRecordStorage:
        # Docstring inherited from DimensionRecordStorageManager.
        # Nothing is ever created here; this is just a checked lookup.
        result = self.get(element)
        assert result, "All records instances should be created in initialize()."
        return result

    def saveDimensionGraph(self, graph: DimensionGraph) -> int:
        # Docstring inherited from DimensionRecordStorageManager.
        return self._dimensionGraphStorage.save(graph)

    def loadDimensionGraph(self, key: int) -> DimensionGraph:
        # Docstring inherited from DimensionRecordStorageManager.
        return self._dimensionGraphStorage.load(key)

    def clearCaches(self) -> None:
        # Docstring inherited from DimensionRecordStorageManager.
        for storage in self._records.values():
            storage.clearCaches()

    @classmethod
    def currentVersion(cls) -> Optional[VersionTuple]:
        # Docstring inherited from VersionedExtension.
        return _VERSION

    def schemaDigest(self) -> Optional[str]:
        # Docstring inherited from VersionedExtension.
        # The digest covers the tables of every record storage and every
        # overlap storage; the dimension-graph tables are not included.
        tables: List[sqlalchemy.schema.Table] = []
        for recStorage in self._records.values():
            tables += recStorage.digestTables()
        for overlapStorage in self._overlaps.values():
            tables += overlapStorage.digestTables()
        return self._defaultSchemaDigest(tables, self._db.dialect)
class _DimensionGraphStorage:
    """Persistence helper for `DimensionGraph` definitions.

    Instances keep a two-way in-memory cache between graphs and their integer
    keys, backed by a pair of database tables.  Use `initialize` rather than
    the constructor to obtain an instance.

    Parameters
    ----------
    db : `Database`
        Interface to the underlying database engine and namespace.
    idTable : `sqlalchemy.schema.Table`
        Table that just holds unique IDs for dimension graphs.
    definitionTable : `sqlalchemy.schema.Table`
        Table that maps dimension names to the IDs of the dimension graphs to
        which they belong.
    universe : `DimensionUniverse`
        All known dimensions.
    """

    def __init__(
        self,
        db: Database,
        idTable: sqlalchemy.schema.Table,
        definitionTable: sqlalchemy.schema.Table,
        universe: DimensionUniverse,
    ):
        self._db = db
        self._idTable = idTable
        self._definitionTable = definitionTable
        self._universe = universe
        # The empty graph is always known under key 0, without any database
        # access.
        self._keysByGraph: Dict[DimensionGraph, int] = {universe.empty: 0}
        self._graphsByKey: Dict[int, DimensionGraph] = {0: universe.empty}

    @classmethod
    def initialize(
        cls,
        db: Database,
        context: StaticTablesContext,
        *,
        universe: DimensionUniverse,
    ) -> _DimensionGraphStorage:
        """Declare the backing tables and build a new instance around them.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present.
        universe : `DimensionUniverse`
            All known dimensions.

        Returns
        -------
        storage : `_DimensionGraphStorage`
            New instance of this class.
        """
        # Two tables are needed so that one of them has the autoincrement key
        # as its only primary key column, which (at least) SQLite requires.
        # Other databases might allow a Sequence to be used directly.
        keySpec = ddl.TableSpec(
            fields=[
                ddl.FieldSpec(
                    name="id",
                    dtype=sqlalchemy.BigInteger,
                    autoincrement=True,
                    primaryKey=True,
                ),
            ],
        )
        idTable = context.addTable("dimension_graph_key", keySpec)
        definitionSpec = ddl.TableSpec(
            fields=[
                ddl.FieldSpec(name="dimension_graph_id", dtype=sqlalchemy.BigInteger, primaryKey=True),
                ddl.FieldSpec(name="dimension_name", dtype=sqlalchemy.Text, primaryKey=True),
            ],
            foreignKeys=[
                ddl.ForeignKeySpec(
                    "dimension_graph_key",
                    source=("dimension_graph_id",),
                    target=("id",),
                    onDelete="CASCADE",
                ),
            ],
        )
        definitionTable = context.addTable("dimension_graph_definition", definitionSpec)
        return cls(db, idTable, definitionTable, universe=universe)

    def refresh(self) -> None:
        """Rebuild the in-memory cache from the definition table.

        This is invoked internally when a lookup misses the cache, but it may
        also be called explicitly.
        """
        # Gather the dimension names recorded for each graph key.
        columns = self._definitionTable.columns
        namesByKey: Dict[int, Set[str]] = defaultdict(set)
        for row in self._db.query(self._definitionTable.select()).mappings():
            namesByKey[row[columns.dimension_graph_id]].add(row[columns.dimension_name])
        # Build fresh mappings, seeded with the empty graph under key 0, and
        # only then replace the old ones.
        emptyGraph = self._universe.empty
        graphsByKey: Dict[int, DimensionGraph] = {0: emptyGraph}
        keysByGraph: Dict[DimensionGraph, int] = {emptyGraph: 0}
        for graphKey, names in namesByKey.items():
            restored = DimensionGraph(self._universe, names=names)
            keysByGraph[restored] = graphKey
            graphsByKey[graphKey] = restored
        self._graphsByKey = graphsByKey
        self._keysByGraph = keysByGraph

    def save(self, graph: DimensionGraph) -> int:
        """Store a `DimensionGraph` definition and return the key under which
        it can be loaded again.

        Parameters
        ----------
        graph : `DimensionGraph`
            Set of dimensions to save.

        Returns
        -------
        key : `int`
            Integer used as the unique key for this `DimensionGraph` in the
            database.
        """
        cached = self._keysByGraph.get(graph)
        if cached is not None:
            return cached
        # Cache miss: lock both tables and refresh before deciding to insert,
        # to guard against another process registering the exact same graph
        # concurrently.  This is probably not the most efficient approach,
        # but it should be a rare operation, especially since the cache hit
        # above will usually succeed in long-lived data repositories.
        with self._db.transaction(lock=[self._idTable, self._definitionTable]):
            self.refresh()
            key = self._keysByGraph.get(graph)
            if key is None:
                (key,) = self._db.insert(self._idTable, {}, returnIds=True)  # type: ignore
                # One definition row per required dimension name.
                definitionRows = [
                    {"dimension_graph_id": key, "dimension_name": name} for name in graph.required.names
                ]
                self._db.insert(self._definitionTable, *definitionRows)
            self._keysByGraph[graph] = key
            self._graphsByKey[key] = graph
        return key

    def load(self, key: int) -> DimensionGraph:
        """Return the `DimensionGraph` previously saved under ``key``.

        Parameters
        ----------
        key : `int`
            Integer used as the unique key for this `DimensionGraph` in the
            database.

        Returns
        -------
        graph : `DimensionGraph`
            Retrieved graph.

        Raises
        ------
        KeyError
            Raised if ``key`` is still unknown after refreshing the cache.
        """
        if key not in self._graphsByKey:
            # The key may have been saved since the cache was last filled.
            self.refresh()
        return self._graphsByKey[key]