Coverage for python/lsst/daf/butler/registry/dimensions/static.py: 95%
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23from collections import defaultdict
24import itertools
25from typing import Dict, List, Optional, Set, Tuple
27import sqlalchemy
29from ...core import (
30 DatabaseDimensionElement,
31 DatabaseTopologicalFamily,
32 ddl,
33 DimensionElement,
34 DimensionGraph,
35 DimensionUniverse,
36 GovernorDimension,
37 NamedKeyDict,
38 SkyPixDimension,
39)
40from ..interfaces import (
41 Database,
42 StaticTablesContext,
43 DatabaseDimensionRecordStorage,
44 DatabaseDimensionOverlapStorage,
45 DimensionRecordStorageManager,
46 DimensionRecordStorage,
47 GovernorDimensionRecordStorage,
48 VersionTuple
49)
# Version reported by `StaticDimensionRecordStorageManager.currentVersion`.
# This has to be updated on every schema change.
_VERSION = VersionTuple(6, 0, 1)
class StaticDimensionRecordStorageManager(DimensionRecordStorageManager):
    """An implementation of `DimensionRecordStorageManager` for single-layer
    `Registry` and the base layers of multi-layer `Registry`.

    This manager creates `DimensionRecordStorage` instances for all elements
    in the `DimensionUniverse` in its own `initialize` method, as part of
    static table creation, so it never needs to manage any dynamic registry
    tables.

    Parameters
    ----------
    db : `Database`
        Interface to the underlying database engine and namespace.
    records : `NamedKeyDict`
        Mapping from `DimensionElement` to `DimensionRecordStorage` for that
        element.
    overlaps : `dict` [ `tuple`, `DatabaseDimensionOverlapStorage` ]
        Objects that manage materialized overlaps between database-backed
        dimensions, keyed by the pair of `DatabaseDimensionElement` objects
        each one relates.
    dimensionGraphStorage : `_DimensionGraphStorage`
        Object that manages saved `DimensionGraph` definitions.
    universe : `DimensionUniverse`
        All known dimensions.
    """
    def __init__(
        self,
        db: Database, *,
        records: NamedKeyDict[DimensionElement, DimensionRecordStorage],
        overlaps: Dict[Tuple[DatabaseDimensionElement, DatabaseDimensionElement],
                       DatabaseDimensionOverlapStorage],
        dimensionGraphStorage: _DimensionGraphStorage,
        universe: DimensionUniverse,
    ):
        super().__init__(universe=universe)
        self._db = db
        self._records = records
        self._overlaps = overlaps
        self._dimensionGraphStorage = dimensionGraphStorage

    @classmethod
    def initialize(cls, db: Database, context: StaticTablesContext, *,
                   universe: DimensionUniverse) -> DimensionRecordStorageManager:
        # Docstring inherited from DimensionRecordStorageManager.
        # Start by initializing governor dimensions; those go both in the main
        # 'records' mapping we'll pass to init, and a local dictionary that we
        # can pass in when initializing storage for DatabaseDimensionElements.
        governors = NamedKeyDict[GovernorDimension, GovernorDimensionRecordStorage]()
        records = NamedKeyDict[DimensionElement, DimensionRecordStorage]()
        for dimension in universe.getGovernorDimensions():
            governorStorage = dimension.makeStorage(db, context=context)
            governors[dimension] = governorStorage
            records[dimension] = governorStorage
        # Next we initialize storage for DatabaseDimensionElements.
        # We remember the spatial ones (grouped by family) so we can go back
        # and initialize overlap storage for them later.
        spatial = NamedKeyDict[DatabaseTopologicalFamily, List[DatabaseDimensionRecordStorage]]()
        for element in universe.getDatabaseElements():
            elementStorage = element.makeStorage(db, context=context, governors=governors)
            records[element] = elementStorage
            if element.spatial is not None:
                spatial.setdefault(element.spatial, []).append(elementStorage)
        # Finally we initialize overlap storage.  The implementation class for
        # this is currently hard-coded (it's not obvious there will ever be
        # others).  Note that overlaps between database-backed dimensions and
        # skypix dimensions is internal to `DatabaseDimensionRecordStorage`,
        # and hence is not included here.
        from ..dimensions.overlaps import CrossFamilyDimensionOverlapStorage
        overlaps: Dict[Tuple[DatabaseDimensionElement, DatabaseDimensionElement],
                       DatabaseDimensionOverlapStorage] = {}
        for (family1, storages1), (family2, storages2) in itertools.combinations(spatial.items(), 2):
            for elementStoragePair in itertools.product(storages1, storages2):
                # The governor pair is positionally aligned with the element
                # pair: entry i is the governor of the family of element i.
                governorStoragePair = (governors[family1.governor], governors[family2.governor])
                if elementStoragePair[0].element > elementStoragePair[1].element:
                    # Put the elements in sorted order, and swap the governors
                    # in step so the two pairs stay aligned.
                    elementStoragePair = (elementStoragePair[1], elementStoragePair[0])
                    governorStoragePair = (governorStoragePair[1], governorStoragePair[0])
                overlapStorage = CrossFamilyDimensionOverlapStorage.initialize(
                    db,
                    elementStoragePair,
                    governorStoragePair,
                    context=context,
                )
                # Both element storages are told about the overlap storage
                # that relates them.
                elementStoragePair[0].connect(overlapStorage)
                elementStoragePair[1].connect(overlapStorage)
                overlaps[overlapStorage.elements] = overlapStorage
        # Create table that stores DimensionGraph definitions.
        dimensionGraphStorage = _DimensionGraphStorage.initialize(db, context, universe=universe)
        return cls(db=db, records=records, universe=universe, overlaps=overlaps,
                   dimensionGraphStorage=dimensionGraphStorage)

    def refresh(self) -> None:
        # Docstring inherited from DimensionRecordStorageManager.
        # Only governor dimension storage is refreshed here.
        for dimension in self.universe.getGovernorDimensions():
            storage = self._records[dimension]
            assert isinstance(storage, GovernorDimensionRecordStorage)
            storage.refresh()

    def get(self, element: DimensionElement) -> Optional[DimensionRecordStorage]:
        # Docstring inherited from DimensionRecordStorageManager.
        r = self._records.get(element)
        if r is None and isinstance(element, SkyPixDimension):
            # No stored entry for this skypix dimension: build its storage on
            # demand from the universe's skypix definitions.
            return self.universe.skypix[element.system][element.level].makeStorage()
        return r

    def register(self, element: DimensionElement) -> DimensionRecordStorage:
        # Docstring inherited from DimensionRecordStorageManager.
        # Nothing is created here; this just looks up existing storage.
        result = self.get(element)
        assert result, "All records instances should be created in initialize()."
        return result

    def saveDimensionGraph(self, graph: DimensionGraph) -> int:
        # Docstring inherited from DimensionRecordStorageManager.
        return self._dimensionGraphStorage.save(graph)

    def loadDimensionGraph(self, key: int) -> DimensionGraph:
        # Docstring inherited from DimensionRecordStorageManager.
        return self._dimensionGraphStorage.load(key)

    def clearCaches(self) -> None:
        # Docstring inherited from DimensionRecordStorageManager.
        for storage in self._records.values():
            storage.clearCaches()

    @classmethod
    def currentVersion(cls) -> Optional[VersionTuple]:
        # Docstring inherited from VersionedExtension.
        return _VERSION

    def schemaDigest(self) -> Optional[str]:
        # Docstring inherited from VersionedExtension.
        # The digest covers the tables of every record storage and every
        # overlap storage held by this manager.
        tables: List[sqlalchemy.schema.Table] = []
        for recStorage in self._records.values():
            tables += recStorage.digestTables()
        for overlapStorage in self._overlaps.values():
            tables += overlapStorage.digestTables()
        return self._defaultSchemaDigest(tables, self._db.dialect)
class _DimensionGraphStorage:
    """Helper that persists `DimensionGraph` definitions and caches them in
    memory.

    Instances should generally be obtained from `initialize` rather than by
    calling the constructor directly.

    Parameters
    ----------
    db : `Database`
        Interface to the underlying database engine and namespace.
    idTable : `sqlalchemy.schema.Table`
        Table that just holds unique IDs for dimension graphs.
    definitionTable : `sqlalchemy.schema.Table`
        Table that maps dimension names to the IDs of the dimension graphs to
        which they belong.
    universe : `DimensionUniverse`
        All known dimensions.
    """
    def __init__(
        self,
        db: Database,
        idTable: sqlalchemy.schema.Table,
        definitionTable: sqlalchemy.schema.Table,
        universe: DimensionUniverse,
    ):
        self._universe = universe
        self._db = db
        self._definitionTable = definitionTable
        self._idTable = idTable
        # The empty graph is always known, under the reserved key 0, without
        # any database access.
        self._graphsByKey: Dict[int, DimensionGraph] = {0: universe.empty}
        self._keysByGraph: Dict[DimensionGraph, int] = {universe.empty: 0}

    @classmethod
    def initialize(
        cls,
        db: Database,
        context: StaticTablesContext, *,
        universe: DimensionUniverse,
    ) -> _DimensionGraphStorage:
        """Declare the backing tables and build a new instance on top of them.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present.
        universe : `DimensionUniverse`
            All known dimensions.

        Returns
        -------
        storage : `_DimensionGraphStorage`
            New instance of this class.
        """
        # Two tables are needed so that one of them has the autoincrement key
        # as its only primary key column, as is required by (at least)
        # SQLite.  Other databases might allow using a Sequence directly.
        keyField = ddl.FieldSpec(
            name="id",
            dtype=sqlalchemy.BigInteger,
            autoincrement=True,
            primaryKey=True,
        )
        keySpec = ddl.TableSpec(fields=[keyField])
        idTable = context.addTable("dimension_graph_key", keySpec)

        # One row per (graph, dimension name), tied to the key table so rows
        # are deleted along with their key.
        graphIdField = ddl.FieldSpec(name="dimension_graph_id", dtype=sqlalchemy.BigInteger, primaryKey=True)
        nameField = ddl.FieldSpec(name="dimension_name", dtype=sqlalchemy.Text, primaryKey=True)
        keyReference = ddl.ForeignKeySpec(
            "dimension_graph_key",
            source=("dimension_graph_id",),
            target=("id",),
            onDelete="CASCADE",
        )
        definitionSpec = ddl.TableSpec(
            fields=[graphIdField, nameField],
            foreignKeys=[keyReference],
        )
        definitionTable = context.addTable("dimension_graph_definition", definitionSpec)
        return cls(db, idTable, definitionTable, universe=universe)

    def refresh(self) -> None:
        """Rebuild the in-memory cache of saved DimensionGraph definitions
        from the database.

        `save` and `load` call this themselves when they need to, but it can
        also be called explicitly.
        """
        columns = self._definitionTable.columns
        namesByKey: Dict[int, Set[str]] = defaultdict(set)
        # Gather the dimension names stored for each graph key.
        for row in self._db.query(self._definitionTable.select()).mappings():
            graphKey = row[columns.dimension_graph_id]
            namesByKey[graphKey].add(row[columns.dimension_name])
        # Build fresh mappings and install them only once both are complete.
        emptyGraph = self._universe.empty
        freshKeys: Dict[DimensionGraph, int] = {emptyGraph: 0}
        freshGraphs: Dict[int, DimensionGraph] = {0: emptyGraph}
        for graphKey, names in namesByKey.items():
            rebuilt = DimensionGraph(self._universe, names=names)
            freshKeys[rebuilt] = graphKey
            freshGraphs[graphKey] = rebuilt
        self._graphsByKey = freshGraphs
        self._keysByGraph = freshKeys

    def save(self, graph: DimensionGraph) -> int:
        """Store a `DimensionGraph` definition in the database and return the
        key under which it can be retrieved later.

        Parameters
        ----------
        graph : `DimensionGraph`
            Set of dimensions to save.

        Returns
        -------
        key : `int`
            Integer used as the unique key for this `DimensionGraph` in the
            database.
        """
        cached = self._keysByGraph.get(graph)
        if cached is not None:
            return cached
        # Lock the tables and refresh before inserting, to guard against
        # another process registering the very same dimension graph at the
        # same time.  Probably not the most efficient approach, but this
        # should be a rare operation: the short-circuit above will usually
        # succeed in long-lived data repositories.
        with self._db.transaction(lock=[self._idTable, self._definitionTable]):
            self.refresh()
            key = self._keysByGraph.get(graph)
            if key is None:
                (key,) = self._db.insert(self._idTable, {}, returnIds=True)  # type: ignore
                # Only the required dimension names are written.
                definitionRows = [
                    {"dimension_graph_id": key, "dimension_name": name}
                    for name in graph.required.names
                ]
                self._db.insert(self._definitionTable, *definitionRows)
            self._keysByGraph[graph] = key
            self._graphsByKey[key] = graph
        return key

    def load(self, key: int) -> DimensionGraph:
        """Return the `DimensionGraph` previously saved under ``key``.

        Parameters
        ----------
        key : `int`
            Integer used as the unique key for this `DimensionGraph` in the
            database.

        Returns
        -------
        graph : `DimensionGraph`
            Retrieved graph.

        Raises
        ------
        KeyError
            Raised if ``key`` is still unknown after refreshing the cache.
        """
        cached = self._graphsByKey.get(key)
        if cached is not None:
            return cached
        # Cache miss: re-read the definitions and try once more.
        self.refresh()
        return self._graphsByKey[key]