Coverage for python/lsst/daf/butler/registry/dimensions/static.py : 95%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23from collections import defaultdict
24import itertools
25from typing import Dict, List, Optional, Set, Tuple
27import sqlalchemy
29from ...core import (
30 DatabaseDimensionElement,
31 DatabaseTopologicalFamily,
32 ddl,
33 DimensionElement,
34 DimensionGraph,
35 DimensionUniverse,
36 GovernorDimension,
37 NamedKeyDict,
38 SkyPixDimension,
39)
40from ..interfaces import (
41 Database,
42 StaticTablesContext,
43 DatabaseDimensionRecordStorage,
44 DatabaseDimensionOverlapStorage,
45 DimensionRecordStorageManager,
46 DimensionRecordStorage,
47 GovernorDimensionRecordStorage,
48 VersionTuple
49)
# Schema version reported by
# `StaticDimensionRecordStorageManager.currentVersion`.
# This has to be updated on every schema change.
_VERSION = VersionTuple(6, 0, 0)
class StaticDimensionRecordStorageManager(DimensionRecordStorageManager):
    """A `DimensionRecordStorageManager` for single-layer `Registry` and the
    base layers of multi-layer `Registry`.

    Every `DimensionRecordStorage` instance for the elements of the
    `DimensionUniverse` is created by this manager's own `initialize` method
    while static tables are being declared, so it never has to manage any
    dynamic registry tables.

    Parameters
    ----------
    db : `Database`
        Interface to the underlying database engine and namespace.
    records : `NamedKeyDict`
        Mapping from `DimensionElement` to the `DimensionRecordStorage` for
        that element.
    overlaps : `dict`
        Objects that manage materialized overlaps between database-backed
        dimensions, keyed by the `tuple` of the two
        `DatabaseDimensionElement` instances each one relates.
    dimensionGraphStorage : `_DimensionGraphStorage`
        Object that manages saved `DimensionGraph` definitions.
    universe : `DimensionUniverse`
        All known dimensions.
    """

    def __init__(
        self,
        db: Database, *,
        records: NamedKeyDict[DimensionElement, DimensionRecordStorage],
        overlaps: Dict[Tuple[DatabaseDimensionElement, DatabaseDimensionElement],
                       DatabaseDimensionOverlapStorage],
        dimensionGraphStorage: _DimensionGraphStorage,
        universe: DimensionUniverse,
    ):
        super().__init__(universe=universe)
        self._db = db
        self._records = records
        self._overlaps = overlaps
        self._dimensionGraphStorage = dimensionGraphStorage

    @classmethod
    def initialize(cls, db: Database, context: StaticTablesContext, *,
                   universe: DimensionUniverse) -> DimensionRecordStorageManager:
        # Docstring inherited from DimensionRecordStorageManager.

        # Step 1: governor dimensions.  Their storage objects are recorded
        # twice: in the main 'records' mapping handed to the constructor, and
        # in a governor-only mapping that is passed along when storage for
        # DatabaseDimensionElements is created.
        governors = NamedKeyDict[GovernorDimension, GovernorDimensionRecordStorage]()
        records = NamedKeyDict[DimensionElement, DimensionRecordStorage]()
        for governor in universe.getGovernorDimensions():
            storageForGovernor = governor.makeStorage(db, context=context)
            governors[governor] = storageForGovernor
            records[governor] = storageForGovernor

        # Step 2: DatabaseDimensionElements.  Storage for spatial elements is
        # also grouped by topological family so overlap storage can be set up
        # for them afterwards.
        spatial = NamedKeyDict[DatabaseTopologicalFamily, List[DatabaseDimensionRecordStorage]]()
        for dbElement in universe.getDatabaseElements():
            storageForElement = dbElement.makeStorage(db, context=context, governors=governors)
            records[dbElement] = storageForElement
            family = dbElement.spatial
            if family is not None:
                spatial.setdefault(family, []).append(storageForElement)

        # Step 3: overlap storage.  The implementation class is currently
        # hard-coded (it's not obvious there will ever be others).  Overlaps
        # between database-backed dimensions and skypix dimensions are
        # internal to `DatabaseDimensionRecordStorage` and therefore are not
        # handled here.
        from ..dimensions.overlaps import CrossFamilyDimensionOverlapStorage
        overlaps: Dict[Tuple[DatabaseDimensionElement, DatabaseDimensionElement],
                       DatabaseDimensionOverlapStorage] = {}
        familyPairs = itertools.combinations(spatial.items(), 2)
        for (family1, storages1), (family2, storages2) in familyPairs:
            for firstStorage, secondStorage in itertools.product(storages1, storages2):
                firstGovernor = governors[family1.governor]
                secondGovernor = governors[family2.governor]
                # Put the pair in element order, swapping the governor
                # storages along with the element storages.
                if firstStorage.element > secondStorage.element:
                    firstStorage, secondStorage = secondStorage, firstStorage
                    firstGovernor, secondGovernor = secondGovernor, firstGovernor
                overlapStorage = CrossFamilyDimensionOverlapStorage.initialize(
                    db,
                    (firstStorage, secondStorage),
                    (firstGovernor, secondGovernor),
                    context=context,
                )
                firstStorage.connect(overlapStorage)
                secondStorage.connect(overlapStorage)
                overlaps[overlapStorage.elements] = overlapStorage

        # Step 4: the tables that hold saved DimensionGraph definitions.
        graphStorage = _DimensionGraphStorage.initialize(db, context, universe=universe)
        return cls(
            db=db,
            records=records,
            universe=universe,
            overlaps=overlaps,
            dimensionGraphStorage=graphStorage,
        )

    def refresh(self) -> None:
        # Docstring inherited from DimensionRecordStorageManager.
        for governor in self.universe.getGovernorDimensions():
            governorStorage = self._records[governor]
            assert isinstance(governorStorage, GovernorDimensionRecordStorage)
            governorStorage.refresh()

    def get(self, element: DimensionElement) -> Optional[DimensionRecordStorage]:
        # Docstring inherited from DimensionRecordStorageManager.
        found = self._records.get(element)
        if found is not None or not isinstance(element, SkyPixDimension):
            return found
        # A skypix dimension with no entry in the records mapping gets its
        # storage created on demand.
        skypix = self.universe.skypix[element.system][element.level]
        return skypix.makeStorage()

    def register(self, element: DimensionElement) -> DimensionRecordStorage:
        # Docstring inherited from DimensionRecordStorageManager.
        existing = self.get(element)
        assert existing, "All records instances should be created in initialize()."
        return existing

    def saveDimensionGraph(self, graph: DimensionGraph) -> int:
        # Docstring inherited from DimensionRecordStorageManager.
        graphStorage = self._dimensionGraphStorage
        return graphStorage.save(graph)

    def loadDimensionGraph(self, key: int) -> DimensionGraph:
        # Docstring inherited from DimensionRecordStorageManager.
        graphStorage = self._dimensionGraphStorage
        return graphStorage.load(key)

    def clearCaches(self) -> None:
        # Docstring inherited from DimensionRecordStorageManager.
        for recordStorage in self._records.values():
            recordStorage.clearCaches()

    @classmethod
    def currentVersion(cls) -> Optional[VersionTuple]:
        # Docstring inherited from VersionedExtension.
        return _VERSION

    def schemaDigest(self) -> Optional[str]:
        # Docstring inherited from VersionedExtension.
        # Tables from record storage come first, then those from overlap
        # storage.
        tables: List[sqlalchemy.schema.Table] = []
        for recordStorage in self._records.values():
            tables.extend(recordStorage.digestTables())
        for overlapStorage in self._overlaps.values():
            tables.extend(overlapStorage.digestTables())
        return self._defaultSchemaDigest(tables, self._db.dialect)
class _DimensionGraphStorage:
    """Helper object that manages saved `DimensionGraph` definitions.

    Should generally be constructed by calling `initialize` instead of
    invoking the constructor directly.

    Parameters
    ----------
    db : `Database`
        Interface to the underlying database engine and namespace.
    idTable : `sqlalchemy.schema.Table`
        Table that just holds unique IDs for dimension graphs.
    definitionTable : `sqlalchemy.schema.Table`
        Table that maps dimension names to the IDs of the dimension graphs to
        which they belong.
    universe : `DimensionUniverse`
        All known dimensions.
    """

    def __init__(
        self,
        db: Database,
        idTable: sqlalchemy.schema.Table,
        definitionTable: sqlalchemy.schema.Table,
        universe: DimensionUniverse,
    ):
        self._db = db
        self._idTable = idTable
        self._definitionTable = definitionTable
        self._universe = universe
        # In-memory caches in both directions.  The empty graph is always
        # present under key 0, without any database access.
        self._keysByGraph: Dict[DimensionGraph, int] = {universe.empty: 0}
        self._graphsByKey: Dict[int, DimensionGraph] = {0: universe.empty}

    @classmethod
    def initialize(
        cls,
        db: Database,
        context: StaticTablesContext, *,
        universe: DimensionUniverse,
    ) -> _DimensionGraphStorage:
        """Construct a new instance, including creating tables if necessary.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present.
        universe : `DimensionUniverse`
            All known dimensions.

        Returns
        -------
        storage : `_DimensionGraphStorage`
            New instance of this class.
        """
        # We need two tables just so we have one where the autoincrement key is
        # the only primary key column, as is required by (at least) SQLite.  In
        # other databases, we might be able to use a Sequence directly.
        idTable = context.addTable(
            "dimension_graph_key",
            ddl.TableSpec(
                fields=[
                    ddl.FieldSpec(
                        name="id",
                        dtype=sqlalchemy.BigInteger,
                        autoincrement=True,
                        primaryKey=True,
                    ),
                ],
            )
        )
        # One row per (graph, dimension name) pair; rows are removed along
        # with their graph ID via the CASCADE foreign key.
        definitionTable = context.addTable(
            "dimension_graph_definition",
            ddl.TableSpec(
                fields=[
                    ddl.FieldSpec(name="dimension_graph_id", dtype=sqlalchemy.BigInteger, primaryKey=True),
                    ddl.FieldSpec(name="dimension_name", dtype=sqlalchemy.Text, primaryKey=True),
                ],
                foreignKeys=[
                    ddl.ForeignKeySpec(
                        "dimension_graph_key",
                        source=("dimension_graph_id",),
                        target=("id",),
                        onDelete="CASCADE",
                    ),
                ],
            )
        )
        return cls(db, idTable, definitionTable, universe=universe)

    def refresh(self) -> None:
        """Refresh the in-memory cache of saved DimensionGraph definitions.

        This should be done automatically whenever needed, but it can also
        be called explicitly.
        """
        # Group the stored dimension names by the graph key they belong to.
        columns = self._definitionTable.columns
        dimensionNamesByKey: Dict[int, Set[str]] = defaultdict(set)
        for row in self._db.query(self._definitionTable.select()):
            key = row[columns.dimension_graph_id]
            dimensionNamesByKey[key].add(row[columns.dimension_name])
        # Build fresh mappings (always seeded with the empty graph) and only
        # then replace the old ones, so the caches are swapped as a pair.
        keysByGraph: Dict[DimensionGraph, int] = {self._universe.empty: 0}
        graphsByKey: Dict[int, DimensionGraph] = {0: self._universe.empty}
        for key, dimensionNames in dimensionNamesByKey.items():
            graph = DimensionGraph(self._universe, names=dimensionNames)
            keysByGraph[graph] = key
            graphsByKey[key] = graph
        self._graphsByKey = graphsByKey
        self._keysByGraph = keysByGraph

    def save(self, graph: DimensionGraph) -> int:
        """Save a `DimensionGraph` definition to the database, allowing it to
        be retrieved later via the returned key.

        Parameters
        ----------
        graph : `DimensionGraph`
            Set of dimensions to save.

        Returns
        -------
        key : `int`
            Integer used as the unique key for this `DimensionGraph` in the
            database.
        """
        key = self._keysByGraph.get(graph)
        if key is not None:
            return key
        # Lock tables and then refresh to guard against races where some other
        # process is trying to register the exact same dimension graph.  This
        # is probably not the most efficient way to do it, but it should be a
        # rare operation, especially since the short-circuit above will usually
        # work in long-lived data repositories.
        with self._db.transaction(lock=[self._idTable, self._definitionTable]):
            self.refresh()
            key = self._keysByGraph.get(graph)
            if key is None:
                # Obtain a new autoincrement ID, then record the names of the
                # graph's required dimensions under that ID.
                (key,) = self._db.insert(self._idTable, {}, returnIds=True)  # type: ignore
                self._db.insert(
                    self._definitionTable,
                    *[
                        {"dimension_graph_id": key, "dimension_name": name}
                        for name in graph.required.names
                    ],
                )
            self._keysByGraph[graph] = key
            self._graphsByKey[key] = graph
        return key

    def load(self, key: int) -> DimensionGraph:
        """Retrieve a `DimensionGraph` that was previously saved in the
        database.

        Parameters
        ----------
        key : `int`
            Integer used as the unique key for this `DimensionGraph` in the
            database.

        Returns
        -------
        graph : `DimensionGraph`
            Retrieved graph.

        Raises
        ------
        KeyError
            Raised if no graph is associated with ``key``, even after the
            in-memory cache has been refreshed from the database.
        """
        graph = self._graphsByKey.get(key)
        if graph is None:
            # Cache miss: reload the cache from the database and retry once.
            self.refresh()
            try:
                graph = self._graphsByKey[key]
            except KeyError:
                raise KeyError(
                    f"No DimensionGraph definition found for key {key}."
                ) from None
        return graph