Coverage for python/lsst/daf/butler/registry/dimensions/static.py: 95%
118 statements
« prev ^ index » next coverage.py v6.5.0, created at 2022-11-19 01:58 -0800
« prev ^ index » next coverage.py v6.5.0, created at 2022-11-19 01:58 -0800
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23import itertools
24from collections import defaultdict
26import sqlalchemy
28from ...core import (
29 DatabaseDimensionElement,
30 DatabaseTopologicalFamily,
31 DimensionElement,
32 DimensionGraph,
33 DimensionUniverse,
34 GovernorDimension,
35 NamedKeyDict,
36 SkyPixDimension,
37 ddl,
38)
39from ..interfaces import (
40 Database,
41 DatabaseDimensionOverlapStorage,
42 DatabaseDimensionRecordStorage,
43 DimensionRecordStorage,
44 DimensionRecordStorageManager,
45 GovernorDimensionRecordStorage,
46 StaticTablesContext,
47 VersionTuple,
48)
# Schema version reported by `currentVersion` below.
# This has to be updated on every schema change.
_VERSION = VersionTuple(6, 0, 2)
class StaticDimensionRecordStorageManager(DimensionRecordStorageManager):
    """An implementation of `DimensionRecordStorageManager` for single-layer
    `Registry` and the base layers of multi-layer `Registry`.

    This manager creates `DimensionRecordStorage` instances for all elements
    in the `DimensionUniverse` in its own `initialize` method, as part of
    static table creation, so it never needs to manage any dynamic registry
    tables.

    Parameters
    ----------
    db : `Database`
        Interface to the underlying database engine and namespace.
    records : `NamedKeyDict`
        Mapping from `DimensionElement` to `DimensionRecordStorage` for that
        element.
    overlaps : `dict` [ `tuple`, `DatabaseDimensionOverlapStorage` ]
        Objects that manage materialized overlaps between database-backed
        dimensions, keyed by the pair of `DatabaseDimensionElement` objects
        they relate.
    dimensionGraphStorage : `_DimensionGraphStorage`
        Object that manages saved `DimensionGraph` definitions.
    universe : `DimensionUniverse`
        All known dimensions.
    """

    def __init__(
        self,
        db: Database,
        *,
        records: NamedKeyDict[DimensionElement, DimensionRecordStorage],
        overlaps: dict[
            tuple[DatabaseDimensionElement, DatabaseDimensionElement], DatabaseDimensionOverlapStorage
        ],
        dimensionGraphStorage: _DimensionGraphStorage,
        universe: DimensionUniverse,
    ):
        super().__init__(universe=universe)
        self._db = db
        self._records = records
        self._overlaps = overlaps
        self._dimensionGraphStorage = dimensionGraphStorage

    @classmethod
    def initialize(
        cls, db: Database, context: StaticTablesContext, *, universe: DimensionUniverse
    ) -> DimensionRecordStorageManager:
        # Docstring inherited from DimensionRecordStorageManager.
        # Start by initializing governor dimensions; those go both in the main
        # 'records' mapping we'll pass to init, and a local dictionary that we
        # can pass in when initializing storage for DatabaseDimensionElements.
        governors = NamedKeyDict[GovernorDimension, GovernorDimensionRecordStorage]()
        records = NamedKeyDict[DimensionElement, DimensionRecordStorage]()
        for dimension in universe.getGovernorDimensions():
            governorStorage = dimension.makeStorage(db, context=context)
            governors[dimension] = governorStorage
            records[dimension] = governorStorage
        # Next we initialize storage for DatabaseDimensionElements.
        # We remember the spatial ones (grouped by family) so we can go back
        # and initialize overlap storage for them later.
        spatial = NamedKeyDict[DatabaseTopologicalFamily, list[DatabaseDimensionRecordStorage]]()
        for element in universe.getDatabaseElements():
            elementStorage = element.makeStorage(db, context=context, governors=governors)
            records[element] = elementStorage
            if element.spatial is not None:
                spatial.setdefault(element.spatial, []).append(elementStorage)
        # Finally we initialize overlap storage.  The implementation class for
        # this is currently hard-coded (it's not obvious there will ever be
        # others).  Note that overlaps between database-backed dimensions and
        # skypix dimensions is internal to `DatabaseDimensionRecordStorage`,
        # and hence is not included here.
        from ..dimensions.overlaps import CrossFamilyDimensionOverlapStorage

        overlaps: dict[
            tuple[DatabaseDimensionElement, DatabaseDimensionElement], DatabaseDimensionOverlapStorage
        ] = {}
        # Visit every unordered pair of distinct spatial families, and within
        # each pair every combination of one element storage from each family.
        for (family1, storages1), (family2, storages2) in itertools.combinations(spatial.items(), 2):
            for elementStoragePair in itertools.product(storages1, storages2):
                governorStoragePair = (governors[family1.governor], governors[family2.governor])
                if elementStoragePair[0].element > elementStoragePair[1].element:
                    # Put the pair in sorted element order.  The governor
                    # storages must be swapped in step so that each one still
                    # lines up with the family of the element at the same
                    # position (index 1 then index 0 - not index 1 twice).
                    elementStoragePair = (elementStoragePair[1], elementStoragePair[0])
                    governorStoragePair = (governorStoragePair[1], governorStoragePair[0])
                overlapStorage = CrossFamilyDimensionOverlapStorage.initialize(
                    db,
                    elementStoragePair,
                    governorStoragePair,
                    context=context,
                )
                # Both element storages are told about the shared overlap
                # storage object.
                elementStoragePair[0].connect(overlapStorage)
                elementStoragePair[1].connect(overlapStorage)
                overlaps[overlapStorage.elements] = overlapStorage
        # Create table that stores DimensionGraph definitions.
        dimensionGraphStorage = _DimensionGraphStorage.initialize(db, context, universe=universe)
        return cls(
            db=db,
            records=records,
            universe=universe,
            overlaps=overlaps,
            dimensionGraphStorage=dimensionGraphStorage,
        )

    def refresh(self) -> None:
        # Docstring inherited from DimensionRecordStorageManager.
        # Only the governor dimension storages are refreshed here.
        for dimension in self.universe.getGovernorDimensions():
            storage = self._records[dimension]
            assert isinstance(storage, GovernorDimensionRecordStorage)
            storage.refresh()

    def get(self, element: DimensionElement) -> DimensionRecordStorage | None:
        # Docstring inherited from DimensionRecordStorageManager.
        r = self._records.get(element)
        if r is None and isinstance(element, SkyPixDimension):
            # SkyPix dimensions missing from the records mapping get their
            # storage built on demand from the universe's skypix lookup.
            return self.universe.skypix[element.system][element.level].makeStorage()
        return r

    def register(self, element: DimensionElement) -> DimensionRecordStorage:
        # Docstring inherited from DimensionRecordStorageManager.
        # Nothing is created here; this only looks up what `initialize` (or
        # the skypix fallback in `get`) provides.
        result = self.get(element)
        assert result, "All records instances should be created in initialize()."
        return result

    def saveDimensionGraph(self, graph: DimensionGraph) -> int:
        # Docstring inherited from DimensionRecordStorageManager.
        return self._dimensionGraphStorage.save(graph)

    def loadDimensionGraph(self, key: int) -> DimensionGraph:
        # Docstring inherited from DimensionRecordStorageManager.
        return self._dimensionGraphStorage.load(key)

    def clearCaches(self) -> None:
        # Docstring inherited from DimensionRecordStorageManager.
        for storage in self._records.values():
            storage.clearCaches()

    @classmethod
    def currentVersion(cls) -> VersionTuple | None:
        # Docstring inherited from VersionedExtension.
        return _VERSION

    def schemaDigest(self) -> str | None:
        # Docstring inherited from VersionedExtension.
        # The digest covers the tables reported by every record storage and
        # every overlap storage.
        tables: list[sqlalchemy.schema.Table] = []
        for recStorage in self._records.values():
            tables += recStorage.digestTables()
        for overlapStorage in self._overlaps.values():
            tables += overlapStorage.digestTables()
        return self._defaultSchemaDigest(tables, self._db.dialect)
class _DimensionGraphStorage:
    """Helper object that manages saved DimensionGraph definitions.

    Should generally be constructed by calling `initialize` instead of invoking
    the constructor directly.

    Parameters
    ----------
    db : `Database`
        Interface to the underlying database engine and namespace.
    idTable : `sqlalchemy.schema.Table`
        Table that just holds unique IDs for dimension graphs.
    definitionTable : `sqlalchemy.schema.Table`
        Table that maps dimension names to the IDs of the dimension graphs to
        which they belong.
    universe : `DimensionUniverse`
        All known dimensions.
    """

    def __init__(
        self,
        db: Database,
        idTable: sqlalchemy.schema.Table,
        definitionTable: sqlalchemy.schema.Table,
        universe: DimensionUniverse,
    ):
        self._db = db
        self._idTable = idTable
        self._definitionTable = definitionTable
        self._universe = universe
        # The empty graph is always present in both caches under key 0, so it
        # never has to be written to or read from the database.
        self._keysByGraph: dict[DimensionGraph, int] = {universe.empty: 0}
        self._graphsByKey: dict[int, DimensionGraph] = {0: universe.empty}

    @classmethod
    def initialize(
        cls,
        db: Database,
        context: StaticTablesContext,
        *,
        universe: DimensionUniverse,
    ) -> _DimensionGraphStorage:
        """Construct a new instance, including creating tables if necessary.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present.
        universe : `DimensionUniverse`
            All known dimensions.

        Returns
        -------
        storage : `_DimensionGraphStorage`
            New instance of this class.
        """
        # We need two tables just so we have one where the autoincrement key is
        # the only primary key column, as is required by (at least) SQLite.  In
        # other databases, we might be able to use a Sequence directly.
        idTable = context.addTable(
            "dimension_graph_key",
            ddl.TableSpec(
                fields=[
                    ddl.FieldSpec(
                        name="id",
                        dtype=sqlalchemy.BigInteger,
                        autoincrement=True,
                        primaryKey=True,
                    ),
                ],
            ),
        )
        definitionTable = context.addTable(
            "dimension_graph_definition",
            ddl.TableSpec(
                fields=[
                    ddl.FieldSpec(name="dimension_graph_id", dtype=sqlalchemy.BigInteger, primaryKey=True),
                    ddl.FieldSpec(name="dimension_name", dtype=sqlalchemy.Text, primaryKey=True),
                ],
                foreignKeys=[
                    ddl.ForeignKeySpec(
                        "dimension_graph_key",
                        source=("dimension_graph_id",),
                        target=("id",),
                        onDelete="CASCADE",
                    ),
                ],
            ),
        )
        return cls(db, idTable, definitionTable, universe=universe)

    def refresh(self) -> None:
        """Refresh the in-memory cache of saved DimensionGraph definitions.

        This should be done automatically whenever needed, but it can also
        be called explicitly.
        """
        # Group the (graph ID, dimension name) rows of the definition table
        # by graph ID.
        dimensionNamesByKey: dict[int, set[str]] = defaultdict(set)
        for row in self._db.query(self._definitionTable.select()).mappings():
            key = row[self._definitionTable.columns.dimension_graph_id]
            dimensionNamesByKey[key].add(row[self._definitionTable.columns.dimension_name])
        # Build fresh mappings (seeded with the empty graph at key 0) and only
        # replace the instance attributes once both are complete.
        keysByGraph: dict[DimensionGraph, int] = {self._universe.empty: 0}
        graphsByKey: dict[int, DimensionGraph] = {0: self._universe.empty}
        for key, dimensionNames in dimensionNamesByKey.items():
            graph = DimensionGraph(self._universe, names=dimensionNames)
            keysByGraph[graph] = key
            graphsByKey[key] = graph
        self._graphsByKey = graphsByKey
        self._keysByGraph = keysByGraph

    def save(self, graph: DimensionGraph) -> int:
        """Save a `DimensionGraph` definition to the database, allowing it to
        be retrieved later via the returned key.

        Parameters
        ----------
        graph : `DimensionGraph`
            Set of dimensions to save.

        Returns
        -------
        key : `int`
            Integer used as the unique key for this `DimensionGraph` in the
            database.
        """
        key = self._keysByGraph.get(graph)
        if key is not None:
            return key
        # Lock tables and then refresh to guard against races where some other
        # process is trying to register the exact same dimension graph.  This
        # is probably not the most efficient way to do it, but it should be a
        # rare operation, especially since the short-circuit above will usually
        # work in long-lived data repositories.
        with self._db.transaction(lock=[self._idTable, self._definitionTable]):
            self.refresh()
            key = self._keysByGraph.get(graph)
            if key is None:
                # Obtain a new ID from the key table, then record one
                # definition row per required dimension name.
                (key,) = self._db.insert(self._idTable, {}, returnIds=True)  # type: ignore
                self._db.insert(
                    self._definitionTable,
                    *[{"dimension_graph_id": key, "dimension_name": name} for name in graph.required.names],
                )
            self._keysByGraph[graph] = key
            self._graphsByKey[key] = graph
        return key

    def load(self, key: int) -> DimensionGraph:
        """Retrieve a `DimensionGraph` that was previously saved in the
        database.

        Parameters
        ----------
        key : `int`
            Integer used as the unique key for this `DimensionGraph` in the
            database.

        Returns
        -------
        graph : `DimensionGraph`
            Retrieved graph.

        Raises
        ------
        KeyError
            Raised if ``key`` is still not present in the cache after it has
            been refreshed from the database.
        """
        graph = self._graphsByKey.get(key)
        if graph is None:
            # Cache miss: reload the definitions from the database once and
            # look again.
            self.refresh()
            graph = self._graphsByKey[key]
        return graph