Coverage for python/lsst/daf/butler/registry/dimensions/static.py: 96%

120 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2023-01-06 01:41 -0800

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23import itertools 

24from collections import defaultdict 

25 

26import sqlalchemy 

27 

28from ...core import ( 

29 DatabaseDimensionElement, 

30 DatabaseTopologicalFamily, 

31 DimensionElement, 

32 DimensionGraph, 

33 DimensionUniverse, 

34 GovernorDimension, 

35 NamedKeyDict, 

36 SkyPixDimension, 

37 ddl, 

38) 

39from ..interfaces import ( 

40 Database, 

41 DatabaseDimensionOverlapStorage, 

42 DatabaseDimensionRecordStorage, 

43 DimensionRecordStorage, 

44 DimensionRecordStorageManager, 

45 GovernorDimensionRecordStorage, 

46 StaticTablesContext, 

47 VersionTuple, 

48) 

49 

50# This has to be updated on every schema change 
# It is the value returned by
# `StaticDimensionRecordStorageManager.currentVersion` in this module.

51_VERSION = VersionTuple(6, 0, 2) 

52 

53 

class StaticDimensionRecordStorageManager(DimensionRecordStorageManager):
    """An implementation of `DimensionRecordStorageManager` for single-layer
    `Registry` and the base layers of multi-layer `Registry`.

    This manager creates `DimensionRecordStorage` instances for all elements
    in the `DimensionUniverse` in its own `initialize` method, as part of
    static table creation, so it never needs to manage any dynamic registry
    tables.

    Parameters
    ----------
    db : `Database`
        Interface to the underlying database engine and namespace.
    records : `NamedKeyDict`
        Mapping from `DimensionElement` to `DimensionRecordStorage` for that
        element.
    overlaps : `dict` [ `tuple`, `DatabaseDimensionOverlapStorage` ]
        Objects that manage materialized overlaps between database-backed
        dimensions, keyed by the pair of `DatabaseDimensionElement`
        instances each one relates.
    dimensionGraphStorage : `_DimensionGraphStorage`
        Object that manages saved `DimensionGraph` definitions.
    universe : `DimensionUniverse`
        All known dimensions.
    """

    def __init__(
        self,
        db: Database,
        *,
        records: NamedKeyDict[DimensionElement, DimensionRecordStorage],
        overlaps: dict[
            tuple[DatabaseDimensionElement, DatabaseDimensionElement], DatabaseDimensionOverlapStorage
        ],
        dimensionGraphStorage: _DimensionGraphStorage,
        universe: DimensionUniverse,
    ):
        super().__init__(universe=universe)
        self._db = db
        self._records = records
        self._overlaps = overlaps
        self._dimensionGraphStorage = dimensionGraphStorage

    @classmethod
    def initialize(
        cls, db: Database, context: StaticTablesContext, *, universe: DimensionUniverse
    ) -> DimensionRecordStorageManager:
        # Docstring inherited from DimensionRecordStorageManager.
        # Start by initializing governor dimensions; those go both in the main
        # 'records' mapping we'll pass to init, and a local dictionary that we
        # can pass in when initializing storage for DatabaseDimensionElements.
        governors = NamedKeyDict[GovernorDimension, GovernorDimensionRecordStorage]()
        records = NamedKeyDict[DimensionElement, DimensionRecordStorage]()
        for dimension in universe.getGovernorDimensions():
            governorStorage = dimension.makeStorage(db, context=context)
            governors[dimension] = governorStorage
            records[dimension] = governorStorage
        # Next we initialize storage for DatabaseDimensionElements.
        # We remember the spatial ones (grouped by family) so we can go back
        # and initialize overlap storage for them later.
        spatial = NamedKeyDict[DatabaseTopologicalFamily, list[DatabaseDimensionRecordStorage]]()
        for element in universe.getDatabaseElements():
            elementStorage = element.makeStorage(db, context=context, governors=governors)
            records[element] = elementStorage
            if element.spatial is not None:
                spatial.setdefault(element.spatial, []).append(elementStorage)
        # Finally we initialize overlap storage.  The implementation class for
        # this is currently hard-coded (it's not obvious there will ever be
        # others).  Note that overlaps between database-backed dimensions and
        # skypix dimensions is internal to `DatabaseDimensionRecordStorage`,
        # and hence is not included here.
        from ..dimensions.overlaps import CrossFamilyDimensionOverlapStorage

        overlaps: dict[
            tuple[DatabaseDimensionElement, DatabaseDimensionElement], DatabaseDimensionOverlapStorage
        ] = {}
        for (family1, storages1), (family2, storages2) in itertools.combinations(spatial.items(), 2):
            for elementStoragePair in itertools.product(storages1, storages2):
                governorStoragePair = (governors[family1.governor], governors[family2.governor])
                if elementStoragePair[0].element > elementStoragePair[1].element:
                    # Put the element storages in ascending element order and
                    # swap the governor storages in lockstep, so that each
                    # governor stays paired with the element of its own
                    # family.  (Both halves must be swapped; reusing index 1
                    # twice would drop the first family's governor.)
                    elementStoragePair = (elementStoragePair[1], elementStoragePair[0])
                    governorStoragePair = (governorStoragePair[1], governorStoragePair[0])
                overlapStorage = CrossFamilyDimensionOverlapStorage.initialize(
                    db,
                    elementStoragePair,
                    governorStoragePair,
                    context=context,
                )
                # Both element storages are connected to the overlap storage
                # that relates them.
                elementStoragePair[0].connect(overlapStorage)
                elementStoragePair[1].connect(overlapStorage)
                overlaps[overlapStorage.elements] = overlapStorage
        # Create table that stores DimensionGraph definitions.
        dimensionGraphStorage = _DimensionGraphStorage.initialize(db, context, universe=universe)
        return cls(
            db=db,
            records=records,
            universe=universe,
            overlaps=overlaps,
            dimensionGraphStorage=dimensionGraphStorage,
        )

    def refresh(self) -> None:
        # Docstring inherited from DimensionRecordStorageManager.
        # Only the governor dimension storages are refreshed here.
        for dimension in self.universe.getGovernorDimensions():
            storage = self._records[dimension]
            assert isinstance(storage, GovernorDimensionRecordStorage)
            storage.refresh()

    def get(self, element: DimensionElement) -> DimensionRecordStorage | None:
        # Docstring inherited from DimensionRecordStorageManager.
        r = self._records.get(element)
        if r is None and isinstance(element, SkyPixDimension):
            # Skypix dimensions are not held in the records mapping; their
            # storage is made on demand from the universe's skypix definition.
            return self.universe.skypix[element.system][element.level].makeStorage()
        return r

    def register(self, element: DimensionElement) -> DimensionRecordStorage:
        # Docstring inherited from DimensionRecordStorageManager.
        result = self.get(element)
        # Check for a missing storage object explicitly rather than relying
        # on the truthiness of the storage instance.
        assert result is not None, "All records instances should be created in initialize()."
        return result

    def saveDimensionGraph(self, graph: DimensionGraph) -> int:
        # Docstring inherited from DimensionRecordStorageManager.
        return self._dimensionGraphStorage.save(graph)

    def loadDimensionGraph(self, key: int) -> DimensionGraph:
        # Docstring inherited from DimensionRecordStorageManager.
        return self._dimensionGraphStorage.load(key)

    def clearCaches(self) -> None:
        # Docstring inherited from DimensionRecordStorageManager.
        for storage in self._records.values():
            storage.clearCaches()

    @classmethod
    def currentVersion(cls) -> VersionTuple | None:
        # Docstring inherited from VersionedExtension.
        return _VERSION

    def schemaDigest(self) -> str | None:
        # Docstring inherited from VersionedExtension.
        # The digest covers the tables of every record storage and every
        # overlap storage.
        tables: list[sqlalchemy.schema.Table] = []
        for recStorage in self._records.values():
            tables += recStorage.digestTables()
        for overlapStorage in self._overlaps.values():
            tables += overlapStorage.digestTables()
        return self._defaultSchemaDigest(tables, self._db.dialect)

200 

201 

class _DimensionGraphStorage:
    """Persist `DimensionGraph` definitions and cache them in memory.

    Instances are normally obtained from `initialize`, which also declares
    the backing tables, rather than by calling the constructor directly.

    Parameters
    ----------
    db : `Database`
        Interface to the underlying database engine and namespace.
    idTable : `sqlalchemy.schema.Table`
        Table whose only content is the unique IDs of dimension graphs.
    definitionTable : `sqlalchemy.schema.Table`
        Table associating each dimension graph ID with the names of the
        dimensions that belong to that graph.
    universe : `DimensionUniverse`
        All known dimensions.
    """

    def __init__(
        self,
        db: Database,
        idTable: sqlalchemy.schema.Table,
        definitionTable: sqlalchemy.schema.Table,
        universe: DimensionUniverse,
    ):
        self._universe = universe
        self._db = db
        self._definitionTable = definitionTable
        self._idTable = idTable
        # Both caches start out knowing only the empty graph, under key 0.
        self._keysByGraph: dict[DimensionGraph, int] = {universe.empty: 0}
        self._graphsByKey: dict[int, DimensionGraph] = {0: universe.empty}

    @classmethod
    def initialize(
        cls,
        db: Database,
        context: StaticTablesContext,
        *,
        universe: DimensionUniverse,
    ) -> _DimensionGraphStorage:
        """Declare the backing tables and build a new instance.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present.
        universe : `DimensionUniverse`
            All known dimensions.

        Returns
        -------
        storage : `_DimensionGraphStorage`
            New instance of this class.
        """
        # Two tables are used so that one of them has the autoincrement key
        # as its only primary key column, which (at least) SQLite requires.
        # Other databases might allow a Sequence to be used directly instead.
        keyTableSpec = ddl.TableSpec(
            fields=[
                ddl.FieldSpec(
                    name="id",
                    dtype=sqlalchemy.BigInteger,
                    autoincrement=True,
                    primaryKey=True,
                ),
            ],
        )
        idTable = context.addTable("dimension_graph_key", keyTableSpec)
        definitionTableSpec = ddl.TableSpec(
            fields=[
                ddl.FieldSpec(name="dimension_graph_id", dtype=sqlalchemy.BigInteger, primaryKey=True),
                ddl.FieldSpec(name="dimension_name", dtype=sqlalchemy.Text, primaryKey=True),
            ],
            foreignKeys=[
                ddl.ForeignKeySpec(
                    "dimension_graph_key",
                    source=("dimension_graph_id",),
                    target=("id",),
                    onDelete="CASCADE",
                ),
            ],
        )
        definitionTable = context.addTable("dimension_graph_definition", definitionTableSpec)
        return cls(db, idTable, definitionTable, universe=universe)

    def refresh(self) -> None:
        """Rebuild the in-memory caches from the definition table.

        This is invoked automatically by `save` and `load` when they need
        it, but may also be called explicitly.
        """
        table = self._definitionTable
        with self._db.query(table.select()) as queryResult:
            fetchedRows = queryResult.mappings().fetchall()
        # Collect the dimension names recorded for each graph ID.
        graphIdColumn = table.columns.dimension_graph_id
        nameColumn = table.columns.dimension_name
        namesByKey: dict[int, set[str]] = defaultdict(set)
        for fetchedRow in fetchedRows:
            namesByKey[fetchedRow[graphIdColumn]].add(fetchedRow[nameColumn])
        # Build replacement caches, seeded with the empty graph under key 0,
        # and only then swap them in.
        freshKeys: dict[DimensionGraph, int] = {self._universe.empty: 0}
        freshGraphs: dict[int, DimensionGraph] = {0: self._universe.empty}
        for graphKey, names in namesByKey.items():
            rebuilt = DimensionGraph(self._universe, names=names)
            freshKeys[rebuilt] = graphKey
            freshGraphs[graphKey] = rebuilt
        self._graphsByKey = freshGraphs
        self._keysByGraph = freshKeys

    def save(self, graph: DimensionGraph) -> int:
        """Store a `DimensionGraph` definition and return its key.

        Parameters
        ----------
        graph : `DimensionGraph`
            Set of dimensions to save.

        Returns
        -------
        key : `int`
            Integer used as the unique key for this `DimensionGraph` in the
            database; it can be passed to `load` later.
        """
        cachedKey = self._keysByGraph.get(graph)
        if cachedKey is not None:
            return cachedKey
        # Not cached: lock both tables and re-read them, to guard against a
        # race with another process registering the very same graph.  This
        # is probably not the most efficient approach, but saving a new graph
        # should be rare, especially since the cache lookup above will
        # usually succeed in long-lived data repositories.
        with self._db.transaction(lock=[self._idTable, self._definitionTable]):
            self.refresh()
            key = self._keysByGraph.get(graph)
            if key is None:
                (key,) = self._db.insert(self._idTable, {}, returnIds=True)  # type: ignore
                # One definition row per required dimension name.
                definitionRows = [
                    {"dimension_graph_id": key, "dimension_name": name} for name in graph.required.names
                ]
                self._db.insert(self._definitionTable, *definitionRows)
            self._keysByGraph[graph] = key
            self._graphsByKey[key] = graph
        return key

    def load(self, key: int) -> DimensionGraph:
        """Return the `DimensionGraph` previously saved under ``key``.

        Parameters
        ----------
        key : `int`
            Integer used as the unique key for this `DimensionGraph` in the
            database.

        Returns
        -------
        graph : `DimensionGraph`
            Retrieved graph.
        """
        # On a cache miss, re-read the database once before the final lookup.
        if self._graphsByKey.get(key) is None:
            self.refresh()
        return self._graphsByKey[key]