Coverage for python/lsst/daf/butler/registry/dimensions/static.py: 96%

119 statements  

« prev     ^ index     » next       coverage.py v6.4.1, created at 2022-07-03 01:07 -0700

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23import itertools 

24from collections import defaultdict 

25from typing import Dict, List, Optional, Set, Tuple 

26 

27import sqlalchemy 

28 

29from ...core import ( 

30 DatabaseDimensionElement, 

31 DatabaseTopologicalFamily, 

32 DimensionElement, 

33 DimensionGraph, 

34 DimensionUniverse, 

35 GovernorDimension, 

36 NamedKeyDict, 

37 SkyPixDimension, 

38 ddl, 

39) 

40from ..interfaces import ( 

41 Database, 

42 DatabaseDimensionOverlapStorage, 

43 DatabaseDimensionRecordStorage, 

44 DimensionRecordStorage, 

45 DimensionRecordStorageManager, 

46 GovernorDimensionRecordStorage, 

47 StaticTablesContext, 

48 VersionTuple, 

49) 

50 

# Version reported by `StaticDimensionRecordStorageManager.currentVersion`.
# This has to be updated on every schema change.
_VERSION = VersionTuple(6, 0, 2)

53 

54 

class StaticDimensionRecordStorageManager(DimensionRecordStorageManager):
    """An implementation of `DimensionRecordStorageManager` for single-layer
    `Registry` and the base layers of multi-layer `Registry`.

    This manager creates `DimensionRecordStorage` instances for all elements
    in the `DimensionUniverse` in its own `initialize` method, as part of
    static table creation, so it never needs to manage any dynamic registry
    tables.

    Parameters
    ----------
    db : `Database`
        Interface to the underlying database engine and namespace.
    records : `NamedKeyDict`
        Mapping from `DimensionElement` to `DimensionRecordStorage` for that
        element.
    overlaps : `dict`
        Mapping from a pair of `DatabaseDimensionElement` instances to the
        `DatabaseDimensionOverlapStorage` object that manages materialized
        overlaps between those two database-backed dimensions.
    dimensionGraphStorage : `_DimensionGraphStorage`
        Object that manages saved `DimensionGraph` definitions.
    universe : `DimensionUniverse`
        All known dimensions.
    """

    def __init__(
        self,
        db: Database,
        *,
        records: NamedKeyDict[DimensionElement, DimensionRecordStorage],
        overlaps: Dict[
            Tuple[DatabaseDimensionElement, DatabaseDimensionElement], DatabaseDimensionOverlapStorage
        ],
        dimensionGraphStorage: _DimensionGraphStorage,
        universe: DimensionUniverse,
    ):
        super().__init__(universe=universe)
        self._db = db
        self._records = records
        self._overlaps = overlaps
        self._dimensionGraphStorage = dimensionGraphStorage

    @classmethod
    def initialize(
        cls, db: Database, context: StaticTablesContext, *, universe: DimensionUniverse
    ) -> DimensionRecordStorageManager:
        # Docstring inherited from DimensionRecordStorageManager.
        # Start by initializing governor dimensions; those go both in the main
        # 'records' mapping we'll pass to init, and a local dictionary that we
        # can pass in when initializing storage for DatabaseDimensionElements.
        governors = NamedKeyDict[GovernorDimension, GovernorDimensionRecordStorage]()
        records = NamedKeyDict[DimensionElement, DimensionRecordStorage]()
        for dimension in universe.getGovernorDimensions():
            governorStorage = dimension.makeStorage(db, context=context)
            governors[dimension] = governorStorage
            records[dimension] = governorStorage
        # Next we initialize storage for DatabaseDimensionElements.
        # We remember the spatial ones (grouped by family) so we can go back
        # and initialize overlap storage for them later.
        spatial = NamedKeyDict[DatabaseTopologicalFamily, List[DatabaseDimensionRecordStorage]]()
        for element in universe.getDatabaseElements():
            elementStorage = element.makeStorage(db, context=context, governors=governors)
            records[element] = elementStorage
            if element.spatial is not None:
                spatial.setdefault(element.spatial, []).append(elementStorage)
        # Finally we initialize overlap storage.  The implementation class for
        # this is currently hard-coded (it's not obvious there will ever be
        # others).  Note that overlaps between database-backed dimensions and
        # skypix dimensions is internal to `DatabaseDimensionRecordStorage`,
        # and hence is not included here.
        from ..dimensions.overlaps import CrossFamilyDimensionOverlapStorage

        overlaps: Dict[
            Tuple[DatabaseDimensionElement, DatabaseDimensionElement], DatabaseDimensionOverlapStorage
        ] = {}
        for (family1, storages1), (family2, storages2) in itertools.combinations(spatial.items(), 2):
            for elementStoragePair in itertools.product(storages1, storages2):
                governorStoragePair = (governors[family1.governor], governors[family2.governor])
                if elementStoragePair[0].element > elementStoragePair[1].element:
                    # Reorder so the first element never compares greater than
                    # the second.  The governor storages must be swapped in
                    # the same way so that each one stays paired with the
                    # element storage of its own family.
                    elementStoragePair = (elementStoragePair[1], elementStoragePair[0])
                    governorStoragePair = (governorStoragePair[1], governorStoragePair[0])
                overlapStorage = CrossFamilyDimensionOverlapStorage.initialize(
                    db,
                    elementStoragePair,
                    governorStoragePair,
                    context=context,
                )
                # Both element storages need to know about the overlap storage
                # they participate in.
                elementStoragePair[0].connect(overlapStorage)
                elementStoragePair[1].connect(overlapStorage)
                overlaps[overlapStorage.elements] = overlapStorage
        # Create table that stores DimensionGraph definitions.
        dimensionGraphStorage = _DimensionGraphStorage.initialize(db, context, universe=universe)
        return cls(
            db=db,
            records=records,
            universe=universe,
            overlaps=overlaps,
            dimensionGraphStorage=dimensionGraphStorage,
        )

    def refresh(self) -> None:
        # Docstring inherited from DimensionRecordStorageManager.
        # Only the governor dimension storages are refreshed here.
        for dimension in self.universe.getGovernorDimensions():
            storage = self._records[dimension]
            assert isinstance(storage, GovernorDimensionRecordStorage)
            storage.refresh()

    def get(self, element: DimensionElement) -> Optional[DimensionRecordStorage]:
        # Docstring inherited from DimensionRecordStorageManager.
        r = self._records.get(element)
        if r is None and isinstance(element, SkyPixDimension):
            # Skypix dimensions that were not registered in `initialize` get
            # their storage object created on demand from the universe.
            return self.universe.skypix[element.system][element.level].makeStorage()
        return r

    def register(self, element: DimensionElement) -> DimensionRecordStorage:
        # Docstring inherited from DimensionRecordStorageManager.
        result = self.get(element)
        assert result, "All records instances should be created in initialize()."
        return result

    def saveDimensionGraph(self, graph: DimensionGraph) -> int:
        # Docstring inherited from DimensionRecordStorageManager.
        return self._dimensionGraphStorage.save(graph)

    def loadDimensionGraph(self, key: int) -> DimensionGraph:
        # Docstring inherited from DimensionRecordStorageManager.
        return self._dimensionGraphStorage.load(key)

    def clearCaches(self) -> None:
        # Docstring inherited from DimensionRecordStorageManager.
        for storage in self._records.values():
            storage.clearCaches()

    @classmethod
    def currentVersion(cls) -> Optional[VersionTuple]:
        # Docstring inherited from VersionedExtension.
        return _VERSION

    def schemaDigest(self) -> Optional[str]:
        # Docstring inherited from VersionedExtension.
        # The digest covers the tables of every record storage and every
        # overlap storage held by this manager.
        tables: List[sqlalchemy.schema.Table] = []
        for recStorage in self._records.values():
            tables += recStorage.digestTables()
        for overlapStorage in self._overlaps.values():
            tables += overlapStorage.digestTables()
        return self._defaultSchemaDigest(tables, self._db.dialect)

201 

202 

class _DimensionGraphStorage:
    """Persist `DimensionGraph` definitions and cache them in memory.

    Instances are normally obtained from `initialize`, which also declares
    the backing tables, rather than by calling the constructor directly.

    Parameters
    ----------
    db : `Database`
        Interface to the underlying database engine and namespace.
    idTable : `sqlalchemy.schema.Table`
        Table whose only job is to hand out unique IDs for dimension graphs.
    definitionTable : `sqlalchemy.schema.Table`
        Table with one row per (dimension graph ID, dimension name) pair.
    universe : `DimensionUniverse`
        All known dimensions.
    """

    def __init__(
        self,
        db: Database,
        idTable: sqlalchemy.schema.Table,
        definitionTable: sqlalchemy.schema.Table,
        universe: DimensionUniverse,
    ):
        self._universe = universe
        self._db = db
        self._definitionTable = definitionTable
        self._idTable = idTable
        # The empty graph is always known under key 0, in both directions.
        self._graphsByKey: Dict[int, DimensionGraph] = {0: universe.empty}
        self._keysByGraph: Dict[DimensionGraph, int] = {universe.empty: 0}

    @classmethod
    def initialize(
        cls,
        db: Database,
        context: StaticTablesContext,
        *,
        universe: DimensionUniverse,
    ) -> _DimensionGraphStorage:
        """Declare the backing tables and build a new storage object.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present.
        universe : `DimensionUniverse`
            All known dimensions.

        Returns
        -------
        storage : `_DimensionGraphStorage`
            New instance of this class.
        """
        # Two tables are used so that one of them has the autoincrement key
        # as its only primary key column, which (at least) SQLite requires.
        # Other databases might allow a Sequence to be used directly instead.
        keyTableSpec = ddl.TableSpec(
            fields=[
                ddl.FieldSpec(
                    name="id",
                    dtype=sqlalchemy.BigInteger,
                    autoincrement=True,
                    primaryKey=True,
                ),
            ],
        )
        idTable = context.addTable("dimension_graph_key", keyTableSpec)
        definitionTableSpec = ddl.TableSpec(
            fields=[
                ddl.FieldSpec(name="dimension_graph_id", dtype=sqlalchemy.BigInteger, primaryKey=True),
                ddl.FieldSpec(name="dimension_name", dtype=sqlalchemy.Text, primaryKey=True),
            ],
            foreignKeys=[
                ddl.ForeignKeySpec(
                    "dimension_graph_key",
                    source=("dimension_graph_id",),
                    target=("id",),
                    onDelete="CASCADE",
                ),
            ],
        )
        definitionTable = context.addTable("dimension_graph_definition", definitionTableSpec)
        return cls(db, idTable, definitionTable, universe=universe)

    def refresh(self) -> None:
        """Rebuild the in-memory caches from the definition table.

        `save` and `load` call this themselves when they need to, but it may
        also be invoked explicitly.
        """
        columns = self._definitionTable.columns
        # Gather the dimension names stored under each graph key.
        namesByKey: Dict[int, Set[str]] = defaultdict(set)
        for row in self._db.query(self._definitionTable.select()).mappings():
            namesByKey[row[columns.dimension_graph_id]].add(row[columns.dimension_name])
        # Start both lookups from the always-present empty graph, then add one
        # reconstructed graph per stored key.
        emptyGraph = self._universe.empty
        byGraph: Dict[DimensionGraph, int] = {emptyGraph: 0}
        byKey: Dict[int, DimensionGraph] = {0: emptyGraph}
        for graphKey, names in namesByKey.items():
            rebuilt = DimensionGraph(self._universe, names=names)
            byGraph[rebuilt] = graphKey
            byKey[graphKey] = rebuilt
        self._graphsByKey, self._keysByGraph = byKey, byGraph

    def save(self, graph: DimensionGraph) -> int:
        """Store a `DimensionGraph` definition and return its key.

        Parameters
        ----------
        graph : `DimensionGraph`
            Set of dimensions to save.

        Returns
        -------
        key : `int`
            Integer used as the unique key for this `DimensionGraph` in the
            database; pass it to `load` to get the graph back.
        """
        cachedKey = self._keysByGraph.get(graph)
        if cachedKey is not None:
            return cachedKey
        # Cache miss: lock both tables and refresh inside the transaction, so
        # that another process registering the very same graph cannot race
        # with us.  This is not the most efficient approach, but it should be
        # rare, since the cache lookup above usually succeeds in long-lived
        # data repositories.
        tablesToLock = [self._idTable, self._definitionTable]
        with self._db.transaction(lock=tablesToLock):
            self.refresh()
            key = self._keysByGraph.get(graph)
            if key is None:
                # Obtain a new ID, then record the graph's required dimension
                # names under it.
                (key,) = self._db.insert(self._idTable, {}, returnIds=True)  # type: ignore
                definitionRows = [
                    {"dimension_graph_id": key, "dimension_name": name} for name in graph.required.names
                ]
                self._db.insert(self._definitionTable, *definitionRows)
            self._keysByGraph[graph] = key
            self._graphsByKey[key] = graph
        return key

    def load(self, key: int) -> DimensionGraph:
        """Return the `DimensionGraph` previously saved under ``key``.

        Parameters
        ----------
        key : `int`
            Integer used as the unique key for this `DimensionGraph` in the
            database.

        Returns
        -------
        graph : `DimensionGraph`
            Retrieved graph.
        """
        if self._graphsByKey.get(key) is None:
            # Not cached yet; re-read the definitions before looking it up.
            self.refresh()
        return self._graphsByKey[key]