Coverage for python/lsst/daf/butler/registry/dimensions/static.py: 96%

Shortcuts on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

119 statements  

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23from collections import defaultdict 

24import itertools 

25from typing import Dict, List, Optional, Set, Tuple 

26 

27import sqlalchemy 

28 

29from ...core import ( 

30 DatabaseDimensionElement, 

31 DatabaseTopologicalFamily, 

32 ddl, 

33 DimensionElement, 

34 DimensionGraph, 

35 DimensionUniverse, 

36 GovernorDimension, 

37 NamedKeyDict, 

38 SkyPixDimension, 

39) 

40from ..interfaces import ( 

41 Database, 

42 StaticTablesContext, 

43 DatabaseDimensionRecordStorage, 

44 DatabaseDimensionOverlapStorage, 

45 DimensionRecordStorageManager, 

46 DimensionRecordStorage, 

47 GovernorDimensionRecordStorage, 

48 VersionTuple 

49) 

50 

51 

# Schema version for the tables managed by this module; it is what
# `StaticDimensionRecordStorageManager.currentVersion` returns.
# This has to be updated on every schema change.
_VERSION = VersionTuple(6, 0, 1)

54 

55 

class StaticDimensionRecordStorageManager(DimensionRecordStorageManager):
    """An implementation of `DimensionRecordStorageManager` for single-layer
    `Registry` and the base layers of multi-layer `Registry`.

    This manager creates `DimensionRecordStorage` instances for all governor
    and database-backed elements in the `DimensionUniverse` in its own
    `initialize` method, as part of static table creation, so it never needs
    to manage any dynamic registry tables.

    Parameters
    ----------
    db : `Database`
        Interface to the underlying database engine and namespace.
    records : `NamedKeyDict`
        Mapping from `DimensionElement` to `DimensionRecordStorage` for that
        element.
    overlaps : `dict` [ `tuple`, `DatabaseDimensionOverlapStorage` ]
        Objects that manage materialized overlaps between database-backed
        dimensions, keyed by a pair of `DatabaseDimensionElement` instances.
    dimensionGraphStorage : `_DimensionGraphStorage`
        Object that manages saved `DimensionGraph` definitions.
    universe : `DimensionUniverse`
        All known dimensions.
    """
    def __init__(
        self,
        db: Database, *,
        records: NamedKeyDict[DimensionElement, DimensionRecordStorage],
        overlaps: Dict[Tuple[DatabaseDimensionElement, DatabaseDimensionElement],
                       DatabaseDimensionOverlapStorage],
        dimensionGraphStorage: _DimensionGraphStorage,
        universe: DimensionUniverse,
    ):
        super().__init__(universe=universe)
        self._db = db
        self._records = records
        self._overlaps = overlaps
        self._dimensionGraphStorage = dimensionGraphStorage

    @classmethod
    def initialize(cls, db: Database, context: StaticTablesContext, *,
                   universe: DimensionUniverse) -> DimensionRecordStorageManager:
        # Docstring inherited from DimensionRecordStorageManager.
        # Start by initializing governor dimensions; those go both in the main
        # 'records' mapping we'll pass to init, and a local dictionary that we
        # can pass in when initializing storage for DatabaseDimensionElements.
        governors = NamedKeyDict[GovernorDimension, GovernorDimensionRecordStorage]()
        records = NamedKeyDict[DimensionElement, DimensionRecordStorage]()
        for dimension in universe.getGovernorDimensions():
            governorStorage = dimension.makeStorage(db, context=context)
            governors[dimension] = governorStorage
            records[dimension] = governorStorage
        # Next we initialize storage for DatabaseDimensionElements.
        # We remember the spatial ones (grouped by family) so we can go back
        # and initialize overlap storage for them later.
        spatial = NamedKeyDict[DatabaseTopologicalFamily, List[DatabaseDimensionRecordStorage]]()
        for element in universe.getDatabaseElements():
            elementStorage = element.makeStorage(db, context=context, governors=governors)
            records[element] = elementStorage
            if element.spatial is not None:
                spatial.setdefault(element.spatial, []).append(elementStorage)
        # Finally we initialize overlap storage.  The implementation class for
        # this is currently hard-coded (it's not obvious there will ever be
        # others).  Note that overlaps between database-backed dimensions and
        # skypix dimensions is internal to `DatabaseDimensionRecordStorage`,
        # and hence is not included here.
        from ..dimensions.overlaps import CrossFamilyDimensionOverlapStorage
        overlaps: Dict[Tuple[DatabaseDimensionElement, DatabaseDimensionElement],
                       DatabaseDimensionOverlapStorage] = {}
        for (family1, storages1), (family2, storages2) in itertools.combinations(spatial.items(), 2):
            for elementStoragePair in itertools.product(storages1, storages2):
                governorStoragePair = (governors[family1.governor], governors[family2.governor])
                if elementStoragePair[0].element > elementStoragePair[1].element:
                    # Put the element pair in sorted order, and swap the
                    # governor pair in the same way so that each governor
                    # storage stays aligned with its element storage.
                    elementStoragePair = (elementStoragePair[1], elementStoragePair[0])
                    governorStoragePair = (governorStoragePair[1], governorStoragePair[0])
                overlapStorage = CrossFamilyDimensionOverlapStorage.initialize(
                    db,
                    elementStoragePair,
                    governorStoragePair,
                    context=context,
                )
                # Both element storages need to know about the overlap
                # storage they participate in.
                elementStoragePair[0].connect(overlapStorage)
                elementStoragePair[1].connect(overlapStorage)
                overlaps[overlapStorage.elements] = overlapStorage
        # Create table that stores DimensionGraph definitions.
        dimensionGraphStorage = _DimensionGraphStorage.initialize(db, context, universe=universe)
        return cls(db=db, records=records, universe=universe, overlaps=overlaps,
                   dimensionGraphStorage=dimensionGraphStorage)

    def refresh(self) -> None:
        # Docstring inherited from DimensionRecordStorageManager.
        # Only the storage objects for governor dimensions are refreshed here.
        for dimension in self.universe.getGovernorDimensions():
            storage = self._records[dimension]
            assert isinstance(storage, GovernorDimensionRecordStorage)
            storage.refresh()

    def get(self, element: DimensionElement) -> Optional[DimensionRecordStorage]:
        # Docstring inherited from DimensionRecordStorageManager.
        r = self._records.get(element)
        if r is None and isinstance(element, SkyPixDimension):
            # `initialize` only populates the mapping for governor and
            # database-backed elements, so storage for a skypix dimension is
            # made on demand from the universe's own instance of it.
            return self.universe.skypix[element.system][element.level].makeStorage()
        return r

    def register(self, element: DimensionElement) -> DimensionRecordStorage:
        # Docstring inherited from DimensionRecordStorageManager.
        result = self.get(element)
        assert result, "All records instances should be created in initialize()."
        return result

    def saveDimensionGraph(self, graph: DimensionGraph) -> int:
        # Docstring inherited from DimensionRecordStorageManager.
        return self._dimensionGraphStorage.save(graph)

    def loadDimensionGraph(self, key: int) -> DimensionGraph:
        # Docstring inherited from DimensionRecordStorageManager.
        return self._dimensionGraphStorage.load(key)

    def clearCaches(self) -> None:
        # Docstring inherited from DimensionRecordStorageManager.
        for storage in self._records.values():
            storage.clearCaches()

    @classmethod
    def currentVersion(cls) -> Optional[VersionTuple]:
        # Docstring inherited from VersionedExtension.
        return _VERSION

    def schemaDigest(self) -> Optional[str]:
        # Docstring inherited from VersionedExtension.
        # The digest covers the tables reported by every record storage and
        # every overlap storage object.
        tables: List[sqlalchemy.schema.Table] = []
        for recStorage in self._records.values():
            tables += recStorage.digestTables()
        for overlapStorage in self._overlaps.values():
            tables += overlapStorage.digestTables()
        return self._defaultSchemaDigest(tables, self._db.dialect)

191 

192 

class _DimensionGraphStorage:
    """Manager for `DimensionGraph` definitions persisted in the database.

    Instances are normally obtained from `initialize`, which also declares
    the tables, rather than by calling the constructor directly.

    Parameters
    ----------
    db : `Database`
        Interface to the underlying database engine and namespace.
    idTable : `sqlalchemy.schema.Table`
        Table whose only job is to hand out unique IDs for dimension graphs.
    definitionTable : `sqlalchemy.schema.Table`
        Table with one row per (dimension graph ID, dimension name) pair.
    universe : `DimensionUniverse`
        All known dimensions.
    """
    def __init__(
        self,
        db: Database,
        idTable: sqlalchemy.schema.Table,
        definitionTable: sqlalchemy.schema.Table,
        universe: DimensionUniverse,
    ):
        self._db = db
        self._idTable = idTable
        self._definitionTable = definitionTable
        self._universe = universe
        # The empty graph is always known under key 0, in both directions.
        emptyGraph = universe.empty
        self._keysByGraph: Dict[DimensionGraph, int] = {emptyGraph: 0}
        self._graphsByKey: Dict[int, DimensionGraph] = {0: emptyGraph}

    @classmethod
    def initialize(
        cls,
        db: Database,
        context: StaticTablesContext, *,
        universe: DimensionUniverse,
    ) -> _DimensionGraphStorage:
        """Declare the backing tables and build a new instance.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present.
        universe : `DimensionUniverse`
            All known dimensions.

        Returns
        -------
        storage : `_DimensionGraphStorage`
            New instance of this class.
        """
        # Two tables are used so that one of them has the autoincrement key
        # as its only primary key column, as is required by (at least) SQLite.
        # In other databases, we might be able to use a Sequence directly.
        keySpec = ddl.TableSpec(
            fields=[
                ddl.FieldSpec(
                    name="id",
                    dtype=sqlalchemy.BigInteger,
                    autoincrement=True,
                    primaryKey=True,
                ),
            ],
        )
        idTable = context.addTable("dimension_graph_key", keySpec)
        definitionSpec = ddl.TableSpec(
            fields=[
                ddl.FieldSpec(name="dimension_graph_id", dtype=sqlalchemy.BigInteger, primaryKey=True),
                ddl.FieldSpec(name="dimension_name", dtype=sqlalchemy.Text, primaryKey=True),
            ],
            foreignKeys=[
                ddl.ForeignKeySpec(
                    "dimension_graph_key",
                    source=("dimension_graph_id",),
                    target=("id",),
                    onDelete="CASCADE",
                ),
            ],
        )
        definitionTable = context.addTable("dimension_graph_definition", definitionSpec)
        return cls(db, idTable, definitionTable, universe=universe)

    def refresh(self) -> None:
        """Rebuild the in-memory caches from the definition table.

        `save` and `load` call this themselves when they need to, but it can
        also be called explicitly.
        """
        columns = self._definitionTable.columns
        # Group the dimension names found in the table by graph ID.
        namesByKey: Dict[int, Set[str]] = defaultdict(set)
        for row in self._db.query(self._definitionTable.select()).mappings():
            namesByKey[row[columns.dimension_graph_id]].add(row[columns.dimension_name])
        # Build every graph before touching the caches, so that a failure
        # part-way through leaves the existing caches as they were.
        loaded: Dict[int, DimensionGraph] = {
            graphKey: DimensionGraph(self._universe, names=names)
            for graphKey, names in namesByKey.items()
        }
        emptyGraph = self._universe.empty
        self._graphsByKey = {0: emptyGraph, **loaded}
        self._keysByGraph = {emptyGraph: 0, **{graph: graphKey for graphKey, graph in loaded.items()}}

    def save(self, graph: DimensionGraph) -> int:
        """Persist a `DimensionGraph` definition and return its key.

        Parameters
        ----------
        graph : `DimensionGraph`
            Set of dimensions to save.

        Returns
        -------
        key : `int`
            Integer used as the unique key for this `DimensionGraph` in the
            database; it can be passed to `load` later.
        """
        cached = self._keysByGraph.get(graph)
        if cached is not None:
            return cached
        # Not in the cache: lock the tables and refresh before inserting, to
        # guard against races where some other process is trying to register
        # the exact same dimension graph.  This is probably not the most
        # efficient way to do it, but it should be a rare operation,
        # especially since the short-circuit above will usually work in
        # long-lived data repositories.
        with self._db.transaction(lock=[self._idTable, self._definitionTable]):
            self.refresh()
            key = self._keysByGraph.get(graph)
            if key is None:
                (key,) = self._db.insert(self._idTable, {}, returnIds=True)  # type: ignore
                definitionRows = [
                    {"dimension_graph_id": key, "dimension_name": name}
                    for name in graph.required.names
                ]
                self._db.insert(self._definitionTable, *definitionRows)
        self._keysByGraph[graph] = key
        self._graphsByKey[key] = graph
        return key

    def load(self, key: int) -> DimensionGraph:
        """Return the `DimensionGraph` previously saved under a key.

        Parameters
        ----------
        key : `int`
            Integer used as the unique key for this `DimensionGraph` in the
            database.

        Returns
        -------
        graph : `DimensionGraph`
            Retrieved graph.

        Raises
        ------
        KeyError
            Raised if the key is still unknown after refreshing the cache.
        """
        found = self._graphsByKey.get(key)
        if found is not None:
            return found
        # Cache miss: re-read the definitions from the database and retry.
        self.refresh()
        return self._graphsByKey[key]