Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

from collections import defaultdict
import itertools
from typing import Dict, List, Optional, Set, Tuple

import sqlalchemy

from ...core import (
    DatabaseDimensionElement,
    DatabaseTopologicalFamily,
    ddl,
    DimensionElement,
    DimensionGraph,
    DimensionUniverse,
    GovernorDimension,
    NamedKeyDict,
    SkyPixDimension,
)
from ..interfaces import (
    Database,
    StaticTablesContext,
    DatabaseDimensionRecordStorage,
    DatabaseDimensionOverlapStorage,
    DimensionRecordStorageManager,
    DimensionRecordStorage,
    GovernorDimensionRecordStorage,
    VersionTuple
)


# This has to be updated on every schema change
_VERSION = VersionTuple(6, 0, 0)



class StaticDimensionRecordStorageManager(DimensionRecordStorageManager):
    """An implementation of `DimensionRecordStorageManager` for single-layer
    `Registry` and the base layers of multi-layer `Registry`.

    This manager creates `DimensionRecordStorage` instances for all elements
    in the `DimensionUniverse` in its own `initialize` method, as part of
    static table creation, so it never needs to manage any dynamic registry
    tables.

    Parameters
    ----------
    db : `Database`
        Interface to the underlying database engine and namespace.
    records : `NamedKeyDict`
        Mapping from `DimensionElement` to `DimensionRecordStorage` for that
        element.
    overlaps : `dict` [ `tuple`, `DatabaseDimensionOverlapStorage` ]
        Objects that manage materialized overlaps between database-backed
        dimensions, keyed by the pair of `DatabaseDimensionElement`
        instances whose overlaps they store.
    dimensionGraphStorage : `_DimensionGraphStorage`
        Object that manages saved `DimensionGraph` definitions.
    universe : `DimensionUniverse`
        All known dimensions.
    """
    def __init__(
        self,
        db: Database, *,
        records: NamedKeyDict[DimensionElement, DimensionRecordStorage],
        overlaps: Dict[Tuple[DatabaseDimensionElement, DatabaseDimensionElement],
                       DatabaseDimensionOverlapStorage],
        dimensionGraphStorage: _DimensionGraphStorage,
        universe: DimensionUniverse,
    ):
        super().__init__(universe=universe)
        self._db = db
        self._records = records
        self._overlaps = overlaps
        self._dimensionGraphStorage = dimensionGraphStorage

    @classmethod
    def initialize(cls, db: Database, context: StaticTablesContext, *,
                   universe: DimensionUniverse) -> DimensionRecordStorageManager:
        # Docstring inherited from DimensionRecordStorageManager.
        # Start by initializing governor dimensions; those go both in the main
        # 'records' mapping we'll pass to init, and a local dictionary that we
        # can pass in when initializing storage for DatabaseDimensionElements.
        governors = NamedKeyDict[GovernorDimension, GovernorDimensionRecordStorage]()
        records = NamedKeyDict[DimensionElement, DimensionRecordStorage]()
        for dimension in universe.getGovernorDimensions():
            governorStorage = dimension.makeStorage(db, context=context)
            governors[dimension] = governorStorage
            records[dimension] = governorStorage
        # Next we initialize storage for DatabaseDimensionElements.
        # We remember the spatial ones (grouped by family) so we can go back
        # and initialize overlap storage for them later.
        spatial = NamedKeyDict[DatabaseTopologicalFamily, List[DatabaseDimensionRecordStorage]]()
        for element in universe.getDatabaseElements():
            elementStorage = element.makeStorage(db, context=context, governors=governors)
            records[element] = elementStorage
            if element.spatial is not None:
                spatial.setdefault(element.spatial, []).append(elementStorage)
        # Finally we initialize overlap storage.  The implementation class for
        # this is currently hard-coded (it's not obvious there will ever be
        # others).  Note that overlaps between database-backed dimensions and
        # skypix dimensions is internal to `DatabaseDimensionRecordStorage`,
        # and hence is not included here.
        from ..dimensions.overlaps import CrossFamilyDimensionOverlapStorage
        overlaps: Dict[Tuple[DatabaseDimensionElement, DatabaseDimensionElement],
                       DatabaseDimensionOverlapStorage] = {}
        for (family1, storages1), (family2, storages2) in itertools.combinations(spatial.items(), 2):
            for elementStoragePair in itertools.product(storages1, storages2):
                governorStoragePair = (governors[family1.governor], governors[family2.governor])
                if elementStoragePair[0].element > elementStoragePair[1].element:
                    # Put the pair in sorted element order, keeping the
                    # governor storages aligned with their elements.  An
                    # explicit swap (rather than tuple(reversed(...))) keeps
                    # the two-element tuple type visible to mypy.
                    elementStoragePair = (elementStoragePair[1], elementStoragePair[0])
                    governorStoragePair = (governorStoragePair[1], governorStoragePair[0])
                overlapStorage = CrossFamilyDimensionOverlapStorage.initialize(
                    db,
                    elementStoragePair,
                    governorStoragePair,
                    context=context,
                )
                # Let both element storages know about the overlap storage
                # that relates them.
                elementStoragePair[0].connect(overlapStorage)
                elementStoragePair[1].connect(overlapStorage)
                overlaps[overlapStorage.elements] = overlapStorage
        # Create table that stores DimensionGraph definitions.
        dimensionGraphStorage = _DimensionGraphStorage.initialize(db, context, universe=universe)
        return cls(db=db, records=records, universe=universe, overlaps=overlaps,
                   dimensionGraphStorage=dimensionGraphStorage)

    def refresh(self) -> None:
        # Docstring inherited from DimensionRecordStorageManager.
        # Only the governor dimension storages are refreshed here.
        for dimension in self.universe.getGovernorDimensions():
            storage = self._records[dimension]
            assert isinstance(storage, GovernorDimensionRecordStorage)
            storage.refresh()

    def get(self, element: DimensionElement) -> Optional[DimensionRecordStorage]:
        # Docstring inherited from DimensionRecordStorageManager.
        r = self._records.get(element)
        if r is None and isinstance(element, SkyPixDimension):
            # No storage was recorded for this skypix dimension; build one on
            # demand from the universe's definition of that system and level.
            return self.universe.skypix[element.system][element.level].makeStorage()
        return r

    def register(self, element: DimensionElement) -> DimensionRecordStorage:
        # Docstring inherited from DimensionRecordStorageManager.
        result = self.get(element)
        assert result, "All records instances should be created in initialize()."
        return result

    def saveDimensionGraph(self, graph: DimensionGraph) -> int:
        # Docstring inherited from DimensionRecordStorageManager.
        return self._dimensionGraphStorage.save(graph)

    def loadDimensionGraph(self, key: int) -> DimensionGraph:
        # Docstring inherited from DimensionRecordStorageManager.
        return self._dimensionGraphStorage.load(key)

    def clearCaches(self) -> None:
        # Docstring inherited from DimensionRecordStorageManager.
        for storage in self._records.values():
            storage.clearCaches()

    @classmethod
    def currentVersion(cls) -> Optional[VersionTuple]:
        # Docstring inherited from VersionedExtension.
        return _VERSION

    def schemaDigest(self) -> Optional[str]:
        # Docstring inherited from VersionedExtension.
        # The digest covers the tables reported by every record storage and
        # every overlap storage.
        tables: List[sqlalchemy.schema.Table] = []
        for recStorage in self._records.values():
            tables += recStorage.digestTables()
        for overlapStorage in self._overlaps.values():
            tables += overlapStorage.digestTables()
        return self._defaultSchemaDigest(tables, self._db.dialect)



195class _DimensionGraphStorage: 

196 """Helper object that manages saved DimensionGraph definitions. 

197 

198 Should generally be constructed by calling `initialize` instead of invoking 

199 the constructor directly. 

200 

201 Parameters 

202 ---------- 

203 db : `Database` 

204 Interface to the underlying database engine and namespace. 

205 idTable : `sqlalchemy.schema.Table` 

206 Table that just holds unique IDs for dimension graphs. 

207 definitionTable : `sqlalchemy.schema.Table` 

208 Table that maps dimension names to the IDs of the dimension graphs to 

209 which they belong. 

210 universe : `DimensionUniverse` 

211 All known dimensions. 

212 """ 

213 def __init__( 

214 self, 

215 db: Database, 

216 idTable: sqlalchemy.schema.Table, 

217 definitionTable: sqlalchemy.schema.Table, 

218 universe: DimensionUniverse, 

219 ): 

220 self._db = db 

221 self._idTable = idTable 

222 self._definitionTable = definitionTable 

223 self._universe = universe 

224 self._keysByGraph: Dict[DimensionGraph, int] = {universe.empty: 0} 

225 self._graphsByKey: Dict[int, DimensionGraph] = {0: universe.empty} 

226 

227 @classmethod 

228 def initialize( 

229 cls, 

230 db: Database, 

231 context: StaticTablesContext, *, 

232 universe: DimensionUniverse, 

233 ) -> _DimensionGraphStorage: 

234 """Construct a new instance, including creating tables if necessary. 

235 

236 Parameters 

237 ---------- 

238 db : `Database` 

239 Interface to the underlying database engine and namespace. 

240 context : `StaticTablesContext` 

241 Context object obtained from `Database.declareStaticTables`; used 

242 to declare any tables that should always be present. 

243 universe : `DimensionUniverse` 

244 All known dimensions. 

245 

246 Returns 

247 ------- 

248 storage : `_DimensionGraphStorage` 

249 New instance of this class. 

250 """ 

251 # We need two tables just so we have one where the autoincrement key is 

252 # the only primary key column, as is required by (at least) SQLite. In 

253 # other databases, we might be able to use a Sequence directly. 

254 idTable = context.addTable( 

255 "dimension_graph_key", 

256 ddl.TableSpec( 

257 fields=[ 

258 ddl.FieldSpec( 

259 name="id", 

260 dtype=sqlalchemy.BigInteger, 

261 autoincrement=True, 

262 primaryKey=True, 

263 ), 

264 ], 

265 ) 

266 ) 

267 definitionTable = context.addTable( 

268 "dimension_graph_definition", 

269 ddl.TableSpec( 

270 fields=[ 

271 ddl.FieldSpec(name="dimension_graph_id", dtype=sqlalchemy.BigInteger, primaryKey=True), 

272 ddl.FieldSpec(name="dimension_name", dtype=sqlalchemy.Text, primaryKey=True), 

273 ], 

274 foreignKeys=[ 

275 ddl.ForeignKeySpec( 

276 "dimension_graph_key", 

277 source=("dimension_graph_id",), 

278 target=("id",), 

279 onDelete="CASCADE", 

280 ), 

281 ], 

282 ) 

283 ) 

284 return cls(db, idTable, definitionTable, universe=universe) 

285 

286 def refresh(self) -> None: 

287 """Refresh the in-memory cache of saved DimensionGraph definitions. 

288 

289 This should be done automatically whenever needed, but it can also 

290 be called explicitly. 

291 """ 

292 dimensionNamesByKey: Dict[int, Set[str]] = defaultdict(set) 

293 for row in self._db.query(self._definitionTable.select()): 

294 key = row[self._definitionTable.columns.dimension_graph_id] 

295 dimensionNamesByKey[key].add(row[self._definitionTable.columns.dimension_name]) 

296 keysByGraph: Dict[DimensionGraph, int] = {self._universe.empty: 0} 

297 graphsByKey: Dict[int, DimensionGraph] = {0: self._universe.empty} 

298 for key, dimensionNames in dimensionNamesByKey.items(): 

299 graph = DimensionGraph(self._universe, names=dimensionNames) 

300 keysByGraph[graph] = key 

301 graphsByKey[key] = graph 

302 self._graphsByKey = graphsByKey 

303 self._keysByGraph = keysByGraph 

304 

305 def save(self, graph: DimensionGraph) -> int: 

306 """Save a `DimensionGraph` definition to the database, allowing it to 

307 be retrieved later via the returned key. 

308 

309 Parameters 

310 ---------- 

311 graph : `DimensionGraph` 

312 Set of dimensions to save. 

313 

314 Returns 

315 ------- 

316 key : `int` 

317 Integer used as the unique key for this `DimensionGraph` in the 

318 database. 

319 """ 

320 key = self._keysByGraph.get(graph) 

321 if key is not None: 

322 return key 

323 # Lock tables and then refresh to guard against races where some other 

324 # process is trying to register the exact same dimension graph. This 

325 # is probably not the most efficient way to do it, but it should be a 

326 # rare operation, especially since the short-circuit above will usually 

327 # work in long-lived data repositories. 

328 with self._db.transaction(lock=[self._idTable, self._definitionTable]): 

329 self.refresh() 

330 key = self._keysByGraph.get(graph) 

331 if key is None: 331 ↛ 340line 331 didn't jump to line 340, because the condition on line 331 was never false

332 (key,) = self._db.insert(self._idTable, {}, returnIds=True) # type: ignore 

333 self._db.insert( 

334 self._definitionTable, 

335 *[ 

336 {"dimension_graph_id": key, "dimension_name": name} 

337 for name in graph.required.names 

338 ], 

339 ) 

340 self._keysByGraph[graph] = key 

341 self._graphsByKey[key] = graph 

342 return key 

343 

344 def load(self, key: int) -> DimensionGraph: 

345 """Retrieve a `DimensionGraph` that was previously saved in the 

346 database. 

347 

348 Parameters 

349 ---------- 

350 key : `int` 

351 Integer used as the unique key for this `DimensionGraph` in the 

352 database. 

353 

354 Returns 

355 ------- 

356 graph : `DimensionGraph` 

357 Retrieved graph. 

358 """ 

359 graph = self._graphsByKey.get(key) 

360 if graph is None: 

361 self.refresh() 

362 graph = self._graphsByKey[key] 

363 return graph