Coverage for python/lsst/daf/butler/registry/dimensions/static.py: 92%

153 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2023-01-11 02:30 -0800

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23import itertools 

24from collections import defaultdict 

25from collections.abc import Mapping, Set 

26from typing import TYPE_CHECKING 

27 

28import sqlalchemy 

29from lsst.daf.relation import Relation 

30 

31from ...core import ( 

32 DatabaseDimensionElement, 

33 DatabaseTopologicalFamily, 

34 DimensionElement, 

35 DimensionGraph, 

36 DimensionUniverse, 

37 GovernorDimension, 

38 NamedKeyDict, 

39 SkyPixDimension, 

40 ddl, 

41) 

42from .._exceptions import MissingSpatialOverlapError 

43from ..interfaces import ( 

44 Database, 

45 DatabaseDimensionOverlapStorage, 

46 DatabaseDimensionRecordStorage, 

47 DimensionRecordStorage, 

48 DimensionRecordStorageManager, 

49 GovernorDimensionRecordStorage, 

50 StaticTablesContext, 

51 VersionTuple, 

52) 

53 

54if TYPE_CHECKING: 54 ↛ 55line 54 didn't jump to line 55, because the condition on line 54 was never true

55 from .. import queries 

56 

57 

# Version reported by `StaticDimensionRecordStorageManager.currentVersion`.
# This has to be updated on every schema change
_VERSION = VersionTuple(6, 0, 2)

60 

61 

62class StaticDimensionRecordStorageManager(DimensionRecordStorageManager): 

63 """An implementation of `DimensionRecordStorageManager` for single-layer 

64 `Registry` and the base layers of multi-layer `Registry`. 

65 

66 This manager creates `DimensionRecordStorage` instances for all elements 

67 in the `DimensionUniverse` in its own `initialize` method, as part of 

68 static table creation, so it never needs to manage any dynamic registry 

69 tables. 

70 

71 Parameters 

72 ---------- 

73 db : `Database` 

74 Interface to the underlying database engine and namespace. 

75 records : `NamedKeyDict` 

76 Mapping from `DimensionElement` to `DimensionRecordStorage` for that 

77 element. 

78 overlaps : `list` [ `DatabaseDimensionOverlapStorage` ] 

79 Objects that manage materialized overlaps between database-backed 

80 dimensions. 

81 dimensionGraphStorage : `_DimensionGraphStorage` 

82 Object that manages saved `DimensionGraph` definitions. 

83 universe : `DimensionUniverse` 

84 All known dimensions. 

85 """ 

86 

87 def __init__( 

88 self, 

89 db: Database, 

90 *, 

91 records: NamedKeyDict[DimensionElement, DimensionRecordStorage], 

92 overlaps: dict[ 

93 tuple[DatabaseDimensionElement, DatabaseDimensionElement], DatabaseDimensionOverlapStorage 

94 ], 

95 dimensionGraphStorage: _DimensionGraphStorage, 

96 universe: DimensionUniverse, 

97 ): 

98 super().__init__(universe=universe) 

99 self._db = db 

100 self._records = records 

101 self._overlaps = overlaps 

102 self._dimensionGraphStorage = dimensionGraphStorage 

103 

104 @classmethod 

105 def initialize( 

106 cls, db: Database, context: StaticTablesContext, *, universe: DimensionUniverse 

107 ) -> DimensionRecordStorageManager: 

108 # Docstring inherited from DimensionRecordStorageManager. 

109 # Start by initializing governor dimensions; those go both in the main 

110 # 'records' mapping we'll pass to init, and a local dictionary that we 

111 # can pass in when initializing storage for DatabaseDimensionElements. 

112 governors = NamedKeyDict[GovernorDimension, GovernorDimensionRecordStorage]() 

113 records = NamedKeyDict[DimensionElement, DimensionRecordStorage]() 

114 for dimension in universe.getGovernorDimensions(): 

115 governorStorage = dimension.makeStorage(db, context=context) 

116 governors[dimension] = governorStorage 

117 records[dimension] = governorStorage 

118 # Next we initialize storage for DatabaseDimensionElements. Some 

119 # elements' storage may be views into anothers; we'll do a first pass 

120 # to gather a mapping from the names of those targets back to their 

121 # views. 

122 view_targets = { 

123 element.viewOf: element 

124 for element in universe.getDatabaseElements() 

125 if element.viewOf is not None 

126 } 

127 # We remember the spatial ones (grouped by family) so we can go back 

128 # and initialize overlap storage for them later. 

129 spatial = NamedKeyDict[DatabaseTopologicalFamily, list[DatabaseDimensionRecordStorage]]() 

130 for element in universe.getDatabaseElements(): 

131 if element.viewOf is not None: 

132 # We'll initialize this storage when the view's target is 

133 # initialized. 

134 continue 

135 elementStorage = element.makeStorage(db, context=context, governors=governors) 

136 records[element] = elementStorage 

137 if element.spatial is not None: 

138 spatial.setdefault(element.spatial, []).append(elementStorage) 

139 if (view_element := view_targets.get(element.name)) is not None: 

140 view_element_storage = view_element.makeStorage( 

141 db, 

142 context=context, 

143 governors=governors, 

144 view_target=elementStorage, 

145 ) 

146 records[view_element] = view_element_storage 

147 if view_element.spatial is not None: 147 ↛ 148line 147 didn't jump to line 148, because the condition on line 147 was never true

148 spatial.setdefault(view_element.spatial, []).append(view_element_storage) 

149 

150 # Finally we initialize overlap storage. The implementation class for 

151 # this is currently hard-coded (it's not obvious there will ever be 

152 # others). Note that overlaps between database-backed dimensions and 

153 # skypix dimensions is internal to `DatabaseDimensionRecordStorage`, 

154 # and hence is not included here. 

155 from ..dimensions.overlaps import CrossFamilyDimensionOverlapStorage 

156 

157 overlaps: dict[ 

158 tuple[DatabaseDimensionElement, DatabaseDimensionElement], DatabaseDimensionOverlapStorage 

159 ] = {} 

160 for (family1, storages1), (family2, storages2) in itertools.combinations(spatial.items(), 2): 

161 for elementStoragePair in itertools.product(storages1, storages2): 

162 governorStoragePair = (governors[family1.governor], governors[family2.governor]) 

163 if elementStoragePair[0].element > elementStoragePair[1].element: 163 ↛ 164line 163 didn't jump to line 164, because the condition on line 163 was never true

164 elementStoragePair = (elementStoragePair[1], elementStoragePair[0]) 

165 governorStoragePair = (governorStoragePair[1], governorStoragePair[1]) 

166 overlapStorage = CrossFamilyDimensionOverlapStorage.initialize( 

167 db, 

168 elementStoragePair, 

169 governorStoragePair, 

170 context=context, 

171 ) 

172 elementStoragePair[0].connect(overlapStorage) 

173 elementStoragePair[1].connect(overlapStorage) 

174 overlaps[overlapStorage.elements] = overlapStorage 

175 # Create table that stores DimensionGraph definitions. 

176 dimensionGraphStorage = _DimensionGraphStorage.initialize(db, context, universe=universe) 

177 return cls( 

178 db=db, 

179 records=records, 

180 universe=universe, 

181 overlaps=overlaps, 

182 dimensionGraphStorage=dimensionGraphStorage, 

183 ) 

184 

185 def get(self, element: DimensionElement | str) -> DimensionRecordStorage | None: 

186 # Docstring inherited from DimensionRecordStorageManager. 

187 r = self._records.get(element) 

188 if r is None: 

189 if isinstance(element, str): 

190 element = self.universe[element] 

191 if isinstance(element, SkyPixDimension): 191 ↛ 193line 191 didn't jump to line 193, because the condition on line 191 was never false

192 return self.universe.skypix[element.system][element.level].makeStorage() 

193 return r 

194 

195 def register(self, element: DimensionElement) -> DimensionRecordStorage: 

196 # Docstring inherited from DimensionRecordStorageManager. 

197 result = self.get(element) 

198 assert result, "All records instances should be created in initialize()." 

199 return result 

200 

201 def saveDimensionGraph(self, graph: DimensionGraph) -> int: 

202 # Docstring inherited from DimensionRecordStorageManager. 

203 return self._dimensionGraphStorage.save(graph) 

204 

205 def loadDimensionGraph(self, key: int) -> DimensionGraph: 

206 # Docstring inherited from DimensionRecordStorageManager. 

207 return self._dimensionGraphStorage.load(key) 

208 

209 def clearCaches(self) -> None: 

210 # Docstring inherited from DimensionRecordStorageManager. 

211 for storage in self._records.values(): 

212 storage.clearCaches() 

213 

214 def make_spatial_join_relation( 

215 self, 

216 element1: str, 

217 element2: str, 

218 context: queries.SqlQueryContext, 

219 governor_constraints: Mapping[str, Set[str]], 

220 ) -> tuple[Relation, bool]: 

221 # Docstring inherited. 

222 storage1 = self[element1] 

223 storage2 = self[element2] 

224 overlaps: Relation | None = None 

225 needs_refinement: bool = False 

226 match (storage1, storage2): 

227 case [ 

228 DatabaseDimensionRecordStorage() as db_storage1, 

229 DatabaseDimensionRecordStorage() as db_storage2, 

230 ]: 

231 # Construction guarantees that we only need to try this in one 

232 # direction; either both storage objects know about the other 

233 # or neither do. 

234 overlaps = db_storage1.make_spatial_join_relation( 

235 db_storage2.element, context, governor_constraints 

236 ) 

237 if overlaps is None: 237 ↛ 255line 237 didn't jump to line 255, because the condition on line 237 was never false

238 # No direct materialized overlaps; use commonSkyPix as an 

239 # intermediary. 

240 common_skypix_overlap1 = db_storage1.make_spatial_join_relation( 

241 self.universe.commonSkyPix, context, governor_constraints 

242 ) 

243 common_skypix_overlap2 = db_storage2.make_spatial_join_relation( 

244 self.universe.commonSkyPix, context, governor_constraints 

245 ) 

246 assert ( 

247 common_skypix_overlap1 is not None and common_skypix_overlap2 is not None 

248 ), "Overlaps with the common skypix dimension should always be available," 

249 overlaps = common_skypix_overlap1.join(common_skypix_overlap2) 

250 needs_refinement = True 

251 case [DatabaseDimensionRecordStorage() as db_storage, other]: 

252 overlaps = db_storage.make_spatial_join_relation(other.element, context, governor_constraints) 

253 case [other, DatabaseDimensionRecordStorage() as db_storage]: 253 ↛ 255line 253 didn't jump to line 255, because the pattern on line 253 never matched

254 overlaps = db_storage.make_spatial_join_relation(other.element, context, governor_constraints) 

255 if overlaps is None: 255 ↛ 269line 255 didn't jump to line 269, because the condition on line 255 was never true

256 # In the future, there's a lot more we could try here: 

257 # 

258 # - for skypix dimensions, looking for materialized overlaps at 

259 # smaller spatial scales (higher-levels) and using bit-shifting; 

260 # 

261 # - for non-skypix dimensions, looking for materialized overlaps 

262 # for more finer-grained members of the same family, and then 

263 # doing SELECT DISTINCT (or even tolerating duplicates) on the 

264 # columns we care about (e.g. use patch overlaps to satisfy a 

265 # request for tract overlaps). 

266 # 

267 # It's not obvious that's better than just telling the user to 

268 # materialize more overlaps, though. 

269 raise MissingSpatialOverlapError( 

270 f"No materialized overlaps for spatial join between {element1!r} and {element2!r}." 

271 ) 

272 return overlaps, needs_refinement 

273 

274 @classmethod 

275 def currentVersion(cls) -> VersionTuple | None: 

276 # Docstring inherited from VersionedExtension. 

277 return _VERSION 

278 

279 def schemaDigest(self) -> str | None: 

280 # Docstring inherited from VersionedExtension. 

281 tables: list[sqlalchemy.schema.Table] = [] 

282 for recStorage in self._records.values(): 

283 tables += recStorage.digestTables() 

284 for overlapStorage in self._overlaps.values(): 

285 tables += overlapStorage.digestTables() 

286 return self._defaultSchemaDigest(tables, self._db.dialect) 

287 

288 

class _DimensionGraphStorage:
    """Persistence helper for saved `DimensionGraph` definitions.

    Instances keep an in-memory, two-way cache between graphs and their
    integer keys, backed by two database tables.  Use `initialize` to obtain
    an instance rather than calling the constructor directly.

    Parameters
    ----------
    db : `Database`
        Interface to the underlying database engine and namespace.
    idTable : `sqlalchemy.schema.Table`
        Table that just holds unique IDs for dimension graphs.
    definitionTable : `sqlalchemy.schema.Table`
        Table that maps dimension names to the IDs of the dimension graphs to
        which they belong.
    universe : `DimensionUniverse`
        All known dimensions.
    """

    def __init__(
        self,
        db: Database,
        idTable: sqlalchemy.schema.Table,
        definitionTable: sqlalchemy.schema.Table,
        universe: DimensionUniverse,
    ):
        self._db = db
        self._idTable = idTable
        self._definitionTable = definitionTable
        self._universe = universe
        # The empty graph is always present in the caches under key 0.
        self._keysByGraph: dict[DimensionGraph, int] = {universe.empty: 0}
        self._graphsByKey: dict[int, DimensionGraph] = {0: universe.empty}

    @classmethod
    def initialize(
        cls,
        db: Database,
        context: StaticTablesContext,
        *,
        universe: DimensionUniverse,
    ) -> _DimensionGraphStorage:
        """Declare the backing tables and build a new instance.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present.
        universe : `DimensionUniverse`
            All known dimensions.

        Returns
        -------
        storage : `_DimensionGraphStorage`
            New instance of this class.
        """
        # Two tables are needed so that one of them has the autoincrement key
        # as its only primary key column, as is required by (at least) SQLite.
        # In other databases, we might be able to use a Sequence directly.
        key_spec = ddl.TableSpec(
            fields=[
                ddl.FieldSpec(
                    name="id",
                    dtype=sqlalchemy.BigInteger,
                    autoincrement=True,
                    primaryKey=True,
                ),
            ],
        )
        key_table = context.addTable("dimension_graph_key", key_spec)
        definition_spec = ddl.TableSpec(
            fields=[
                ddl.FieldSpec(name="dimension_graph_id", dtype=sqlalchemy.BigInteger, primaryKey=True),
                ddl.FieldSpec(name="dimension_name", dtype=sqlalchemy.Text, primaryKey=True),
            ],
            foreignKeys=[
                ddl.ForeignKeySpec(
                    "dimension_graph_key",
                    source=("dimension_graph_id",),
                    target=("id",),
                    onDelete="CASCADE",
                ),
            ],
        )
        definition_table = context.addTable("dimension_graph_definition", definition_spec)
        return cls(db, key_table, definition_table, universe=universe)

    def refresh(self) -> None:
        """Rebuild the in-memory caches from the definition table.

        This should be done automatically whenever needed, but it can also
        be called explicitly.
        """
        with self._db.query(self._definitionTable.select()) as sql_result:
            fetched = sql_result.mappings().fetchall()
        # Gather the dimension names recorded for each graph key.
        names_by_key: dict[int, set[str]] = {}
        for record in fetched:
            graph_key = record[self._definitionTable.columns.dimension_graph_id]
            dimension_name = record[self._definitionTable.columns.dimension_name]
            names_by_key.setdefault(graph_key, set()).add(dimension_name)
        # Build replacement caches from scratch, seeded with the empty graph,
        # and only swap them in once they are complete.
        fresh_keys: dict[DimensionGraph, int] = {self._universe.empty: 0}
        fresh_graphs: dict[int, DimensionGraph] = {0: self._universe.empty}
        for graph_key, names in names_by_key.items():
            restored = DimensionGraph(self._universe, names=names)
            fresh_keys[restored] = graph_key
            fresh_graphs[graph_key] = restored
        self._graphsByKey = fresh_graphs
        self._keysByGraph = fresh_keys

    def save(self, graph: DimensionGraph) -> int:
        """Store a `DimensionGraph` definition and return the key that can
        later be used to retrieve it.

        Parameters
        ----------
        graph : `DimensionGraph`
            Set of dimensions to save.

        Returns
        -------
        key : `int`
            Integer used as the unique key for this `DimensionGraph` in the
            database.
        """
        # Fast path: this graph is already in the cache.
        if (cached := self._keysByGraph.get(graph)) is not None:
            return cached
        # Lock tables and then refresh to guard against races where some other
        # process is trying to register the exact same dimension graph.  This
        # is probably not the most efficient way to do it, but it should be a
        # rare operation, especially since the short-circuit above will usually
        # work in long-lived data repositories.
        with self._db.transaction(lock=[self._idTable, self._definitionTable]):
            self.refresh()
            key = self._keysByGraph.get(graph)
            if key is None:
                # Still unknown after the refresh: allocate a new ID, then
                # record one row per required dimension name under it.
                (key,) = self._db.insert(self._idTable, {}, returnIds=True)  # type: ignore
                definition_rows = [
                    {"dimension_graph_id": key, "dimension_name": name} for name in graph.required.names
                ]
                self._db.insert(self._definitionTable, *definition_rows)
            self._keysByGraph[graph] = key
            self._graphsByKey[key] = graph
        return key

    def load(self, key: int) -> DimensionGraph:
        """Return the `DimensionGraph` previously saved under ``key``.

        Parameters
        ----------
        key : `int`
            Integer used as the unique key for this `DimensionGraph` in the
            database.

        Returns
        -------
        graph : `DimensionGraph`
            Retrieved graph.
        """
        cached = self._graphsByKey.get(key)
        if cached is not None:
            return cached
        # Cache miss: reload from the database once, then look up again (a
        # key that is still unknown raises `KeyError`).
        self.refresh()
        return self._graphsByKey[key]