Coverage for python/lsst/daf/butler/registry/dimensions/static.py: 97%

326 statements  

« prev     ^ index     » next       coverage.py v7.4.0, created at 2024-01-16 10:43 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27from __future__ import annotations 

28 

29import itertools 

30import logging 

31from collections import defaultdict 

32from collections.abc import Sequence, Set 

33from typing import TYPE_CHECKING, Any 

34 

35import sqlalchemy 

36from lsst.daf.relation import Calculation, ColumnExpression, Join, Relation, sql 

37 

38from ... import ddl 

39from ..._column_tags import DimensionKeyColumnTag, DimensionRecordColumnTag 

40from ..._column_type_info import LogicalColumn 

41from ..._named import NamedKeyDict 

42from ...dimensions import ( 

43 DatabaseTopologicalFamily, 

44 DataCoordinate, 

45 Dimension, 

46 DimensionElement, 

47 DimensionGroup, 

48 DimensionRecord, 

49 DimensionRecordSet, 

50 DimensionUniverse, 

51 SkyPixDimension, 

52 addDimensionForeignKey, 

53) 

54from ...dimensions.record_cache import DimensionRecordCache 

55from .._exceptions import MissingSpatialOverlapError 

56from ..interfaces import Database, DimensionRecordStorageManager, StaticTablesContext, VersionTuple 

57 

58if TYPE_CHECKING: 

59 from .. import queries 

60 

61 

62# This has to be updated on every schema change 

63_VERSION = VersionTuple(6, 0, 2) 

64 

65_LOG = logging.getLogger(__name__) 

66 

67 

68class StaticDimensionRecordStorageManager(DimensionRecordStorageManager): 

69 """An implementation of `DimensionRecordStorageManager` for single-layer 

70 `Registry` and the base layers of multi-layer `Registry`. 

71 

72 This manager creates `DimensionRecordStorage` instances for all elements 

73 in the `DimensionUniverse` in its own `initialize` method, as part of 

74 static table creation, so it never needs to manage any dynamic registry 

75 tables. 

76 

77 Parameters 

78 ---------- 

79 db : `Database` 

80 Interface to the underlying database engine and namespace. 

81 tables : `dict` [ `str`, `sqlalchemy.Table` ] 

82 Mapping from dimension element name to SQL table, for all elements that 

83 have `DimensionElement.has_own_table` `True`. 

84 overlap_tables : `dict` [ `str`, `tuple` [ `sqlalchemy.Table`, \ 

85 `sqlalchemy.Table` ] ] 

86 Mapping from dimension element name to SQL table holding overlaps 

87 between the common skypix dimension and that element, for all elements 

88 that have `DimensionElement.has_own_table` `True` and 

89 `DimensionElement.spatial` not `None`. 

90 dimension_group_storage : `_DimensionGroupStorage` 

91 Object that manages saved `DimensionGroup` definitions. 

92 universe : `DimensionUniverse` 

93 All known dimensions. 

94 registry_schema_version : `VersionTuple` or `None`, optional 

95 Version of registry schema. 

96 """ 

97 

98 def __init__( 

99 self, 

100 db: Database, 

101 *, 

102 tables: dict[str, sqlalchemy.Table], 

103 overlap_tables: dict[str, tuple[sqlalchemy.Table, sqlalchemy.Table]], 

104 dimension_group_storage: _DimensionGroupStorage, 

105 universe: DimensionUniverse, 

106 registry_schema_version: VersionTuple | None = None, 

107 ): 

108 super().__init__(universe=universe, registry_schema_version=registry_schema_version) 

109 self._db = db 

110 self._tables = tables 

111 self._overlap_tables = overlap_tables 

112 self._dimension_group_storage = dimension_group_storage 

113 

114 @classmethod 

115 def initialize( 

116 cls, 

117 db: Database, 

118 context: StaticTablesContext, 

119 *, 

120 universe: DimensionUniverse, 

121 registry_schema_version: VersionTuple | None = None, 

122 ) -> DimensionRecordStorageManager: 

123 # Docstring inherited from DimensionRecordStorageManager. 

124 tables: dict[str, sqlalchemy.Table] = {} 

125 # Define tables for governor dimensions, which are never spatial or 

126 # temporal and always have tables. 

127 for dimension in universe.governor_dimensions: 

128 spec = dimension.RecordClass.fields.makeTableSpec( 

129 TimespanReprClass=db.getTimespanRepresentation() 

130 ) 

131 tables[dimension.name] = context.addTable(dimension.name, spec) 

132 # Define tables for database dimension elements, which may or may not 

133 # have their own tables and may be spatial or temporal. 

134 spatial = NamedKeyDict[DatabaseTopologicalFamily, list[DimensionElement]]() 

135 overlap_tables: dict[str, tuple[sqlalchemy.Table, sqlalchemy.Table]] = {} 

136 for element in universe.database_elements: 

137 if not element.has_own_table: 

138 continue 

139 spec = element.RecordClass.fields.makeTableSpec(TimespanReprClass=db.getTimespanRepresentation()) 

140 tables[element.name] = context.addTable(element.name, spec) 

141 if element.spatial is not None: 

142 spatial.setdefault(element.spatial, []).append(element) 

143 overlap_tables[element.name] = cls._make_skypix_overlap_tables(context, element) 

144 # Add some tables for materialized overlaps between database 

145 # dimensions. We've never used these and no longer plan to, but we 

146 # have to keep creating them to keep schema versioning consistent. 

147 cls._make_legacy_overlap_tables(context, spatial) 

148 # Create tables that store DimensionGraph definitions. 

149 dimension_group_storage = _DimensionGroupStorage.initialize(db, context, universe=universe) 

150 return cls( 

151 db=db, 

152 tables=tables, 

153 overlap_tables=overlap_tables, 

154 universe=universe, 

155 dimension_group_storage=dimension_group_storage, 

156 registry_schema_version=registry_schema_version, 

157 ) 

158 

159 def fetch_cache_dict(self) -> dict[str, DimensionRecordSet]: 

160 # Docstring inherited. 

161 result: dict[str, DimensionRecordSet] = {} 

162 with self._db.transaction(): 

163 for element in self.universe.elements: 

164 if not element.is_cached: 

165 continue 

166 assert not element.temporal, ( 

167 "Cached dimension elements should not be spatial or temporal, as that " 

168 "suggests a large number of records." 

169 ) 

170 if element.implied_union_target is not None: 

171 assert isinstance(element, Dimension), "Only dimensions can be implied dependencies." 

172 table = self._tables[element.implied_union_target.name] 

173 sql = sqlalchemy.select( 

174 table.columns[element.name].label(element.primary_key.name) 

175 ).distinct() 

176 else: 

177 table = self._tables[element.name] 

178 sql = table.select() 

179 with self._db.query(sql) as results: 

180 result[element.name] = DimensionRecordSet( 

181 element=element, 

182 records=[element.RecordClass(**row) for row in results.mappings()], 

183 ) 

184 return result 

185 

186 def insert( 

187 self, 

188 element: DimensionElement, 

189 *records: DimensionRecord, 

190 cache: DimensionRecordCache, 

191 replace: bool = False, 

192 skip_existing: bool = False, 

193 ) -> None: 

194 # Docstring inherited. 

195 if not element.has_own_table: 

196 raise TypeError(f"Cannot insert {element.name} records.") 

197 rows, overlap_insert_rows, overlap_delete_rows, overlap_summary_rows = self._make_record_db_rows( 

198 element, records, replace=replace 

199 ) 

200 table = self._tables[element.name] 

201 with cache.modifying(element.name) as cache_records: 

202 with self._db.transaction(): 

203 if replace: 

204 self._db.replace(table, *rows) 

205 elif skip_existing: 

206 self._db.ensure(table, *rows, primary_key_only=True) 

207 else: 

208 self._db.insert(table, *rows) 

209 self._insert_overlaps( 

210 element, overlap_insert_rows, overlap_delete_rows, skip_existing=skip_existing 

211 ) 

212 for related_element_name, summary_rows in overlap_summary_rows.items(): 

213 self._db.ensure(self._overlap_tables[related_element_name][0], *summary_rows) 

214 # Database transaction succeeded; update the cache to keep them 

215 # consistent. 

216 if cache_records is not None: 

217 cache_records.update(records, replace=not skip_existing) 

218 

219 def sync( 

220 self, record: DimensionRecord, cache: DimensionRecordCache, update: bool = False 

221 ) -> bool | dict[str, Any]: 

222 # Docstring inherited. 

223 if not record.definition.has_own_table: 223 ↛ 224line 223 didn't jump to line 224, because the condition on line 223 was never true

224 raise TypeError(f"Cannot sync {record.definition.name} records.") 

225 # We might not need the overlap rows at all; we won't know until we try 

226 # to insert the main row. But we figure it's better to spend the time 

227 # to compute them in advance always *outside* the database transaction 

228 # than to compute them only as-needed inside the database transaction, 

229 # since in-transaction time is especially precious. 

230 ( 

231 (compared,), 

232 overlap_insert_rows, 

233 overlap_delete_rows, 

234 overlap_summary_rows, 

235 ) = self._make_record_db_rows(record.definition, [record], replace=True) 

236 keys = {} 

237 for name in record.fields.required.names: 

238 keys[name] = compared.pop(name) 

239 with cache.modifying(record.definition.name) as cache_records: 

240 with self._db.transaction(): 

241 _, inserted_or_updated = self._db.sync( 

242 self._tables[record.definition.name], 

243 keys=keys, 

244 compared=compared, 

245 update=update, 

246 ) 

247 if inserted_or_updated: 

248 if inserted_or_updated is True: 

249 # Inserted a new row, so we just need to insert new 

250 # overlap rows (if there are any). 

251 self._insert_overlaps(record.definition, overlap_insert_rows, overlap_delete_rows=[]) 

252 elif "region" in inserted_or_updated: 252 ↛ 256line 252 didn't jump to line 256, because the condition on line 252 was never false

253 # Updated the region, so we need to delete old overlap 

254 # rows and insert new ones. 

255 self._insert_overlaps(record.definition, overlap_insert_rows, overlap_delete_rows) 

256 for related_element_name, summary_rows in overlap_summary_rows.items(): 

257 self._db.ensure(self._overlap_tables[related_element_name][0], *summary_rows) 

258 # We updated something other than a region; no need to change 

259 # the overlap regions. 

260 # Database transaction succeeded; update the cache to keep them 

261 # consistent. 

262 if cache_records is not None and inserted_or_updated: 

263 cache_records.add(record, replace=update) 

264 return inserted_or_updated 

265 

266 def fetch_one( 

267 self, 

268 element_name: str, 

269 data_id: DataCoordinate, 

270 cache: DimensionRecordCache, 

271 ) -> DimensionRecord | None: 

272 # Docstring inherited. 

273 element = self.universe[element_name] 

274 if element_name in cache: 

275 try: 

276 return cache[element_name].find(data_id) 

277 except LookupError: 

278 return None 

279 if element.implied_union_target is not None: 279 ↛ 280line 279 didn't jump to line 280, because the condition on line 279 was never true

280 assert isinstance(element, Dimension), "Only dimensions can be implied dependencies." 

281 table = self._tables[element.implied_union_target.name] 

282 sql = sqlalchemy.select(table.columns[element.name].label(element.primary_key.name)).where( 

283 table.columns[element_name] == data_id[element_name] 

284 ) 

285 elif isinstance(element, SkyPixDimension): 

286 id = data_id[element_name] 

287 return element.RecordClass(id=id, region=element.pixelization.pixel(id)) 

288 else: 

289 table = self._tables[element.name] 

290 sql = table.select().where( 

291 *[ 

292 table.columns[column_name] == data_id[dimension_name] 

293 for column_name, dimension_name in zip( 

294 element.schema.required.names, element.required.names 

295 ) 

296 ] 

297 ) 

298 with self._db.query(sql) as results: 

299 row = results.fetchone() 

300 if row is None: 

301 return None 

302 if element.temporal is not None: 

303 mapping = dict(**row._mapping) 

304 timespan = self._db.getTimespanRepresentation().extract(mapping) 

305 for name in self._db.getTimespanRepresentation().getFieldNames(): 

306 del mapping[name] 

307 mapping["timespan"] = timespan 

308 else: 

309 # MyPy says this isn't a real collections.abc.Mapping, but it 

310 # sure behaves like one. 

311 mapping = row._mapping # type: ignore 

312 return element.RecordClass(**mapping) 

313 

314 def save_dimension_group(self, graph: DimensionGroup) -> int: 

315 # Docstring inherited from DimensionRecordStorageManager. 

316 return self._dimension_group_storage.save(graph) 

317 

318 def load_dimension_group(self, key: int) -> DimensionGroup: 

319 # Docstring inherited from DimensionRecordStorageManager. 

320 return self._dimension_group_storage.load(key) 

321 

322 def join( 

323 self, 

324 element_name: str, 

325 target: Relation, 

326 join: Join, 

327 context: queries.SqlQueryContext, 

328 ) -> Relation: 

329 # Docstring inherited. 

330 element = self.universe[element_name] 

331 # We use Join.partial(...).apply(...) instead of Join.apply(..., ...) 

332 # for the "backtracking" insertion capabilities of the former; more 

333 # specifically, if `target` is a tree that starts with SQL relations 

334 # and ends with iteration-engine operations (e.g. region-overlap 

335 # postprocessing), this will try to perform the join upstream in the 

336 # SQL engine before the transfer to iteration. 

337 if element.has_own_table: 

338 return join.partial(self._make_relation(element, context)).apply(target) 

339 elif element.implied_union_target is not None: 

340 columns = DimensionKeyColumnTag(element.name) 

341 return join.partial( 

342 self._make_relation(element.implied_union_target, context) 

343 .with_only_columns( 

344 {columns}, 

345 preferred_engine=context.preferred_engine, 

346 require_preferred_engine=True, 

347 ) 

348 .without_duplicates() 

349 ).apply(target) 

350 elif isinstance(element, SkyPixDimension): 

351 assert join.predicate.as_trivial(), "Expected trivial join predicate for skypix relation." 

352 id_column = DimensionKeyColumnTag(element.name) 

353 assert id_column in target.columns, "Guaranteed by QueryBuilder.make_dimension_target." 

354 function_name = f"{element.name}_region" 

355 context.iteration_engine.functions[function_name] = element.pixelization.pixel 

356 calculation = Calculation( 

357 tag=DimensionRecordColumnTag(element.name, "region"), 

358 expression=ColumnExpression.function(function_name, ColumnExpression.reference(id_column)), 

359 ) 

360 return calculation.apply( 

361 target, preferred_engine=context.iteration_engine, transfer=True, backtrack=True 

362 ) 

363 else: 

364 raise AssertionError(f"Unexpected definition of {element_name!r}.") 

365 

366 def make_spatial_join_relation( 

367 self, 

368 element1: str, 

369 element2: str, 

370 context: queries.SqlQueryContext, 

371 existing_relationships: Set[frozenset[str]] = frozenset(), 

372 ) -> tuple[Relation, bool]: 

373 # Docstring inherited. 

374 overlap_relationship = frozenset( 

375 self.universe[element1].dimensions.names | self.universe[element2].dimensions.names 

376 ) 

377 if overlap_relationship in existing_relationships: 377 ↛ 378line 377 didn't jump to line 378, because the condition on line 377 was never true

378 return context.preferred_engine.make_join_identity_relation(), False 

379 overlaps: Relation | None = None 

380 needs_refinement: bool = False 

381 if element1 == self.universe.commonSkyPix.name: 

382 (element1, element2) = (element2, element1) 

383 

384 if element1 in self._overlap_tables: 

385 if element2 in self._overlap_tables: 

386 # Use commonSkyPix as an intermediary with post-query 

387 # refinement. 

388 have_overlap1_already = ( 

389 frozenset(self.universe[element1].dimensions.names | {self.universe.commonSkyPix.name}) 

390 in existing_relationships 

391 ) 

392 have_overlap2_already = ( 

393 frozenset(self.universe[element2].dimensions.names | {self.universe.commonSkyPix.name}) 

394 in existing_relationships 

395 ) 

396 overlap1 = context.preferred_engine.make_join_identity_relation() 

397 overlap2 = context.preferred_engine.make_join_identity_relation() 

398 if not have_overlap1_already: 

399 overlap1 = self._make_common_skypix_join_relation(self.universe[element1], context) 

400 if not have_overlap2_already: 

401 overlap2 = self._make_common_skypix_join_relation(self.universe[element2], context) 

402 overlaps = overlap1.join(overlap2) 

403 if not have_overlap1_already and not have_overlap2_already: 

404 # Drop the common skypix ID column from the overlap 

405 # relation we return, since we don't want that column 

406 # to be mistakenly equated with any other appearance of 

407 # that column, since this would mangle queries like 

408 # "join visit to tract and tract to healpix10", by 

409 # incorrectly requiring all visits and healpix10 pixels 

410 # share common skypix pixels, not just tracts. 

411 columns = set(overlaps.columns) 

412 columns.remove(DimensionKeyColumnTag(self.universe.commonSkyPix.name)) 

413 overlaps = overlaps.with_only_columns(columns) 

414 needs_refinement = True 

415 elif element2 == self.universe.commonSkyPix.name: 415 ↛ 417line 415 didn't jump to line 417, because the condition on line 415 was never false

416 overlaps = self._make_common_skypix_join_relation(self.universe[element1], context) 

417 if overlaps is None: 

418 # In the future, there's a lot more we could try here: 

419 # 

420 # - for skypix dimensions, looking for materialized overlaps at 

421 # smaller spatial scales (higher-levels) and using bit-shifting; 

422 # 

423 # - for non-skypix dimensions, looking for materialized overlaps 

424 # for more finer-grained members of the same family, and then 

425 # doing SELECT DISTINCT (or even tolerating duplicates) on the 

426 # columns we care about (e.g. use patch overlaps to satisfy a 

427 # request for tract overlaps). 

428 # 

429 # It's not obvious that's better than just telling the user to 

430 # materialize more overlaps, though. 

431 raise MissingSpatialOverlapError( 

432 f"No materialized overlaps for spatial join between {element1!r} and {element2!r}." 

433 ) 

434 return overlaps, needs_refinement 

435 

436 def _make_relation( 

437 self, 

438 element: DimensionElement, 

439 context: queries.SqlQueryContext, 

440 ) -> Relation: 

441 table = self._tables[element.name] 

442 payload = sql.Payload[LogicalColumn](table) 

443 for tag, field_name in element.RecordClass.fields.columns.items(): 

444 if field_name == "timespan": 

445 payload.columns_available[tag] = self._db.getTimespanRepresentation().from_columns( 

446 table.columns, name=field_name 

447 ) 

448 else: 

449 payload.columns_available[tag] = table.columns[field_name] 

450 return context.sql_engine.make_leaf( 

451 payload.columns_available.keys(), 

452 name=element.name, 

453 payload=payload, 

454 ) 

455 

456 def _make_common_skypix_join_relation( 

457 self, 

458 element: DimensionElement, 

459 context: queries.SqlQueryContext, 

460 ) -> Relation: 

461 """Construct a subquery expression containing overlaps between the 

462 common skypix dimension and the given dimension element. 

463 

464 Parameters 

465 ---------- 

466 element : `DimensionElement` 

467 Spatial dimension element whose overlaps with the common skypix 

468 system are represented by the returned relation. 

469 context : `.queries.SqlQueryContext` 

470 Object that manages relation engines and database-side state 

471 (e.g. temporary tables) for the query. 

472 

473 Returns 

474 ------- 

475 relation : `sql.Relation` 

476 Join relation. 

477 """ 

478 assert element.spatial is not None, "Only called for spatial dimension elements." 

479 assert element.has_own_table, "Only called for dimension elements with their own tables." 

480 _, table = self._overlap_tables[element.name] 

481 payload = sql.Payload[LogicalColumn](table) 

482 payload.columns_available[ 

483 DimensionKeyColumnTag(self.universe.commonSkyPix.name) 

484 ] = payload.from_clause.columns.skypix_index 

485 for dimension_name in element.graph.required.names: 

486 payload.columns_available[DimensionKeyColumnTag(dimension_name)] = payload.from_clause.columns[ 

487 dimension_name 

488 ] 

489 payload.where.append(table.columns.skypix_system == self.universe.commonSkyPix.system.name) 

490 payload.where.append(table.columns.skypix_level == self.universe.commonSkyPix.level) 

491 leaf = context.sql_engine.make_leaf( 

492 payload.columns_available.keys(), 

493 name=f"{element.name}_{self.universe.commonSkyPix.name}_overlap", 

494 payload=payload, 

495 ) 

496 return leaf 

497 

498 @classmethod 

499 def currentVersions(cls) -> list[VersionTuple]: 

500 # Docstring inherited from VersionedExtension. 

501 return [_VERSION] 

502 

503 @classmethod 

504 def _make_skypix_overlap_tables( 

505 cls, context: StaticTablesContext, element: DimensionElement 

506 ) -> tuple[sqlalchemy.Table, sqlalchemy.Table]: 

507 assert element.governor is not None 

508 summary_spec = ddl.TableSpec( 

509 fields=[ 

510 ddl.FieldSpec( 

511 name="skypix_system", 

512 dtype=sqlalchemy.String, 

513 length=16, 

514 nullable=False, 

515 primaryKey=True, 

516 ), 

517 ddl.FieldSpec( 

518 name="skypix_level", 

519 dtype=sqlalchemy.SmallInteger, 

520 nullable=False, 

521 primaryKey=True, 

522 ), 

523 ] 

524 ) 

525 addDimensionForeignKey(summary_spec, element.governor, primaryKey=True) 

526 overlap_spec = ddl.TableSpec( 

527 fields=[ 

528 ddl.FieldSpec( 

529 name="skypix_system", 

530 dtype=sqlalchemy.String, 

531 length=16, 

532 nullable=False, 

533 primaryKey=True, 

534 ), 

535 ddl.FieldSpec( 

536 name="skypix_level", 

537 dtype=sqlalchemy.SmallInteger, 

538 nullable=False, 

539 primaryKey=True, 

540 ), 

541 # (more columns added below) 

542 ], 

543 unique=set(), 

544 indexes={ 

545 # This index has the same fields as the PK, in a different 

546 # order, to facilitate queries that know skypix_index and want 

547 # to find the other element. 

548 ddl.IndexSpec( 

549 "skypix_system", 

550 "skypix_level", 

551 "skypix_index", 

552 *element.graph.required.names, 

553 ), 

554 }, 

555 foreignKeys=[ 

556 # Foreign key to summary table. This makes sure we don't 

557 # materialize any overlaps without remembering that we've done 

558 # so in the summary table, though it can't prevent the converse 

559 # of adding a summary row without adding overlap row (either of 

560 # those is a logic bug, of course, but we want to be defensive 

561 # about those). Using ON DELETE CASCADE, it'd be very easy to 

562 # implement "disabling" an overlap materialization, because we 

563 # can just delete the summary row. 

564 # Note that the governor dimension column is added below, in 

565 # the call to addDimensionForeignKey. 

566 ddl.ForeignKeySpec( 

567 f"{element.name}_skypix_overlap_summary", 

568 source=("skypix_system", "skypix_level", element.governor.name), 

569 target=("skypix_system", "skypix_level", element.governor.name), 

570 onDelete="CASCADE", 

571 ), 

572 ], 

573 ) 

574 # Add fields for the standard element this class manages overlaps for. 

575 # This is guaranteed to add a column for the governor dimension, 

576 # because that's a required dependency of element. 

577 for dimension in element.required: 

578 addDimensionForeignKey(overlap_spec, dimension, primaryKey=True) 

579 # Add field for the actual skypix index. We do this later because I 

580 # think we care (at least a bit) about the order in which the primary 

581 # key is defined, in that we want a non-summary column like this one 

582 # to appear after the governor dimension column. 

583 overlap_spec.fields.add( 

584 ddl.FieldSpec( 

585 name="skypix_index", 

586 dtype=sqlalchemy.BigInteger, 

587 nullable=False, 

588 primaryKey=True, 

589 ) 

590 ) 

591 return ( 

592 context.addTable(f"{element.name}_skypix_overlap_summary", summary_spec), 

593 context.addTable(f"{element.name}_skypix_overlap", overlap_spec), 

594 ) 

595 

596 @classmethod 

597 def _make_legacy_overlap_tables( 

598 cls, 

599 context: StaticTablesContext, 

600 spatial: NamedKeyDict[DatabaseTopologicalFamily, list[DimensionElement]], 

601 ) -> None: 

602 for (_, elements1), (_, elements2) in itertools.combinations(spatial.items(), 2): 

603 for element1, element2 in itertools.product(elements1, elements2): 

604 if element1 > element2: 604 ↛ 605line 604 didn't jump to line 605, because the condition on line 604 was never true

605 (element2, element1) = (element1, element2) 

606 assert element1.spatial is not None and element2.spatial is not None 

607 assert element1.governor != element2.governor 

608 assert element1.governor is not None and element2.governor is not None 

609 summary_spec = ddl.TableSpec(fields=[]) 

610 addDimensionForeignKey(summary_spec, element1.governor, primaryKey=True) 

611 addDimensionForeignKey(summary_spec, element2.governor, primaryKey=True) 

612 context.addTable(f"{element1.name}_{element2.name}_overlap_summary", summary_spec) 

613 overlap_spec = ddl.TableSpec(fields=[]) 

614 addDimensionForeignKey(overlap_spec, element1.governor, primaryKey=True) 

615 addDimensionForeignKey(overlap_spec, element2.governor, primaryKey=True) 

616 for dimension in element1.required: 

617 if dimension != element1.governor: 

618 addDimensionForeignKey(overlap_spec, dimension, primaryKey=True) 

619 for dimension in element2.required: 

620 if dimension != element2.governor: 

621 addDimensionForeignKey(overlap_spec, dimension, primaryKey=True) 

622 context.addTable(f"{element1.name}_{element2.name}_overlap", overlap_spec) 

623 

624 def _make_record_db_rows( 

625 self, element: DimensionElement, records: Sequence[DimensionRecord], replace: bool 

626 ) -> tuple[ 

627 list[dict[str, Any]], 

628 list[dict[str, Any]], 

629 list[dict[str, Any]], 

630 dict[str, list[dict[str, Any]]], 

631 ]: 

632 rows = [record.toDict() for record in records] 

633 if element.temporal is not None: 

634 TimespanReprClass = self._db.getTimespanRepresentation() 

635 for row in rows: 

636 timespan = row.pop("timespan") 

637 TimespanReprClass.update(timespan, result=row) 

638 overlap_delete_rows = [] 

639 overlap_insert_rows = [] 

640 if element.spatial is not None: 

641 overlap_insert_rows = self._compute_common_skypix_overlap_inserts(element, records) 

642 if replace: 

643 overlap_delete_rows = self._compute_common_skypix_overlap_deletes(records) 

644 overlap_summary_rows = {} 

645 if element in self.universe.governor_dimensions: 

646 for related_element_name in self._overlap_tables.keys(): 

647 if self.universe[related_element_name].governor == element: 

648 overlap_summary_rows[related_element_name] = [ 

649 { 

650 "skypix_system": self.universe.commonSkyPix.system.name, 

651 "skypix_level": self.universe.commonSkyPix.level, 

652 element.name: record.dataId[element.name], 

653 } 

654 for record in records 

655 ] 

656 return rows, overlap_insert_rows, overlap_delete_rows, overlap_summary_rows 

657 

658 def _compute_common_skypix_overlap_deletes( 

659 self, records: Sequence[DimensionRecord] 

660 ) -> list[dict[str, Any]]: 

661 return [ 

662 { 

663 "skypix_system": self.universe.commonSkyPix.system.name, 

664 "skypix_level": self.universe.commonSkyPix.level, 

665 **record.dataId.required, 

666 } 

667 for record in records 

668 ] 

669 

670 def _compute_common_skypix_overlap_inserts( 

671 self, 

672 element: DimensionElement, 

673 records: Sequence[DimensionRecord], 

674 ) -> list[dict[str, Any]]: 

675 _LOG.debug("Precomputing common skypix overlaps for %s.", element.name) 

676 overlap_records: list[dict[str, Any]] = [] 

677 for record in records: 

678 if record.region is None: 

679 continue 

680 base_overlap_record = dict(record.dataId.required) 

681 base_overlap_record["skypix_system"] = self.universe.commonSkyPix.system.name 

682 base_overlap_record["skypix_level"] = self.universe.commonSkyPix.level 

683 for begin, end in self.universe.commonSkyPix.pixelization.envelope(record.region): 

684 for index in range(begin, end): 

685 overlap_records.append({"skypix_index": index, **base_overlap_record}) 

686 return overlap_records 

687 

688 def _insert_overlaps( 

689 self, 

690 element: DimensionElement, 

691 overlap_insert_rows: list[dict[str, Any]], 

692 overlap_delete_rows: list[dict[str, Any]], 

693 skip_existing: bool = False, 

694 ) -> None: 

695 if overlap_delete_rows: 

696 # Since any of the new records might have replaced existing ones 

697 # that already have overlap records, and we don't know which, we 

698 # have no choice but to delete all overlaps for these records and 

699 # recompute them. We include the skypix_system and skypix_level 

700 # column values explicitly instead of just letting the query search 

701 # for all of those related to the given records, because they are 

702 # the first columns in the primary key, and hence searching with 

703 # them will be way faster (and we don't want to add a new index 

704 # just for this operation). 

705 _LOG.debug("Deleting old common skypix overlaps for %s.", element.name) 

706 self._db.delete( 

707 self._overlap_tables[element.name][1], 

708 ["skypix_system", "skypix_level"] + list(element.minimal_group.required), 

709 *overlap_delete_rows, 

710 ) 

711 if overlap_insert_rows: 

712 _LOG.debug("Inserting %d new skypix overlap rows for %s.", len(overlap_insert_rows), element.name) 

713 if skip_existing: 

714 self._db.ensure( 

715 self._overlap_tables[element.name][1], *overlap_insert_rows, primary_key_only=True 

716 ) 

717 else: 

718 self._db.insert(self._overlap_tables[element.name][1], *overlap_insert_rows) 

719 # We have only ever put overlaps with the commonSkyPix system into 

720 # this table, and *probably* only ever will. But the schema leaves 

721 # open the possibility that we should be inserting overlaps for 

722 # some other skypix system, as we once thought we'd support. In 

723 # case that door opens again in the future, we need to check the 

724 # "overlap summary" table to see if are any skypix systems other 

725 # than the common skypix system and raise (rolling back the entire 

726 # transaction) if there are. 

727 summary_table = self._overlap_tables[element.name][0] 

728 check_sql = ( 

729 sqlalchemy.sql.select(summary_table.columns.skypix_system, summary_table.columns.skypix_level) 

730 .select_from(summary_table) 

731 .where( 

732 sqlalchemy.sql.not_( 

733 sqlalchemy.sql.and_( 

734 summary_table.columns.skypix_system == self.universe.commonSkyPix.system.name, 

735 summary_table.columns.skypix_level == self.universe.commonSkyPix.level, 

736 ) 

737 ) 

738 ) 

739 ) 

740 with self._db.query(check_sql) as sql_result: 

741 bad_summary_rows = sql_result.fetchall() 

742 if bad_summary_rows: 742 ↛ 743line 742 didn't jump to line 743, because the condition on line 742 was never true

743 bad_skypix_names = [f"{row.skypix_system}{row.skypix.level}" for row in bad_summary_rows] 

744 raise RuntimeError( 

745 f"Data repository has overlaps between {element} and {bad_skypix_names} that " 

746 "are not supported by this version of daf_butler. Please use a newer version." 

747 ) 

748 

749 

class _DimensionGroupStorage:
    """Helper object that manages saved DimensionGroup definitions.

    Should generally be constructed by calling `initialize` instead of invoking
    the constructor directly.

    Parameters
    ----------
    db : `Database`
        Interface to the underlying database engine and namespace.
    idTable : `sqlalchemy.schema.Table`
        Table that just holds unique IDs for dimension groups.
    definitionTable : `sqlalchemy.schema.Table`
        Table that maps dimension names to the IDs of the dimension groups to
        which they belong.
    universe : `DimensionUniverse`
        All known dimensions.
    """

    def __init__(
        self,
        db: Database,
        idTable: sqlalchemy.schema.Table,
        definitionTable: sqlalchemy.schema.Table,
        universe: DimensionUniverse,
    ):
        self._db = db
        self._idTable = idTable
        self._definitionTable = definitionTable
        self._universe = universe
        # In-memory caches of the persisted group <-> key mapping.  Key 0 is
        # permanently reserved for the empty dimension group and is never
        # stored in the database.
        self._keysByGroup: dict[DimensionGroup, int] = {universe.empty.as_group(): 0}
        self._groupsByKey: dict[int, DimensionGroup] = {0: universe.empty.as_group()}

    @classmethod
    def initialize(
        cls,
        db: Database,
        context: StaticTablesContext,
        *,
        universe: DimensionUniverse,
    ) -> _DimensionGroupStorage:
        """Construct a new instance, including creating tables if necessary.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present.
        universe : `DimensionUniverse`
            All known dimensions.

        Returns
        -------
        storage : `_DimensionGroupStorage`
            New instance of this class.
        """
        # We need two tables just so we have one where the autoincrement key is
        # the only primary key column, as is required by (at least) SQLite. In
        # other databases, we might be able to use a Sequence directly.
        idTable = context.addTable(
            "dimension_graph_key",
            ddl.TableSpec(
                fields=[
                    ddl.FieldSpec(
                        name="id",
                        dtype=sqlalchemy.BigInteger,
                        autoincrement=True,
                        primaryKey=True,
                    ),
                ],
            ),
        )
        definitionTable = context.addTable(
            "dimension_graph_definition",
            ddl.TableSpec(
                fields=[
                    ddl.FieldSpec(name="dimension_graph_id", dtype=sqlalchemy.BigInteger, primaryKey=True),
                    ddl.FieldSpec(name="dimension_name", dtype=sqlalchemy.Text, primaryKey=True),
                ],
                foreignKeys=[
                    ddl.ForeignKeySpec(
                        "dimension_graph_key",
                        source=("dimension_graph_id",),
                        target=("id",),
                        onDelete="CASCADE",
                    ),
                ],
            ),
        )
        return cls(db, idTable, definitionTable, universe=universe)

    def refresh(self) -> None:
        """Refresh the in-memory cache of saved DimensionGroup definitions.

        This should be done automatically whenever needed, but it can also
        be called explicitly.
        """
        dimensionNamesByKey: dict[int, set[str]] = defaultdict(set)
        with self._db.query(self._definitionTable.select()) as sql_result:
            sql_rows = sql_result.mappings().fetchall()
        for row in sql_rows:
            key = row[self._definitionTable.columns.dimension_graph_id]
            dimensionNamesByKey[key].add(row[self._definitionTable.columns.dimension_name])
        # Build complete replacement mappings (seeded with the reserved empty
        # group at key 0) and swap them in at the end, rather than mutating
        # the existing caches row by row.
        keysByGroup: dict[DimensionGroup, int] = {self._universe.empty.as_group(): 0}
        groupsByKey: dict[int, DimensionGroup] = {0: self._universe.empty.as_group()}
        for key, dimensionNames in dimensionNamesByKey.items():
            group = DimensionGroup(self._universe, names=dimensionNames)
            keysByGroup[group] = key
            groupsByKey[key] = group
        self._groupsByKey = groupsByKey
        self._keysByGroup = keysByGroup

    def save(self, group: DimensionGroup) -> int:
        """Save a `DimensionGroup` definition to the database, allowing it to
        be retrieved later via the returned key.

        Parameters
        ----------
        group : `DimensionGroup`
            Set of dimensions to save.

        Returns
        -------
        key : `int`
            Integer used as the unique key for this `DimensionGroup` in the
            database.
        """
        key = self._keysByGroup.get(group)
        if key is not None:
            # Cache hit; no database work needed.
            return key
        # Lock tables and then refresh to guard against races where some other
        # process is trying to register the exact same dimension group.  This
        # is probably not the most efficient way to do it, but it should be a
        # rare operation, especially since the short-circuit above will usually
        # work in long-lived data repositories.
        with self._db.transaction(lock=[self._idTable, self._definitionTable]):
            self.refresh()
            key = self._keysByGroup.get(group)
            if key is None:
                # Still not present after refresh: insert a new ID row, then
                # one definition row per required dimension name.
                (key,) = self._db.insert(self._idTable, {}, returnIds=True)  # type: ignore
                self._db.insert(
                    self._definitionTable,
                    *[{"dimension_graph_id": key, "dimension_name": name} for name in group.required],
                )
                self._keysByGroup[group] = key
                self._groupsByKey[key] = group
        return key

    def load(self, key: int) -> DimensionGroup:
        """Retrieve a `DimensionGroup` that was previously saved in the
        database.

        Parameters
        ----------
        key : `int`
            Integer used as the unique key for this `DimensionGroup` in the
            database.

        Returns
        -------
        group : `DimensionGroup`
            Retrieved dimension group.
        """
        group = self._groupsByKey.get(key)
        if group is None:
            # Cache miss: another client may have saved this group since our
            # last refresh, so reload before giving up (KeyError if truly
            # absent).
            self.refresh()
            group = self._groupsByKey[key]
        return group