Coverage for python/lsst/daf/butler/registry/dimensions/table.py: 86%

237 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2022-09-30 02:18 -0700

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = ["TableDimensionRecordStorage"] 

24 

25import itertools 

26import logging 

27import warnings 

28from collections import defaultdict 

29from collections.abc import Iterable, Iterator, Mapping, Sequence, Set 

30from typing import Any 

31 

32import sqlalchemy 

33from lsst.utils.ellipsis import Ellipsis, EllipsisType 

34 

35from ...core import ( 

36 DatabaseDimensionElement, 

37 DataCoordinate, 

38 DataCoordinateIterable, 

39 DimensionElement, 

40 DimensionRecord, 

41 GovernorDimension, 

42 NamedKeyDict, 

43 NamedKeyMapping, 

44 NamedValueSet, 

45 SimpleQuery, 

46 SkyPixDimension, 

47 SkyPixSystem, 

48 TimespanDatabaseRepresentation, 

49 addDimensionForeignKey, 

50 ddl, 

51) 

52from ..interfaces import ( 

53 Database, 

54 DatabaseDimensionOverlapStorage, 

55 DatabaseDimensionRecordStorage, 

56 GovernorDimensionRecordStorage, 

57 StaticTablesContext, 

58) 

59from ..queries import QueryBuilder 

60 

# Module-level logger, named after this module.
_LOG = logging.getLogger(__name__)


MAX_FETCH_CHUNK = 1000
"""Maximum number of data IDs for which records are fetched at a time.

Barring something database-engine-specific, this sets the size of the actual
SQL query, not just the number of result rows, because the only way to query
for multiple data IDs in a single SELECT query via SQLAlchemy is to have an OR
term in the WHERE clause for each one.
"""

72 

73 

class TableDimensionRecordStorage(DatabaseDimensionRecordStorage):
    """A record storage implementation that uses a regular database table.

    Parameters
    ----------
    db : `Database`
        Interface to the database engine and namespace that will hold these
        dimension records.
    element : `DatabaseDimensionElement`
        The element whose records this storage will manage.
    table : `sqlalchemy.schema.Table`
        The logical table for the element.
    skyPixOverlap : `_SkyPixOverlapStorage`, optional
        Object that manages the tables that hold materialized spatial overlap
        joins to skypix dimensions.  Should be `None` if (and only if)
        ``element.spatial is None``.
    """

    def __init__(
        self,
        db: Database,
        element: DatabaseDimensionElement,
        *,
        table: sqlalchemy.schema.Table,
        skyPixOverlap: _SkyPixOverlapStorage | None = None,
    ):
        self._db = db
        self._table = table
        self._element = element
        # Map each dimension name to the column of ``table`` that holds its
        # value; `fetch` uses this to translate data ID constraints into
        # constraints on this table.
        self._fetchColumns: dict[str, sqlalchemy.sql.ColumnElement] = {
            dimension.name: self._table.columns[name]
            for dimension, name in zip(
                self._element.dimensions, self._element.RecordClass.fields.dimensions.names
            )
        }
        self._skyPixOverlap = skyPixOverlap
        # Overlap storage objects registered through `connect`.
        self._otherOverlaps: list[DatabaseDimensionOverlapStorage] = []

    @classmethod
    def initialize(
        cls,
        db: Database,
        element: DatabaseDimensionElement,
        *,
        context: StaticTablesContext | None = None,
        config: Mapping[str, Any],
        governors: NamedKeyMapping[GovernorDimension, GovernorDimensionRecordStorage],
    ) -> DatabaseDimensionRecordStorage:
        # Docstring inherited from DatabaseDimensionRecordStorage.
        spec = element.RecordClass.fields.makeTableSpec(TimespanReprClass=db.getTimespanRepresentation())
        if context is not None:
            table = context.addTable(element.name, spec)
        else:
            table = db.ensureTableExists(element.name, spec)

        spatial = element.spatial
        if spatial is None:
            # Non-spatial elements have no skypix overlaps to manage.
            return cls(db, element, table=table)

        governor = governors[spatial.governor]
        skyPixOverlap = _SkyPixOverlapStorage.initialize(
            db,
            element,
            context=context,
            governor=governor,
        )
        result = cls(db, element, table=table, skyPixOverlap=skyPixOverlap)

        # Whenever anyone inserts a new governor dimension value, we want
        # to enable overlaps for that value between this element and
        # commonSkyPix.
        def callback(record: DimensionRecord) -> None:
            skyPixOverlap.enable(
                result,
                element.universe.commonSkyPix,
                getattr(record, spatial.governor.primaryKey.name),
            )

        governor.registerInsertionListener(callback)
        return result

    @property
    def element(self) -> DatabaseDimensionElement:
        # Docstring inherited from DimensionRecordStorage.element.
        return self._element

    def clearCaches(self) -> None:
        # Docstring inherited from DimensionRecordStorage.clearCaches.
        # No record caches are kept by this class, so there is nothing to do.
        pass

    def join(
        self,
        builder: QueryBuilder,
        *,
        regions: NamedKeyDict[DimensionElement, sqlalchemy.sql.ColumnElement] | None = None,
        timespans: NamedKeyDict[DimensionElement, TimespanDatabaseRepresentation] | None = None,
    ) -> sqlalchemy.sql.FromClause:
        # Docstring inherited from DimensionRecordStorage.
        # NOTE(review): this method has always returned the element's table;
        # the return annotation now says so.  Confirm the base-class
        # declaration agrees.
        if regions is not None:
            # A spatial join is requested: bring in the materialized overlaps
            # with the common skypix dimension and publish our region column.
            dimensions = NamedValueSet(self.element.required)
            dimensions.add(self.element.universe.commonSkyPix)
            assert self._skyPixOverlap is not None
            builder.joinTable(
                self._skyPixOverlap.select(self.element.universe.commonSkyPix, Ellipsis),
                dimensions,
            )
            regions[self.element] = self._table.columns["region"]
        joinOn = builder.startJoin(
            self._table, self.element.dimensions, self.element.RecordClass.fields.dimensions.names
        )
        if timespans is not None:
            # A temporal join is requested: require our timespan to overlap
            # every timespan already in the query, then publish ours.
            timespanInTable = self._db.getTimespanRepresentation().from_columns(self._table.columns)
            for timespanInQuery in timespans.values():
                joinOn.append(timespanInQuery.overlaps(timespanInTable))
            timespans[self.element] = timespanInTable
        builder.finishJoin(self._table, joinOn)
        return self._table

    def fetch(self, dataIds: DataCoordinateIterable) -> Iterable[DimensionRecord]:
        # Docstring inherited from DimensionRecordStorage.fetch.
        RecordClass = self.element.RecordClass
        query = SimpleQuery()
        query.columns.extend(self._table.columns[name] for name in RecordClass.fields.standard.names)
        if self.element.spatial is not None:
            query.columns.append(self._table.columns["region"])
        if self.element.temporal is not None:
            TimespanReprClass = self._db.getTimespanRepresentation()
            query.columns.extend(self._table.columns[name] for name in TimespanReprClass.getFieldNames())
        query.join(self._table)
        dataIds.constrain(query, lambda name: self._fetchColumns[name])
        with warnings.catch_warnings():
            # Some of our generated queries may contain cartesian joins, this
            # is not a serious issue as it is properly constrained, so we want
            # to suppress sqlalchemy warnings.
            warnings.filterwarnings(
                "ignore",
                message="SELECT statement has a cartesian product",
                category=sqlalchemy.exc.SAWarning,
            )
            for row in self._db.query(query.combine()):
                values = row._asdict()
                if self.element.temporal is not None:
                    # Collapse the database timespan column(s) into the
                    # single timespan value the record class is given.
                    values[TimespanDatabaseRepresentation.NAME] = TimespanReprClass.extract(values)
                yield RecordClass(**values)

    def insert(self, *records: DimensionRecord, replace: bool = False, skip_existing: bool = False) -> None:
        # Docstring inherited from DimensionRecordStorage.insert.
        elementRows = [record.toDict() for record in records]
        if self.element.temporal is not None:
            # Expand each record's timespan into its database column(s).
            TimespanReprClass = self._db.getTimespanRepresentation()
            for row in elementRows:
                timespan = row.pop(TimespanDatabaseRepresentation.NAME)
                TimespanReprClass.update(timespan, result=row)
        # Element rows and their overlap rows are written in one transaction.
        with self._db.transaction():
            if replace:
                self._db.replace(self._table, *elementRows)
            elif skip_existing:
                self._db.ensure(self._table, *elementRows, primary_key_only=True)
            else:
                self._db.insert(self._table, *elementRows)
            if self._skyPixOverlap is not None:
                self._skyPixOverlap.insert(records, replace=replace, skip_existing=skip_existing)

    def sync(self, record: DimensionRecord, update: bool = False) -> bool | dict[str, Any]:
        # Docstring inherited from DimensionRecordStorage.sync.
        compared = record.toDict()
        # Required fields identify the row; everything left in ``compared``
        # is checked against (or written to) the existing row.
        keys = {name: compared.pop(name) for name in record.fields.required.names}
        if self.element.temporal is not None:
            TimespanReprClass = self._db.getTimespanRepresentation()
            timespan = compared.pop(TimespanDatabaseRepresentation.NAME)
            TimespanReprClass.update(timespan, result=compared)
        with self._db.transaction():
            _, inserted_or_updated = self._db.sync(
                self._table,
                keys=keys,
                compared=compared,
                update=update,
            )
            if inserted_or_updated and self._skyPixOverlap is not None:
                if inserted_or_updated is True:
                    # Inserted a new row, so we just need to insert new
                    # overlap rows.
                    self._skyPixOverlap.insert([record])
                elif "region" in inserted_or_updated:
                    # Updated the region, so we need to delete old overlap
                    # rows and insert new ones.
                    # (mypy should be able to tell that inserted_or_updated
                    # must be a dict if we get to this clause, but it can't)
                    self._skyPixOverlap.insert([record], replace=True)
                # Otherwise we updated something other than a region, and
                # the existing overlap rows remain valid.
        return inserted_or_updated

    def digestTables(self) -> Iterable[sqlalchemy.schema.Table]:
        # Docstring inherited from DimensionRecordStorage.digestTables.
        result = [self._table]
        if self._skyPixOverlap is not None:
            result.extend(self._skyPixOverlap.digestTables())
        return result

    def connect(self, overlaps: DatabaseDimensionOverlapStorage) -> None:
        # Docstring inherited from DatabaseDimensionRecordStorage.
        self._otherOverlaps.append(overlaps)

278 

279 

class _SkyPixOverlapStorage:
    """A helper object for `TableDimensionRecordStorage` that manages its
    materialized overlaps with skypix dimensions.

    New instances should be constructed by calling `initialize`, not by calling
    the constructor directly.

    Parameters
    ----------
    db : `Database`
        Interface to the underlying database engine and namespace.
    element : `DatabaseDimensionElement`
        Dimension element whose overlaps are to be managed.
    summaryTable : `sqlalchemy.schema.Table`
        Table that records which combinations of skypix dimensions and
        governor dimension values have materialized overlap rows.
    overlapTable : `sqlalchemy.schema.Table`
        Table containing the actual materialized overlap rows.
    governor : `GovernorDimensionRecordStorage`
        Record storage backend for this element's governor dimension.

    Notes
    -----
    This class (and most importantly, the tables it relies on) can in principle
    manage overlaps with any skypix dimension, but at present it is only being
    used to manage relationships with the special ``commonSkyPix`` dimension,
    because that's all the query system uses.  Eventually, we expect to
    require users to explicitly materialize all relationships they will want
    to use in queries.

    Other possible future improvements include:

    - allowing finer-grained skypix dimensions to provide overlap rows for
      coarser ones, by dividing indices by powers of 4 (and possibly doing
      ``SELECT DISTINCT`` in the subquery to remove duplicates);

    - allowing finer-grained database elements (e.g. patch) to provide overlap
      rows for coarser ones (e.g. tract), by ignoring irrelevant columns
      (e.g. the patch IDs) in the subquery (again, possible with
      ``SELECT DISTINCT``).

    But there's no point to doing any of that until the query system can
    figure out how best to ask for overlap rows when an exact match isn't
    available.
    """

    # Format strings for the names of the two tables managed per element.
    _SUMMARY_TABLE_NAME_SPEC = "{element.name}_skypix_overlap_summary"
    _OVERLAP_TABLE_NAME_SPEC = "{element.name}_skypix_overlap"

    def __init__(
        self,
        db: Database,
        element: DatabaseDimensionElement,
        summaryTable: sqlalchemy.schema.Table,
        overlapTable: sqlalchemy.schema.Table,
        governor: GovernorDimensionRecordStorage,
    ):
        self._db = db
        self.element = element
        assert element.spatial is not None
        self._summaryTable = summaryTable
        self._overlapTable = overlapTable
        self._governor = governor

    @classmethod
    def initialize(
        cls,
        db: Database,
        element: DatabaseDimensionElement,
        *,
        context: StaticTablesContext | None,
        governor: GovernorDimensionRecordStorage,
    ) -> _SkyPixOverlapStorage:
        """Construct a new instance, creating tables as needed.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        element : `DatabaseDimensionElement`
            Dimension element whose overlaps are to be managed.
        context : `StaticTablesContext`, optional
            If provided, an object to use to create any new tables.  If not
            provided, ``db.ensureTableExists`` should be used instead.
        governor : `GovernorDimensionRecordStorage`
            Record storage backend for this element's governor dimension.

        Returns
        -------
        storage : `_SkyPixOverlapStorage`
            New instance of ``cls`` backed by the summary and overlap tables.
        """
        makeTable = context.addTable if context is not None else db.ensureTableExists
        summaryTable = makeTable(
            cls._SUMMARY_TABLE_NAME_SPEC.format(element=element),
            cls._makeSummaryTableSpec(element),
        )
        overlapTable = makeTable(
            cls._OVERLAP_TABLE_NAME_SPEC.format(element=element),
            cls._makeOverlapTableSpec(element),
        )
        # Construct via ``cls`` (not the hard-coded class name) so that a
        # subclass calling `initialize` gets an instance of itself.
        return cls(db, element, summaryTable=summaryTable, overlapTable=overlapTable, governor=governor)

    @staticmethod
    def _makeSkyPixKeyFields() -> list[ddl.FieldSpec]:
        """Return new field specs for the skypix system and level columns
        that begin the primary key of both the summary and overlap tables.
        """
        return [
            ddl.FieldSpec(
                name="skypix_system",
                dtype=sqlalchemy.String,
                length=16,
                nullable=False,
                primaryKey=True,
            ),
            ddl.FieldSpec(
                name="skypix_level",
                dtype=sqlalchemy.SmallInteger,
                nullable=False,
                primaryKey=True,
            ),
        ]

    @classmethod
    def _makeSummaryTableSpec(cls, element: DatabaseDimensionElement) -> ddl.TableSpec:
        """Create a specification for the table that records which combinations
        of skypix dimension and governor value have materialized overlaps.

        Parameters
        ----------
        element : `DatabaseDimensionElement`
            Dimension element whose overlaps are to be managed.

        Returns
        -------
        tableSpec : `ddl.TableSpec`
            Table specification.
        """
        assert element.spatial is not None
        tableSpec = ddl.TableSpec(fields=cls._makeSkyPixKeyFields())
        addDimensionForeignKey(tableSpec, element.spatial.governor, primaryKey=True)
        return tableSpec

    @classmethod
    def _makeOverlapTableSpec(cls, element: DatabaseDimensionElement) -> ddl.TableSpec:
        """Create a specification for the table that holds materialized
        overlap rows.

        Parameters
        ----------
        element : `DatabaseDimensionElement`
            Dimension element whose overlaps are to be managed.

        Returns
        -------
        tableSpec : `ddl.TableSpec`
            Table specification.
        """
        assert element.spatial is not None
        tableSpec = ddl.TableSpec(
            # (more columns added below)
            fields=cls._makeSkyPixKeyFields(),
            unique=set(),
            indexes={
                # This index has the same fields as the PK, in a different
                # order, to facilitate queries that know skypix_index and want
                # to find the other element.
                (
                    "skypix_system",
                    "skypix_level",
                    "skypix_index",
                )
                + tuple(element.graph.required.names),
            },
            foreignKeys=[
                # Foreign key to summary table.  This makes sure we don't
                # materialize any overlaps without remembering that we've done
                # so in the summary table, though it can't prevent the converse
                # of adding a summary row without adding overlap row (either of
                # those is a logic bug, of course, but we want to be defensive
                # about those).  Using ON DELETE CASCADE, it'd be very easy to
                # implement "disabling" an overlap materialization, because we
                # can just delete the summary row.
                # Note that the governor dimension column is added below, in
                # the call to addDimensionForeignKey.
                ddl.ForeignKeySpec(
                    cls._SUMMARY_TABLE_NAME_SPEC.format(element=element),
                    source=("skypix_system", "skypix_level", element.spatial.governor.name),
                    target=("skypix_system", "skypix_level", element.spatial.governor.name),
                    onDelete="CASCADE",
                ),
            ],
        )
        # Add fields for the standard element this class manages overlaps for.
        # This is guaranteed to add a column for the governor dimension,
        # because that's a required dependency of element.
        for dimension in element.required:
            addDimensionForeignKey(tableSpec, dimension, primaryKey=True)
        # Add field for the actual skypix index.  We do this later because I
        # think we care (at least a bit) about the order in which the primary
        # key is defined, in that we want a non-summary column like this one
        # to appear after the governor dimension column.
        tableSpec.fields.add(
            ddl.FieldSpec(
                name="skypix_index",
                dtype=sqlalchemy.BigInteger,
                nullable=False,
                primaryKey=True,
            )
        )
        return tableSpec

    def enable(
        self,
        storage: TableDimensionRecordStorage,
        skypix: SkyPixDimension,
        governorValue: str,
    ) -> None:
        """Enable materialization of overlaps between a skypix dimension
        and the records of ``self.element`` with a particular governor value.

        Parameters
        ----------
        storage : `TableDimensionRecordStorage`
            Storage object for the records of ``self.element``.
        skypix : `SkyPixDimension`
            The skypix dimension (system and level) for which overlaps should
            be materialized.
        governorValue : `str`
            Value of this element's governor dimension for which overlaps
            should be materialized.  For example, if ``self.element`` is
            ``visit``, this is an instrument name; if ``self.element`` is
            ``patch``, this is a skymap name.

        Notes
        -----
        If there are existing rows for the given ``governorValue``, overlap
        rows for them will be immediately computed and inserted.  At present,
        that never happens, because we only enable overlaps with
        `DimensionUniverse.commonSkyPix`, and that happens immediately after
        each governor row is inserted (and there can't be any patch rows,
        for example, until after the corresponding skymap row is inserted).

        After calling `enable` for a particular combination, any new records
        for ``self.element`` that are inserted will automatically be
        accompanied by overlap records (via calls to `insert` made
        by `TableDimensionRecordStorage` methods).
        """
        # Because we're essentially materializing a view in Python, we
        # aggressively lock all tables we're reading and writing in order to be
        # sure nothing gets out of sync.  This may not be the most efficient
        # approach possible, but we'll focus on correct before we focus on
        # fast, and enabling a new overlap combination should be a very rare
        # operation anyway, and never one we do in parallel.
        with self._db.transaction(
            lock=[self._governor.table, storage._table, self._summaryTable, self._overlapTable]
        ):
            _, inserted = self._db.sync(
                self._summaryTable,
                keys={
                    "skypix_system": skypix.system.name,
                    "skypix_level": skypix.level,
                    self._governor.element.name: governorValue,
                },
            )
            if inserted:
                _LOG.debug(
                    "Precomputing initial overlaps for %s vs %s for %s=%s",
                    skypix.name,
                    self.element.name,
                    self._governor.element.name,
                    governorValue,
                )
                self._fill(storage=storage, skypix=skypix, governorValue=governorValue)
            else:
                _LOG.debug(
                    "Overlaps already precomputed for %s vs %s for %s=%s",
                    skypix.name,
                    self.element.name,
                    self._governor.element.name,
                    governorValue,
                )

    def _fill(
        self,
        *,
        storage: TableDimensionRecordStorage,
        skypix: SkyPixDimension,
        governorValue: str,
    ) -> None:
        """Insert overlap records for a newly-enabled combination of skypix
        dimension and governor value.

        This method should only be called by `enable`.

        Parameters
        ----------
        storage : `TableDimensionRecordStorage`
            Storage object for the records of ``self.element``.
        skypix : `SkyPixDimension`
            The skypix dimension (system and level) for which overlaps should
            be materialized.
        governorValue : `str`
            Value of this element's governor dimension for which overlaps
            should be materialized.  For example, if ``self.element`` is
            ``visit``, this is an instrument name; if ``self.element`` is
            ``patch``, this is a skymap name.
        """
        overlapRecords: list[dict] = []
        # `DimensionRecordStorage.fetch` as defined by the ABC expects to be
        # given iterables of data IDs that correspond to that element's graph
        # (e.g. {instrument, visit, detector}), not just some subset of it
        # (e.g. {instrument}).  But we know the implementation of `fetch` for
        # `TableDimensionRecordStorage` will use this iterable to do exactly
        # what we want.
        governorDataId = DataCoordinate.standardize(
            {self._governor.element.name: governorValue}, graph=self._governor.element.graph
        )
        for record in storage.fetch(DataCoordinateIterable.fromScalar(governorDataId)):
            if record.region is None:
                continue
            baseOverlapRecord = record.dataId.byName()
            baseOverlapRecord["skypix_system"] = skypix.system.name
            baseOverlapRecord["skypix_level"] = skypix.level
            # The envelope is consumed as half-open (begin, end) index ranges.
            for begin, end in skypix.pixelization.envelope(record.region):
                overlapRecords.extend(
                    dict(baseOverlapRecord, skypix_index=index) for index in range(begin, end)
                )
        _LOG.debug(
            "Inserting %d initial overlap rows for %s vs %s for %s=%r",
            len(overlapRecords),
            skypix.name,
            self.element.name,
            self._governor.element.name,
            governorValue,
        )
        self._db.insert(self._overlapTable, *overlapRecords)

    def insert(
        self, records: Sequence[DimensionRecord], replace: bool = False, skip_existing: bool = False
    ) -> None:
        """Insert overlaps for a sequence of ``self.element`` records that
        have just been inserted.

        This must be called by any method that inserts records for that
        element (i.e. `TableDimensionRecordStorage.insert` and
        `TableDimensionRecordStorage.sync`), within the same transaction.

        Parameters
        ----------
        records : `Sequence` [ `DimensionRecord` ]
            Records for ``self.element``.  Records with `None` regions are
            ignored.  An empty sequence is a no-op.
        replace : `bool`, optional
            If `True` (`False` is default) one or more of the given records may
            already exist and is being updated, so we need to delete any
            existing overlap records first.
        skip_existing : `bool`, optional
            If `True` (`False` is default), skip insertion if a record with
            the same primary key values already exists.
        """
        if not records:
            # Nothing to materialize; avoid taking a table lock and running a
            # query with an empty IN clause for no effect.
            return
        governorName = self._governor.element.name
        # Group records by family.governor value.
        grouped: dict[str, list[DimensionRecord]] = defaultdict(list)
        for record in records:
            grouped[getattr(record, governorName)].append(record)
        _LOG.debug(
            "Precomputing new skypix overlaps for %s where %s in %s.",
            self.element.name,
            self._governor.element.name,
            grouped.keys(),
        )
        # Make sure the set of combinations to materialize does not change
        # while we are materializing the ones we have, by locking the summary
        # table.  Because we aren't planning to write to the summary table,
        # this could just be a SHARED lock instead of an EXCLUSIVE one, but
        # there's no API for that right now.
        with self._db.transaction(lock=[self._summaryTable]):
            # Query for the skypix dimensions to be associated with each
            # governor value.
            gvCol = self._summaryTable.columns[governorName]
            sysCol = self._summaryTable.columns.skypix_system
            lvlCol = self._summaryTable.columns.skypix_level
            query = (
                sqlalchemy.sql.select(
                    gvCol,
                    sysCol,
                    lvlCol,
                )
                .select_from(self._summaryTable)
                .where(gvCol.in_(list(grouped.keys())))
            )
            # Group results by governor value, then skypix system.
            skypix: dict[str, NamedKeyDict[SkyPixSystem, list[int]]] = {gv: NamedKeyDict() for gv in grouped}
            for summaryRow in self._db.query(query).mappings():
                system = self.element.universe.skypix[summaryRow[sysCol]]
                skypix[summaryRow[gvCol]].setdefault(system, []).append(summaryRow[lvlCol])
            if replace:
                # Construct constraints for a DELETE query as a list of dicts.
                # We include the skypix_system and skypix_level column values
                # explicitly instead of just letting the query search for all
                # of those related to the given records, because they are the
                # first columns in the primary key, and hence searching with
                # them will be way faster (and we don't want to add a new index
                # just for this operation).
                to_delete: list[dict[str, Any]] = []
                for gv, skypix_systems in skypix.items():
                    for system, skypix_levels in skypix_systems.items():
                        to_delete.extend(
                            {"skypix_system": system.name, "skypix_level": level, **record.dataId.byName()}
                            for record, level in itertools.product(grouped[gv], skypix_levels)
                        )
                self._db.delete(
                    self._overlapTable,
                    ["skypix_system", "skypix_level"] + list(self.element.graph.required.names),
                    *to_delete,
                )
            overlapRecords: list[dict] = []
            # Compute overlaps for one governor value at a time, but gather
            # them all up for one insert.
            for gv, group in grouped.items():
                overlapRecords.extend(self._compute(group, skypix[gv], gv))
            _LOG.debug(
                "Inserting %d new skypix overlap rows for %s where %s in %s.",
                len(overlapRecords),
                self.element.name,
                self._governor.element.name,
                grouped.keys(),
            )
            if skip_existing:
                self._db.ensure(self._overlapTable, *overlapRecords, primary_key_only=True)
            else:
                self._db.insert(self._overlapTable, *overlapRecords)

    def _compute(
        self,
        records: Sequence[DimensionRecord],
        skypix: NamedKeyDict[SkyPixSystem, list[int]],
        governorValue: str,
    ) -> Iterator[dict]:
        """Compute all overlap rows for a particular governor dimension value
        and all of the skypix dimensions for which its overlaps are enabled.

        This method should only be called by `insert`.

        Parameters
        ----------
        records : `Sequence` [ `DimensionRecord` ]
            Records for ``self.element``.  Records with `None` regions are
            ignored.  All must have the governor value given.
        skypix : `NamedKeyDict` [ `SkyPixSystem`, `list` [ `int` ] ]
            Mapping containing all skypix systems and levels for which overlaps
            should be computed, grouped by `SkyPixSystem`.  The level lists
            are not modified.
        governorValue : `str`
            Value of this element's governor dimension for which overlaps
            should be computed.  For example, if ``self.element`` is ``visit``,
            this is an instrument name; if ``self.element`` is ``patch``, this
            is a skymap name.

        Yields
        ------
        row : `dict`
            Dictionary representing an overlap row.
        """
        governorName = self._governor.element.name
        # Order each system's levels from finest to coarsest once, up front,
        # on a copy, so the caller's lists are left untouched.  Systems with
        # no enabled levels are dropped.
        enabled = [(system, sorted(levels, reverse=True)) for system, levels in skypix.items() if levels]
        # Process input records one at time, computing all skypix indices for
        # each.
        for record in records:
            if record.region is None:
                continue
            assert getattr(record, governorName) == governorValue
            for system, levels in enabled:
                baseOverlapRecord = record.dataId.byName()
                baseOverlapRecord["skypix_system"] = system.name
                # Start with the first level, which is the finest-grained one.
                # Compute skypix envelope indices directly for that.
                finest = levels[0]
                indices: dict[int, set[int]] = {finest: set()}
                for begin, end in system[finest].pixelization.envelope(record.region):
                    indices[finest].update(range(begin, end))
                # Divide those indices by powers of 4 (and remove duplicates)
                # to work our way up to the last (coarsest) level.
                for lastLevel, nextLevel in zip(levels[:-1], levels[1:]):
                    factor = 4 ** (lastLevel - nextLevel)
                    indices[nextLevel] = {index // factor for index in indices[lastLevel]}
                for level in levels:
                    for index in indices[level]:
                        yield {
                            "skypix_level": level,
                            "skypix_index": index,
                            **baseOverlapRecord,
                        }

    def select(
        self,
        skypix: SkyPixDimension,
        governorValues: Set[str] | EllipsisType,
    ) -> sqlalchemy.sql.FromClause:
        """Construct a subquery expression containing overlaps between the
        given skypix dimension and governor values.

        Parameters
        ----------
        skypix : `SkyPixDimension`
            The skypix dimension (system and level) for which overlaps should
            be materialized.
        governorValues : `~collections.abc.Set` [ `str` ] or ``...``
            Values of this element's governor dimension for which overlaps
            should be returned.  For example, if ``self.element`` is ``visit``,
            this is a set of instrument names; if ``self.element`` is
            ``patch``, this is a set of skymap names.  If ``...`` all values
            in the database are used (`GovernorDimensionRecordStorage.values`).

        Returns
        -------
        subquery : `sqlalchemy.sql.FromClause`
            A SELECT query with an alias, intended for use as a subquery, with
            columns equal to ``self.element.required.names`` + ``skypix.name``.

        Raises
        ------
        RuntimeError
            Raised if ``skypix`` is not the common skypix dimension and
            overlaps with it have not been materialized for one or more of
            the requested governor values.
        """
        if skypix != self.element.universe.commonSkyPix:
            # We guarantee elsewhere that we always materialize all overlaps
            # vs. commonSkyPix, but for everything else, we need to check that
            # we have materialized this combination of governor values and
            # skypix.
            summaryWhere = [
                self._summaryTable.columns.skypix_system == skypix.system.name,
                self._summaryTable.columns.skypix_level == skypix.level,
            ]
            gvCol = self._summaryTable.columns[self._governor.element.name]
            if governorValues is not Ellipsis:
                summaryWhere.append(gvCol.in_(list(governorValues)))
            summaryQuery = (
                sqlalchemy.sql.select(gvCol)
                .select_from(self._summaryTable)
                .where(sqlalchemy.sql.and_(*summaryWhere))
            )
            materializedGovernorValues = {row._mapping[gvCol] for row in self._db.query(summaryQuery)}
            if governorValues is Ellipsis:
                missingGovernorValues = self._governor.values - materializedGovernorValues
            else:
                missingGovernorValues = governorValues - materializedGovernorValues
            if missingGovernorValues:
                raise RuntimeError(
                    f"Query requires an overlap join between {skypix.name} and {self.element.name} "
                    f"(for {self._governor.element.name} in {missingGovernorValues}), but these "
                    f"have not been materialized."
                )
        # The skypix index column is relabeled with the skypix dimension's
        # name; the element's required dimension columns keep their names.
        columns = [self._overlapTable.columns.skypix_index.label(skypix.name)]
        columns.extend(self._overlapTable.columns[name] for name in self.element.graph.required.names)
        overlapWhere = [
            self._overlapTable.columns.skypix_system == skypix.system.name,
            self._overlapTable.columns.skypix_level == skypix.level,
        ]
        if governorValues is not Ellipsis:
            overlapWhere.append(
                self._overlapTable.columns[self._governor.element.name].in_(list(governorValues))
            )
        overlapQuery = (
            sqlalchemy.sql.select(*columns)
            .select_from(self._overlapTable)
            .where(sqlalchemy.sql.and_(*overlapWhere))
        )
        return overlapQuery.alias(f"{self.element.name}_{skypix.name}_overlap")

    def digestTables(self) -> Iterable[sqlalchemy.schema.Table]:
        """Return tables used for schema digest.

        Returns
        -------
        tables : `Iterable` [ `sqlalchemy.schema.Table` ]
            Possibly empty set of tables for schema digest calculations.
        """
        return [self._summaryTable, self._overlapTable]