Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

# Names exported by ``from <module> import *``; the underscore-prefixed helper
# class defined below is deliberately not listed.
__all__ = ["TableDimensionRecordStorage"]

24 

25from collections import defaultdict 

26import logging 

27from typing import ( 

28 AbstractSet, 

29 Any, 

30 Dict, 

31 Iterable, 

32 Iterator, 

33 List, 

34 Mapping, 

35 Optional, 

36 Sequence, 

37 Set, 

38 Union, 

39) 

40 

41import sqlalchemy 

42 

43from ...core import ( 

44 addDimensionForeignKey, 

45 DatabaseDimensionElement, 

46 DataCoordinate, 

47 DataCoordinateIterable, 

48 ddl, 

49 DimensionElement, 

50 DimensionRecord, 

51 GovernorDimension, 

52 NamedKeyDict, 

53 NamedKeyMapping, 

54 NamedValueSet, 

55 SimpleQuery, 

56 SkyPixDimension, 

57 SkyPixSystem, 

58 SpatialRegionDatabaseRepresentation, 

59 TimespanDatabaseRepresentation, 

60) 

61from ..interfaces import ( 

62 Database, 

63 DatabaseDimensionOverlapStorage, 

64 DatabaseDimensionRecordStorage, 

65 GovernorDimensionRecordStorage, 

66 StaticTablesContext, 

67) 

68from ..queries import QueryBuilder 

69from ..wildcards import Ellipsis, EllipsisType 

70 

71 

# Module-level logger, named after this module.
_LOG = logging.getLogger(__name__)


# NOTE(review): this constant is not referenced by any code visible in this
# module; presumably it is consumed elsewhere (or is a leftover) - confirm
# before changing or removing it.
MAX_FETCH_CHUNK = 1000
"""Maximum number of data IDs we fetch records at a time.

Barring something database-engine-specific, this sets the size of the actual
SQL query, not just the number of result rows, because the only way to query
for multiple data IDs in a single SELECT query via SQLAlchemy is to have an OR
term in the WHERE clause for each one.
"""

83 

84 

class TableDimensionRecordStorage(DatabaseDimensionRecordStorage):
    """A record storage implementation that keeps an element's records in a
    regular database table.

    Parameters
    ----------
    db : `Database`
        Interface to the database engine and namespace that will hold these
        dimension records.
    element : `DatabaseDimensionElement`
        The element whose records this storage will manage.
    table : `sqlalchemy.schema.Table`
        The logical table for the element.
    skyPixOverlap : `_SkyPixOverlapStorage`, optional
        Object that manages the tables that hold materialized spatial overlap
        joins to skypix dimensions.  Should be `None` if (and only if)
        ``element.spatial is None``.
    """

    def __init__(self, db: Database, element: DatabaseDimensionElement, *, table: sqlalchemy.schema.Table,
                 skyPixOverlap: Optional[_SkyPixOverlapStorage] = None):
        self._db = db
        self._element = element
        self._table = table
        self._skyPixOverlap = skyPixOverlap
        # Overlap-storage objects registered later through `connect`.
        self._otherOverlaps: List[DatabaseDimensionOverlapStorage] = []
        # Map each dimension name to the table column holding its value;
        # `fetch` uses this mapping to turn data ID constraints into column
        # expressions.
        dimensions = self._element.dimensions
        columnNames = self._element.RecordClass.fields.dimensions.names
        fetchColumns: Dict[str, sqlalchemy.sql.ColumnElement] = {}
        for dimension, columnName in zip(dimensions, columnNames):
            fetchColumns[dimension.name] = self._table.columns[columnName]
        self._fetchColumns = fetchColumns

114 

115 @classmethod 

116 def initialize( 

117 cls, 

118 db: Database, 

119 element: DatabaseDimensionElement, *, 

120 context: Optional[StaticTablesContext] = None, 

121 config: Mapping[str, Any], 

122 governors: NamedKeyMapping[GovernorDimension, GovernorDimensionRecordStorage], 

123 ) -> DatabaseDimensionRecordStorage: 

124 # Docstring inherited from DatabaseDimensionRecordStorage. 

125 spec = element.RecordClass.fields.makeTableSpec( 

126 RegionReprClass=db.getSpatialRegionRepresentation(), 

127 TimespanReprClass=db.getTimespanRepresentation(), 

128 ) 

129 if context is not None: 129 ↛ 132line 129 didn't jump to line 132, because the condition on line 129 was never false

130 table = context.addTable(element.name, spec) 

131 else: 

132 table = db.ensureTableExists(element.name, spec) 

133 skyPixOverlap: Optional[_SkyPixOverlapStorage] 

134 if element.spatial is not None: 

135 governor = governors[element.spatial.governor] 

136 skyPixOverlap = _SkyPixOverlapStorage.initialize( 

137 db, 

138 element, 

139 context=context, 

140 governor=governor, 

141 ) 

142 result = cls(db, element, table=table, skyPixOverlap=skyPixOverlap) 

143 

144 # Whenever anyone inserts a new governor dimension value, we want 

145 # to enable overlaps for that value between this element and 

146 # commonSkyPix. 

147 def callback(record: DimensionRecord) -> None: 

148 skyPixOverlap.enable( # type: ignore 

149 result, 

150 element.universe.commonSkyPix, 

151 getattr(record, element.spatial.governor.primaryKey.name), # type: ignore 

152 ) 

153 

154 governor.registerInsertionListener(callback) 

155 return result 

156 else: 

157 return cls(db, element, table=table) 

158 

    @property
    def element(self) -> DatabaseDimensionElement:
        # Docstring inherited from DimensionRecordStorage.element.
        # Read-only access to the element handed to the constructor.
        return self._element

163 

    def clearCaches(self) -> None:
        # Docstring inherited from DimensionRecordStorage.clearCaches.
        # Deliberately a no-op in this implementation.
        pass

167 

168 def join( 

169 self, 

170 builder: QueryBuilder, *, 

171 regions: Optional[NamedKeyDict[DimensionElement, SpatialRegionDatabaseRepresentation]] = None, 

172 timespans: Optional[NamedKeyDict[DimensionElement, TimespanDatabaseRepresentation]] = None, 

173 ) -> None: 

174 # Docstring inherited from DimensionRecordStorage. 

175 if regions is not None: 

176 dimensions = NamedValueSet(self.element.required) 

177 dimensions.add(self.element.universe.commonSkyPix) 

178 assert self._skyPixOverlap is not None 

179 builder.joinTable( 

180 self._skyPixOverlap.select(self.element.universe.commonSkyPix, Ellipsis), 

181 dimensions, 

182 ) 

183 regionsInTable = self._db.getSpatialRegionRepresentation().fromSelectable(self._table) 

184 regions[self.element] = regionsInTable 

185 joinOn = builder.startJoin(self._table, self.element.dimensions, 

186 self.element.RecordClass.fields.dimensions.names) 

187 if timespans is not None: 

188 timespanInTable = self._db.getTimespanRepresentation().fromSelectable(self._table) 

189 for timespanInQuery in timespans.values(): 189 ↛ 190line 189 didn't jump to line 190, because the loop on line 189 never started

190 joinOn.append(timespanInQuery.overlaps(timespanInTable)) 

191 timespans[self.element] = timespanInTable 

192 builder.finishJoin(self._table, joinOn) 

193 return self._table 

194 

195 def fetch(self, dataIds: DataCoordinateIterable) -> Iterable[DimensionRecord]: 

196 # Docstring inherited from DimensionRecordStorage.fetch. 

197 RecordClass = self.element.RecordClass 

198 query = SimpleQuery() 

199 query.columns.extend(self._table.columns[name] for name in RecordClass.fields.standard.names) 

200 if self.element.spatial is not None: 

201 query.columns.append(self._table.columns["region"]) 

202 if self.element.temporal is not None: 

203 TimespanReprClass = self._db.getTimespanRepresentation() 

204 query.columns.extend(self._table.columns[name] for name in TimespanReprClass.getFieldNames()) 

205 query.join(self._table) 

206 dataIds.constrain(query, lambda name: self._fetchColumns[name]) 

207 for row in self._db.query(query.combine()): 

208 values = dict(row) 

209 if self.element.temporal is not None: 

210 values[TimespanDatabaseRepresentation.NAME] = TimespanReprClass.extract(values) 

211 yield RecordClass(**values) 

212 

213 def insert(self, *records: DimensionRecord) -> None: 

214 # Docstring inherited from DimensionRecordStorage.insert. 

215 elementRows = [record.toDict() for record in records] 

216 if self.element.temporal is not None: 

217 TimespanReprClass = self._db.getTimespanRepresentation() 

218 for row in elementRows: 

219 timespan = row.pop(TimespanDatabaseRepresentation.NAME) 

220 TimespanReprClass.update(timespan, result=row) 

221 with self._db.transaction(): 

222 self._db.insert(self._table, *elementRows) 

223 if self._skyPixOverlap is not None: 

224 self._skyPixOverlap.insert(records) 

225 

226 def sync(self, record: DimensionRecord) -> bool: 

227 # Docstring inherited from DimensionRecordStorage.sync. 

228 compared = record.toDict() 

229 keys = {} 

230 for name in record.fields.required.names: 

231 keys[name] = compared.pop(name) 

232 if self.element.temporal is not None: 232 ↛ 233line 232 didn't jump to line 233, because the condition on line 232 was never true

233 TimespanReprClass = self._db.getTimespanRepresentation() 

234 timespan = compared.pop(TimespanDatabaseRepresentation.NAME) 

235 TimespanReprClass.update(timespan, result=compared) 

236 with self._db.transaction(): 

237 _, inserted = self._db.sync( 

238 self._table, 

239 keys=keys, 

240 compared=compared, 

241 ) 

242 if inserted and self._skyPixOverlap is not None: 242 ↛ 243line 242 didn't jump to line 243, because the condition on line 242 was never true

243 self._skyPixOverlap.insert([record]) 

244 return inserted 

245 

246 def digestTables(self) -> Iterable[sqlalchemy.schema.Table]: 

247 # Docstring inherited from DimensionRecordStorage.digestTables. 

248 result = [self._table] 

249 if self._skyPixOverlap is not None: 

250 result.extend(self._skyPixOverlap.digestTables()) 

251 return result 

252 

    def connect(self, overlaps: DatabaseDimensionOverlapStorage) -> None:
        # Docstring inherited from DatabaseDimensionRecordStorage.
        # The object is only recorded here; no other code visible in this
        # class reads ``_otherOverlaps``.
        self._otherOverlaps.append(overlaps)

256 

257 

258class _SkyPixOverlapStorage: 

259 """A helper object for `TableDimensionRecordStorage` that manages its 

260 materialized overlaps with skypix dimensions. 

261 

262 New instances should be constructed by calling `initialize`, not by calling 

263 the constructor directly. 

264 

265 Parameters 

266 ---------- 

267 db : `Database` 

268 Interface to the underlying database engine and namespace. 

269 element : `DatabaseDimensionElement` 

270 Dimension element whose overlaps are to be managed. 

271 summaryTable : `sqlalchemy.schema.Table` 

272 Table that records which combinations of skypix dimensions and 

273 governor dimension values have materialized overlap rows. 

274 overlapTable : `sqlalchemy.schema.Table` 

275 Table containing the actual materialized overlap rows. 

276 governor : `GovernorDimensionRecordStorage` 

277 Record storage backend for this element's governor dimension. 

278 

279 Notes 

280 ----- 

281 This class (and most importantly, the tables it relies on) can in principle 

282 manage overlaps between with any skypix dimension, but at present it is 

283 only being used to manage relationships with the special ``commonSkyPix`` 

284 dimension, because that's all the query system uses. Eventually, we expect 

285 to require users to explicitly materialize all relationships they will 

286 want to use in queries. 

287 

288 Other possible future improvements include: 

289 

290 - allowing finer-grained skypix dimensions to provide overlap rows for 

291 coarser ones, by dividing indices by powers of 4 (and possibly doing 

292 ``SELECT DISTINCT`` in the subquery to remove duplicates); 

293 

294 - allowing finer-grained database elements (e.g. patch) to provide overlap 

295 rows for coarser ones (e.g. tract), by ignoring irrelevant columns 

296 (e.g. the patch IDs) in the subquery (again, possible with 

297 ``SELECT DISTINCT``). 

298 

299 But there's no point to doing any of that until the query system can 

300 figure out how best to ask for overlap rows when an exact match isn't 

301 available. 

302 """ 

303 def __init__( 

304 self, 

305 db: Database, 

306 element: DatabaseDimensionElement, 

307 summaryTable: sqlalchemy.schema.Table, 

308 overlapTable: sqlalchemy.schema.Table, 

309 governor: GovernorDimensionRecordStorage, 

310 ): 

311 self._db = db 

312 self.element = element 

313 assert element.spatial is not None 

314 self._summaryTable = summaryTable 

315 self._overlapTable = overlapTable 

316 self._governor = governor 

317 

318 @classmethod 

319 def initialize( 

320 cls, 

321 db: Database, 

322 element: DatabaseDimensionElement, *, 

323 context: Optional[StaticTablesContext], 

324 governor: GovernorDimensionRecordStorage, 

325 ) -> _SkyPixOverlapStorage: 

326 """Construct a new instance, creating tables as needed. 

327 

328 Parameters 

329 ---------- 

330 db : `Database` 

331 Interface to the underlying database engine and namespace. 

332 element : `DatabaseDimensionElement` 

333 Dimension element whose overlaps are to be managed. 

334 context : `StaticTablesContext`, optional 

335 If provided, an object to use to create any new tables. If not 

336 provided, ``db.ensureTableExists`` should be used instead. 

337 governor : `GovernorDimensionRecordStorage` 

338 Record storage backend for this element's governor dimension. 

339 """ 

340 if context is not None: 340 ↛ 343line 340 didn't jump to line 343, because the condition on line 340 was never false

341 op = context.addTable 

342 else: 

343 op = db.ensureTableExists 

344 summaryTable = op( 

345 cls._SUMMARY_TABLE_NAME_SPEC.format(element=element), 

346 cls._makeSummaryTableSpec(element), 

347 ) 

348 overlapTable = op( 

349 cls._OVERLAP_TABLE_NAME_SPEC.format(element=element), 

350 cls._makeOverlapTableSpec(element), 

351 ) 

352 return _SkyPixOverlapStorage(db, element, summaryTable=summaryTable, overlapTable=overlapTable, 

353 governor=governor) 

354 

355 _SUMMARY_TABLE_NAME_SPEC = "{element.name}_skypix_overlap_summary" 

356 

357 @classmethod 

358 def _makeSummaryTableSpec(cls, element: DatabaseDimensionElement) -> ddl.TableSpec: 

359 """Create a specification for the table that records which combinations 

360 of skypix dimension and governor value have materialized overlaps. 

361 

362 Parameters 

363 ---------- 

364 element : `DatabaseDimensionElement` 

365 Dimension element whose overlaps are to be managed. 

366 

367 Returns 

368 ------- 

369 tableSpec : `ddl.TableSpec` 

370 Table specification. 

371 """ 

372 assert element.spatial is not None 

373 tableSpec = ddl.TableSpec( 

374 fields=[ 

375 ddl.FieldSpec( 

376 name="skypix_system", 

377 dtype=sqlalchemy.String, 

378 length=16, 

379 nullable=False, 

380 primaryKey=True, 

381 ), 

382 ddl.FieldSpec( 

383 name="skypix_level", 

384 dtype=sqlalchemy.SmallInteger, 

385 nullable=False, 

386 primaryKey=True, 

387 ), 

388 ] 

389 ) 

390 addDimensionForeignKey(tableSpec, element.spatial.governor, primaryKey=True) 

391 return tableSpec 

392 

393 _OVERLAP_TABLE_NAME_SPEC = "{element.name}_skypix_overlap" 

394 

395 @classmethod 

396 def _makeOverlapTableSpec(cls, element: DatabaseDimensionElement) -> ddl.TableSpec: 

397 """Create a specification for the table that holds materialized 

398 overlap rows. 

399 

400 Parameters 

401 ---------- 

402 element : `DatabaseDimensionElement` 

403 Dimension element whose overlaps are to be managed. 

404 

405 Returns 

406 ------- 

407 tableSpec : `ddl.TableSpec` 

408 Table specification. 

409 """ 

410 assert element.spatial is not None 

411 tableSpec = ddl.TableSpec( 

412 fields=[ 

413 ddl.FieldSpec( 

414 name="skypix_system", 

415 dtype=sqlalchemy.String, 

416 length=16, 

417 nullable=False, 

418 primaryKey=True, 

419 ), 

420 ddl.FieldSpec( 

421 name="skypix_level", 

422 dtype=sqlalchemy.SmallInteger, 

423 nullable=False, 

424 primaryKey=True, 

425 ), 

426 # (more columns added below) 

427 ], 

428 unique=set(), 

429 foreignKeys=[ 

430 # Foreign key to summary table. This makes sure we don't 

431 # materialize any overlaps without remembering that we've done 

432 # so in the summary table, though it can't prevent the converse 

433 # of adding a summary row without adding overlap row (either of 

434 # those is a logic bug, of course, but we want to be defensive 

435 # about those). Using ON DELETE CASCADE, it'd be very easy to 

436 # implement "disabling" an overlap materialization, because we 

437 # can just delete the summary row. 

438 # Note that the governor dimension column is added below, in 

439 # the call to addDimensionForeignKey. 

440 ddl.ForeignKeySpec( 

441 cls._SUMMARY_TABLE_NAME_SPEC.format(element=element), 

442 source=("skypix_system", "skypix_level", element.spatial.governor.name), 

443 target=("skypix_system", "skypix_level", element.spatial.governor.name), 

444 onDelete="CASCADE", 

445 ), 

446 ], 

447 ) 

448 # Add fields for the standard element this class manages overlaps for. 

449 # This is guaranteed to add a column for the governor dimension, 

450 # because that's a required dependency of element. 

451 for dimension in element.required: 

452 addDimensionForeignKey(tableSpec, dimension, primaryKey=True) 

453 # Add field for the actual skypix index. We do this later because I 

454 # think we care (at least a bit) about the order in which the primary 

455 # key is defined, in that we want a non-summary column like this one 

456 # to appear after the governor dimension column. 

457 tableSpec.fields.add( 

458 ddl.FieldSpec( 

459 name="skypix_index", 

460 dtype=sqlalchemy.BigInteger, 

461 nullable=False, 

462 primaryKey=True, 

463 ) 

464 ) 

465 return tableSpec 

466 

467 def enable( 

468 self, 

469 storage: TableDimensionRecordStorage, 

470 skypix: SkyPixDimension, 

471 governorValue: str, 

472 ) -> None: 

473 """Enable materialization of overlaps between a skypix dimension 

474 and the records of ``self.element`` with a particular governor value. 

475 

476 Parameters 

477 ---------- 

478 storage : `TableDimensionRecordStorage` 

479 Storage object for the records of ``self.element``. 

480 skypix : `SkyPixDimension` 

481 The skypix dimension (system and level) for which overlaps should 

482 be materialized. 

483 governorValue : `str` 

484 Value of this element's governor dimension for which overlaps 

485 should be materialized. For example, if ``self.element`` is 

486 ``visit``, this is an instrument name; if ``self.element`` is 

487 ``patch``, this is a skymap name. 

488 

489 Notes 

490 ----- 

491 If there are existing rows for the given ``governorValue``, overlap 

492 rows for them will be immediately computed and inserted. At present, 

493 that never happens, because we only enable overlaps with 

494 `DimensionUniverse.commonSkyPix`, and that happens immediately after 

495 each governor row is inserted (and there can't be any patch rows, 

496 for example, until after the corresponding skymap row is inserted). 

497 

498 After calling `enable` for a particular combination, any new records 

499 for ``self.element`` that are inserted will automatically be 

500 accompanied by overlap records (via calls to `insert` made 

501 by `TableDimensionRecordStorage` methods). 

502 """ 

503 # Because we're essentially materializing a view in Python, we 

504 # aggressively lock all tables we're reading and writing in order to be 

505 # sure nothing gets out of sync. This may not be the most efficient 

506 # approach possible, but we'll focus on correct before we focus on 

507 # fast, and enabling a new overlap combination should be a very rare 

508 # operation anyway, and never one we do in parallel. 

509 with self._db.transaction(lock=[self._governor.table, storage._table, 

510 self._summaryTable, self._overlapTable]): 

511 result, inserted = self._db.sync( 

512 self._summaryTable, 

513 keys={ 

514 "skypix_system": skypix.system.name, 

515 "skypix_level": skypix.level, 

516 self._governor.element.name: governorValue, 

517 }, 

518 ) 

519 if inserted: 519 ↛ 529line 519 didn't jump to line 529, because the condition on line 519 was never false

520 _LOG.debug( 

521 "Precomputing initial overlaps for %s vs %s for %s=%s", 

522 skypix.name, 

523 self.element.name, 

524 self._governor.element.name, 

525 governorValue 

526 ) 

527 self._fill(storage=storage, skypix=skypix, governorValue=governorValue) 

528 else: 

529 _LOG.debug( 

530 "Overlaps already precomputed for %s vs %s for %s=%s", 

531 skypix.name, 

532 self.element.name, 

533 self._governor.element.name, 

534 governorValue 

535 ) 

536 

537 def _fill( 

538 self, *, 

539 storage: TableDimensionRecordStorage, 

540 skypix: SkyPixDimension, 

541 governorValue: str, 

542 ) -> None: 

543 """Insert overlap records for a newly-enabled combination of skypix 

544 dimension and governor value. 

545 

546 This method should only be called by `enable`. 

547 

548 Parameters 

549 ---------- 

550 storage : `TableDimensionRecordStorage` 

551 Storage object for the records of ``self.element``. 

552 skypix : `SkyPixDimension` 

553 The skypix dimension (system and level) for which overlaps should 

554 be materialized. 

555 governorValue : `str` 

556 Value of this element's governor dimension for which overlaps 

557 should be materialized. For example, if ``self.element`` is 

558 ``visit``, this is an instrument name; if ``self.element`` is 

559 ``patch``, this is a skymap name. 

560 """ 

561 overlapRecords: List[dict] = [] 

562 # `DimensionRecordStorage.fetch` as defined by the ABC expects to be 

563 # given iterables of data IDs that correspond to that element's graph 

564 # (e.g. {instrument, visit, detector}), not just some subset of it 

565 # (e.g. {instrument}). But we know the implementation of `fetch` for 

566 # `TableDimensionRecordStorage will use this iterable to do exactly 

567 # what we want. 

568 governorDataId = DataCoordinate.standardize({self._governor.element.name: governorValue}, 

569 graph=self._governor.element.graph) 

570 for record in storage.fetch(DataCoordinateIterable.fromScalar(governorDataId)): 570 ↛ 571line 570 didn't jump to line 571, because the loop on line 570 never started

571 if record.region is None: 

572 continue 

573 baseOverlapRecord = record.dataId.byName() 

574 baseOverlapRecord["skypix_system"] = skypix.system.name 

575 baseOverlapRecord["skypix_level"] = skypix.level 

576 for begin, end in skypix.pixelization.envelope(record.region): 

577 overlapRecords.extend( 

578 dict(baseOverlapRecord, skypix_index=index) for index in range(begin, end) 

579 ) 

580 _LOG.debug( 

581 "Inserting %d initial overlap rows for %s vs %s for %s=%r", 

582 len(overlapRecords), 

583 skypix.name, 

584 self.element.name, 

585 self._governor.element.name, 

586 governorValue, 

587 ) 

588 self._db.insert(self._overlapTable, *overlapRecords) 

589 

590 def insert(self, records: Sequence[DimensionRecord]) -> None: 

591 """Insert overlaps for a sequence of ``self.element`` records that 

592 have just been inserted. 

593 

594 This must be called by any method that inserts records for that 

595 element (i.e. `TableDimensionRecordStorage.insert` and 

596 `TableDimensionRecordStorage.sync`), within the same transaction. 

597 

598 Parameters 

599 ---------- 

600 records : `Sequence` [ `DimensionRecord` ] 

601 Records for ``self.element``. Records with `None` regions are 

602 ignored. 

603 """ 

604 # Group records by family.governor value. 

605 grouped: Dict[str, List[DimensionRecord]] = defaultdict(list) 

606 for record in records: 

607 grouped[getattr(record, self._governor.element.name)].append(record) 

608 _LOG.debug( 

609 "Precomputing new skypix overlaps for %s where %s in %s.", 

610 self.element.name, self._governor.element.name, grouped.keys() 

611 ) 

612 # Make sure the set of combinations to materialize does not change 

613 # while we are materializing the ones we have, by locking the summary 

614 # table. Because we aren't planning to write to the summary table, 

615 # this could just be a SHARED lock instead of an EXCLUSIVE one, but 

616 # there's no API for that right now. 

617 with self._db.transaction(lock=[self._summaryTable]): 

618 # Query for the skypix dimensions to be associated with each 

619 # governor value. 

620 gvCol = self._summaryTable.columns[self._governor.element.name] 

621 sysCol = self._summaryTable.columns.skypix_system 

622 lvlCol = self._summaryTable.columns.skypix_level 

623 query = sqlalchemy.sql.select( 

624 [gvCol, sysCol, lvlCol], 

625 ).select_from( 

626 self._summaryTable 

627 ).where( 

628 gvCol.in_(list(grouped.keys())) 

629 ) 

630 # Group results by governor value, then skypix system. 

631 skypix: Dict[str, NamedKeyDict[SkyPixSystem, List[int]]] = { 

632 gv: NamedKeyDict() for gv in grouped.keys() 

633 } 

634 for summaryRow in self._db.query(query): 

635 system = self.element.universe.skypix[summaryRow[sysCol]] 

636 skypix[summaryRow[gvCol]].setdefault(system, []).append(summaryRow[lvlCol]) 

637 overlapRecords: List[dict] = [] 

638 # Compute overlaps for one governor value at a time, but gather 

639 # them all up for one insert. 

640 for gv, group in grouped.items(): 

641 overlapRecords.extend(self._compute(group, skypix[gv], gv)) 

642 _LOG.debug( 

643 "Inserting %d new skypix overlap rows for %s where %s in %s.", 

644 len(overlapRecords), self.element.name, self._governor.element.name, grouped.keys() 

645 ) 

646 self._db.insert(self._overlapTable, *overlapRecords) 

647 

648 def _compute( 

649 self, 

650 records: Sequence[DimensionRecord], 

651 skypix: NamedKeyDict[SkyPixSystem, List[int]], 

652 governorValue: str, 

653 ) -> Iterator[dict]: 

654 """Compute all overlap rows for a particular governor dimension value 

655 and all of the skypix dimensions for which its overlaps are enabled. 

656 

657 This method should only be called by `insert`. 

658 

659 Parameters 

660 ---------- 

661 records : `Sequence` [ `DimensionRecord` ] 

662 Records for ``self.element``. Records with `None` regions are 

663 ignored. All must have the governor value given. 

664 skypix : `NamedKeyDict` [ `SkyPixSystem`, `list` [ `int` ] ] 

665 Mapping containing all skypix systems and levels for which overlaps 

666 should be computed, grouped by `SkyPixSystem`. 

667 governorValue : `str` 

668 Value of this element's governor dimension for which overlaps 

669 should be computed. For example, if ``self.element`` is ``visit``, 

670 this is an instrument name; if ``self.element`` is ``patch``, this 

671 is a skymap name. 

672 

673 Yields 

674 ------ 

675 row : `dict` 

676 Dictionary representing an overlap row. 

677 """ 

678 # Process input records one at time, computing all skypix indices for 

679 # each. 

680 for record in records: 

681 if record.region is None: 

682 continue 

683 assert getattr(record, self._governor.element.name) == governorValue 

684 for system, levels in skypix.items(): 

685 if not levels: 685 ↛ 686line 685 didn't jump to line 686, because the condition on line 685 was never true

686 continue 

687 baseOverlapRecord = record.dataId.byName() 

688 baseOverlapRecord["skypix_system"] = system.name 

689 levels.sort(reverse=True) 

690 # Start with the first level, which is the finest-grained one. 

691 # Compute skypix envelope indices directly for that. 

692 indices: Dict[int, Set[int]] = {levels[0]: set()} 

693 for begin, end in system[levels[0]].pixelization.envelope(record.region): 

694 indices[levels[0]].update(range(begin, end)) 

695 # Divide those indices by powers of 4 (and remove duplicates) 

696 # work our way up to the last (coarsest) level. 

697 for lastLevel, nextLevel in zip(levels[:-1], levels[1:]): 697 ↛ 698line 697 didn't jump to line 698, because the loop on line 697 never started

698 factor = 4**(lastLevel - nextLevel) 

699 indices[nextLevel] = {index//factor for index in indices[lastLevel]} 

700 for level in levels: 

701 yield from ( 

702 { 

703 "skypix_level": level, 

704 "skypix_index": index, 

705 **baseOverlapRecord, # type: ignore 

706 } for index in indices[level] 

707 ) 

708 

709 def select( 

710 self, 

711 skypix: SkyPixDimension, 

712 governorValues: Union[AbstractSet[str], EllipsisType], 

713 ) -> sqlalchemy.sql.FromClause: 

714 """Construct a subquery expression containing overlaps between the 

715 given skypix dimension and governor values. 

716 

717 Parameters 

718 ---------- 

719 skypix : `SkyPixDimension` 

720 The skypix dimension (system and level) for which overlaps should 

721 be materialized. 

722 governorValues : `str` 

723 Values of this element's governor dimension for which overlaps 

724 should be returned. For example, if ``self.element`` is ``visit``, 

725 this is a set of instrument names; if ``self.element`` is 

726 ``patch``, this is a set of skymap names. If ``...`` all values 

727 in the database are used (`GovernorDimensionRecordStorage.values`). 

728 

729 Returns 

730 ------- 

731 subquery : `sqlalchemy.sql.FromClause` 

732 A SELECT query with an alias, intended for use as a subquery, with 

733 columns equal to ``self.element.required.names`` + ``skypix.name``. 

734 """ 

735 if skypix != self.element.universe.commonSkyPix: 735 ↛ 740line 735 didn't jump to line 740

736 # We guarantee elsewhere that we always materialize all overlaps 

737 # vs. commonSkyPix, but for everything else, we need to check that 

738 # we have materialized this combination of governor values and 

739 # skypix. 

740 summaryWhere = [ 

741 self._summaryTable.columns.skypix_system == skypix.system.name, 

742 self._summaryTable.columns.skypix_level == skypix.level, 

743 ] 

744 gvCol = self._summaryTable.columns[self._governor.element.name] 

745 if governorValues is not Ellipsis: 

746 summaryWhere.append(gvCol.in_(list(governorValues))) 

747 summaryQuery = sqlalchemy.sql.select( 

748 [gvCol] 

749 ).select_from( 

750 self._summaryTable 

751 ).where( 

752 sqlalchemy.sql.and_(*summaryWhere) 

753 ) 

754 materializedGovernorValues = {row[gvCol] for row in self._db.query(summaryQuery)} 

755 if governorValues is Ellipsis: 

756 missingGovernorValues = self._governor.values - materializedGovernorValues 

757 else: 

758 missingGovernorValues = governorValues - materializedGovernorValues 

759 if missingGovernorValues: 

760 raise RuntimeError( 

761 f"Query requires an overlap join between {skypix.name} and {self.element.name} " 

762 f"(for {self._governor.element.name} in {missingGovernorValues}), but these " 

763 f"have not been materialized." 

764 ) 

765 columns = [self._overlapTable.columns.skypix_index.label(skypix.name)] 

766 columns.extend(self._overlapTable.columns[name] for name in self.element.graph.required.names) 

767 overlapWhere = [ 

768 self._overlapTable.columns.skypix_system == skypix.system.name, 

769 self._overlapTable.columns.skypix_level == skypix.level, 

770 ] 

771 if governorValues is not Ellipsis: 771 ↛ 772line 771 didn't jump to line 772, because the condition on line 771 was never true

772 overlapWhere.append( 

773 self._overlapTable.columns[self._governor.element.name].in_(list(governorValues)) 

774 ) 

775 overlapQuery = sqlalchemy.sql.select( 

776 columns 

777 ).select_from( 

778 self._overlapTable 

779 ).where( 

780 sqlalchemy.sql.and_(*overlapWhere) 

781 ) 

782 return overlapQuery.alias(f"{self.element.name}_{skypix.name}_overlap") 

783 

    def digestTables(self) -> Iterable[sqlalchemy.schema.Table]:
        """Return tables used for schema digest.

        Returns
        -------
        tables : `Iterable` [ `sqlalchemy.schema.Table` ]
            The summary table followed by the overlap table, as a list.
        """
        return [self._summaryTable, self._overlapTable]