Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ["TableDimensionRecordStorage"]

from collections import defaultdict
import logging
from typing import (
    AbstractSet,
    Any,
    Dict,
    Iterable,
    Iterator,
    List,
    Mapping,
    Optional,
    Sequence,
    Set,
    Union,
)

import sqlalchemy

from ...core import (
    addDimensionForeignKey,
    DatabaseDimensionElement,
    DataCoordinate,
    DataCoordinateIterable,
    ddl,
    DimensionElement,
    DimensionRecord,
    GovernorDimension,
    NamedKeyDict,
    NamedKeyMapping,
    NamedValueSet,
    REGION_FIELD_SPEC,
    SimpleQuery,
    SkyPixDimension,
    SkyPixSystem,
    TimespanDatabaseRepresentation,
)
from ..interfaces import (
    Database,
    DatabaseDimensionOverlapStorage,
    DatabaseDimensionRecordStorage,
    GovernorDimensionRecordStorage,
    StaticTablesContext,
)
from ..queries import QueryBuilder
from ..wildcards import Ellipsis, EllipsisType

70 

71 

# Module-level logger, named after this module.
_LOG = logging.getLogger(__name__)


MAX_FETCH_CHUNK = 1000
"""Maximum number of data IDs for which we fetch records at a time.

Barring something database-engine-specific, this sets the size of the actual
SQL query, not just the number of result rows, because the only way to query
for multiple data IDs in a single SELECT query via SQLAlchemy is to have an OR
term in the WHERE clause for each one.
"""

83 

84 

class TableDimensionRecordStorage(DatabaseDimensionRecordStorage):
    """A record storage implementation that uses a regular database table.

    Parameters
    ----------
    db : `Database`
        Interface to the database engine and namespace that will hold these
        dimension records.
    element : `DatabaseDimensionElement`
        The element whose records this storage will manage.
    table : `sqlalchemy.schema.Table`
        The logical table for the element.
    skyPixOverlap : `_SkyPixOverlapStorage`, optional
        Object that manages the tables that hold materialized spatial overlap
        joins to skypix dimensions.  Should be `None` if (and only if)
        ``element.spatial is None``.
    """
    def __init__(self, db: Database, element: DatabaseDimensionElement, *, table: sqlalchemy.schema.Table,
                 skyPixOverlap: Optional[_SkyPixOverlapStorage] = None):
        self._db = db
        self._table = table
        self._element = element
        # Map each dimension name of the element to the table column that
        # holds its value; `fetch` uses this to turn data ID constraints into
        # column constraints.
        self._fetchColumns: Dict[str, sqlalchemy.sql.ColumnElement] = {
            dimension.name: self._table.columns[name]
            for dimension, name in zip(self._element.dimensions,
                                       self._element.RecordClass.fields.dimensions.names)
        }
        self._skyPixOverlap = skyPixOverlap
        # Overlap storage objects registered later through `connect`.
        self._otherOverlaps: List[DatabaseDimensionOverlapStorage] = []

    @classmethod
    def initialize(
        cls,
        db: Database,
        element: DatabaseDimensionElement, *,
        context: Optional[StaticTablesContext] = None,
        config: Mapping[str, Any],
        governors: NamedKeyMapping[GovernorDimension, GovernorDimensionRecordStorage],
    ) -> DatabaseDimensionRecordStorage:
        # Docstring inherited from DatabaseDimensionRecordStorage.
        spec = element.RecordClass.fields.makeTableSpec(tsRepr=db.getTimespanRepresentation())
        # Declare the table through the static-tables context when one is
        # given; otherwise create it on demand.
        if context is not None:
            table = context.addTable(element.name, spec)
        else:
            table = db.ensureTableExists(element.name, spec)
        skyPixOverlap: Optional[_SkyPixOverlapStorage]
        if element.spatial is not None:
            governor = governors[element.spatial.governor]
            skyPixOverlap = _SkyPixOverlapStorage.initialize(
                db,
                element,
                context=context,
                governor=governor,
            )
            result = cls(db, element, table=table, skyPixOverlap=skyPixOverlap)

            # Whenever anyone inserts a new governor dimension value, we want
            # to enable overlaps for that value between this element and
            # commonSkyPix.
            def callback(record: DimensionRecord) -> None:
                skyPixOverlap.enable(  # type: ignore
                    result,
                    element.universe.commonSkyPix,
                    getattr(record, element.spatial.governor.primaryKey.name),  # type: ignore
                )

            governor.registerInsertionListener(callback)
            return result
        else:
            # Non-spatial elements have no skypix overlaps to manage.
            return cls(db, element, table=table)

    @property
    def element(self) -> DatabaseDimensionElement:
        # Docstring inherited from DimensionRecordStorage.element.
        return self._element

    def clearCaches(self) -> None:
        # Docstring inherited from DimensionRecordStorage.clearCaches.
        # This implementation keeps no caches, so there is nothing to clear.
        pass

    def join(
        self,
        builder: QueryBuilder, *,
        regions: Optional[NamedKeyDict[DimensionElement, sqlalchemy.sql.ColumnElement]] = None,
        timespans: Optional[NamedKeyDict[DimensionElement, TimespanDatabaseRepresentation]] = None,
    ) -> None:
        # Docstring inherited from DimensionRecordStorage.
        if regions is not None:
            # Spatial joins go through the materialized overlaps with the
            # common skypix dimension, so join that subquery in first.
            dimensions = NamedValueSet(self.element.required)
            dimensions.add(self.element.universe.commonSkyPix)
            assert self._skyPixOverlap is not None
            builder.joinTable(
                self._skyPixOverlap.select(self.element.universe.commonSkyPix, Ellipsis),
                dimensions,
            )
            regions[self.element] = self._table.columns[REGION_FIELD_SPEC.name]
        joinOn = builder.startJoin(self._table, self.element.dimensions,
                                   self.element.RecordClass.fields.dimensions.names)
        if timespans is not None:
            # Require this element's timespan to overlap every timespan that
            # is already part of the query, then register our own.
            timespanInTable = self._db.getTimespanRepresentation().fromSelectable(self._table)
            for timespanInQuery in timespans.values():
                joinOn.append(timespanInQuery.overlaps(timespanInTable))
            timespans[self.element] = timespanInTable
        builder.finishJoin(self._table, joinOn)
        # NOTE(review): the signature is annotated ``-> None`` but the table
        # is returned; the return is kept for any caller relying on it --
        # confirm which of the two is intended.
        return self._table

    def fetch(self, dataIds: DataCoordinateIterable) -> Iterable[DimensionRecord]:
        # Docstring inherited from DimensionRecordStorage.fetch.
        RecordClass = self.element.RecordClass
        query = SimpleQuery()
        query.columns.extend(self._table.columns[name] for name in RecordClass.fields.standard.names)
        if self.element.spatial is not None:
            # NOTE(review): `join` looks this column up via
            # ``REGION_FIELD_SPEC.name``; presumably the same name -- confirm.
            query.columns.append(self._table.columns["region"])
        if self.element.temporal is not None:
            tsRepr = self._db.getTimespanRepresentation()
            query.columns.extend(self._table.columns[name] for name in tsRepr.getFieldNames())
        query.join(self._table)
        dataIds.constrain(query, lambda name: self._fetchColumns[name])
        for row in self._db.query(query.combine()):
            values = dict(row)
            if self.element.temporal is not None:
                # Collapse the database representation of the timespan back
                # into a single record field.
                values[TimespanDatabaseRepresentation.NAME] = tsRepr.extract(values)
            yield RecordClass(**values)

    def insert(self, *records: DimensionRecord) -> None:
        # Docstring inherited from DimensionRecordStorage.insert.
        elementRows = [record.toDict() for record in records]
        if self.element.temporal is not None:
            # Replace the single timespan field with whatever columns the
            # database's timespan representation uses.
            tsRepr = self._db.getTimespanRepresentation()
            for row in elementRows:
                timespan = row.pop(TimespanDatabaseRepresentation.NAME)
                tsRepr.update(timespan, result=row)
        # Element rows and their overlap rows go in within one transaction.
        with self._db.transaction():
            self._db.insert(self._table, *elementRows)
            if self._skyPixOverlap is not None:
                self._skyPixOverlap.insert(records)

    def sync(self, record: DimensionRecord) -> bool:
        # Docstring inherited from DimensionRecordStorage.sync.
        compared = record.toDict()
        # Required fields identify the row; everything else is compared.
        keys = {}
        for name in record.fields.required.names:
            keys[name] = compared.pop(name)
        if self.element.temporal is not None:
            tsRepr = self._db.getTimespanRepresentation()
            timespan = compared.pop(TimespanDatabaseRepresentation.NAME)
            tsRepr.update(timespan, result=compared)
        with self._db.transaction():
            _, inserted = self._db.sync(
                self._table,
                keys=keys,
                compared=compared,
            )
            # Only newly-inserted records need overlap rows.
            if inserted and self._skyPixOverlap is not None:
                self._skyPixOverlap.insert([record])
        return inserted

    def digestTables(self) -> Iterable[sqlalchemy.schema.Table]:
        # Docstring inherited from DimensionRecordStorage.digestTables.
        result = [self._table]
        if self._skyPixOverlap is not None:
            result.extend(self._skyPixOverlap.digestTables())
        return result

    def connect(self, overlaps: DatabaseDimensionOverlapStorage) -> None:
        # Docstring inherited from DatabaseDimensionRecordStorage.
        self._otherOverlaps.append(overlaps)

252 

253 

class _SkyPixOverlapStorage:
    """A helper object for `TableDimensionRecordStorage` that manages its
    materialized overlaps with skypix dimensions.

    New instances should be constructed by calling `initialize`, not by calling
    the constructor directly.

    Parameters
    ----------
    db : `Database`
        Interface to the underlying database engine and namespace.
    element : `DatabaseDimensionElement`
        Dimension element whose overlaps are to be managed.
    summaryTable : `sqlalchemy.schema.Table`
        Table that records which combinations of skypix dimensions and
        governor dimension values have materialized overlap rows.
    overlapTable : `sqlalchemy.schema.Table`
        Table containing the actual materialized overlap rows.
    governor : `GovernorDimensionRecordStorage`
        Record storage backend for this element's governor dimension.

    Notes
    -----
    This class (and most importantly, the tables it relies on) can in principle
    manage overlaps with any skypix dimension, but at present it is
    only being used to manage relationships with the special ``commonSkyPix``
    dimension, because that's all the query system uses.  Eventually, we expect
    to require users to explicitly materialize all relationships they will
    want to use in queries.

    Other possible future improvements include:

     - allowing finer-grained skypix dimensions to provide overlap rows for
       coarser ones, by dividing indices by powers of 4 (and possibly doing
       ``SELECT DISTINCT`` in the subquery to remove duplicates);

     - allowing finer-grained database elements (e.g. patch) to provide overlap
       rows for coarser ones (e.g. tract), by ignoring irrelevant columns
       (e.g. the patch IDs) in the subquery (again, possible with
       ``SELECT DISTINCT``).

    But there's no point to doing any of that until the query system can
    figure out how best to ask for overlap rows when an exact match isn't
    available.
    """
    def __init__(
        self,
        db: Database,
        element: DatabaseDimensionElement,
        summaryTable: sqlalchemy.schema.Table,
        overlapTable: sqlalchemy.schema.Table,
        governor: GovernorDimensionRecordStorage,
    ):
        self._db = db
        self.element = element
        assert element.spatial is not None
        self._summaryTable = summaryTable
        self._overlapTable = overlapTable
        self._governor = governor

    @classmethod
    def initialize(
        cls,
        db: Database,
        element: DatabaseDimensionElement, *,
        context: Optional[StaticTablesContext],
        governor: GovernorDimensionRecordStorage,
    ) -> _SkyPixOverlapStorage:
        """Construct a new instance, creating tables as needed.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        element : `DatabaseDimensionElement`
            Dimension element whose overlaps are to be managed.
        context : `StaticTablesContext`, optional
            If provided, an object to use to create any new tables.  If not
            provided, ``db.ensureTableExists`` should be used instead.
        governor : `GovernorDimensionRecordStorage`
            Record storage backend for this element's governor dimension.

        Returns
        -------
        storage : `_SkyPixOverlapStorage`
            New instance backed by the summary and overlap tables.
        """
        # Both table-creation callables are invoked as ``op(name, spec)``.
        if context is not None:
            op = context.addTable
        else:
            op = db.ensureTableExists
        summaryTable = op(
            cls._SUMMARY_TABLE_NAME_SPEC.format(element=element),
            cls._makeSummaryTableSpec(element),
        )
        overlapTable = op(
            cls._OVERLAP_TABLE_NAME_SPEC.format(element=element),
            cls._makeOverlapTableSpec(element),
        )
        # Construct via ``cls`` rather than the hard-coded class name so that
        # the classmethod also does the right thing for subclasses.
        return cls(db, element, summaryTable=summaryTable, overlapTable=overlapTable,
                   governor=governor)

    # Format template for the summary table name; formatted with ``element``.
    _SUMMARY_TABLE_NAME_SPEC = "{element.name}_skypix_overlap_summary"

    @classmethod
    def _makeSummaryTableSpec(cls, element: DatabaseDimensionElement) -> ddl.TableSpec:
        """Create a specification for the table that records which combinations
        of skypix dimension and governor value have materialized overlaps.

        Parameters
        ----------
        element : `DatabaseDimensionElement`
            Dimension element whose overlaps are to be managed.

        Returns
        -------
        tableSpec : `ddl.TableSpec`
            Table specification.
        """
        assert element.spatial is not None
        tableSpec = ddl.TableSpec(
            fields=[
                ddl.FieldSpec(
                    name="skypix_system",
                    dtype=sqlalchemy.String,
                    length=16,
                    nullable=False,
                    primaryKey=True,
                ),
                ddl.FieldSpec(
                    name="skypix_level",
                    dtype=sqlalchemy.SmallInteger,
                    nullable=False,
                    primaryKey=True,
                ),
            ]
        )
        # The governor dimension column completes the primary key.
        addDimensionForeignKey(tableSpec, element.spatial.governor, primaryKey=True)
        return tableSpec

    # Format template for the overlap table name; formatted with ``element``.
    _OVERLAP_TABLE_NAME_SPEC = "{element.name}_skypix_overlap"

    @classmethod
    def _makeOverlapTableSpec(cls, element: DatabaseDimensionElement) -> ddl.TableSpec:
        """Create a specification for the table that holds materialized
        overlap rows.

        Parameters
        ----------
        element : `DatabaseDimensionElement`
            Dimension element whose overlaps are to be managed.

        Returns
        -------
        tableSpec : `ddl.TableSpec`
            Table specification.
        """
        assert element.spatial is not None
        tableSpec = ddl.TableSpec(
            fields=[
                ddl.FieldSpec(
                    name="skypix_system",
                    dtype=sqlalchemy.String,
                    length=16,
                    nullable=False,
                    primaryKey=True,
                ),
                ddl.FieldSpec(
                    name="skypix_level",
                    dtype=sqlalchemy.SmallInteger,
                    nullable=False,
                    primaryKey=True,
                ),
                # (more columns added below)
            ],
            unique=set(),
            foreignKeys=[
                # Foreign key to summary table.  This makes sure we don't
                # materialize any overlaps without remembering that we've done
                # so in the summary table, though it can't prevent the converse
                # of adding a summary row without adding overlap row (either of
                # those is a logic bug, of course, but we want to be defensive
                # about those).  Using ON DELETE CASCADE, it'd be very easy to
                # implement "disabling" an overlap materialization, because we
                # can just delete the summary row.
                # Note that the governor dimension column is added below, in
                # the call to addDimensionForeignKey.
                ddl.ForeignKeySpec(
                    cls._SUMMARY_TABLE_NAME_SPEC.format(element=element),
                    source=("skypix_system", "skypix_level", element.spatial.governor.name),
                    target=("skypix_system", "skypix_level", element.spatial.governor.name),
                    onDelete="CASCADE",
                ),
            ],
        )
        # Add fields for the standard element this class manages overlaps for.
        # This is guaranteed to add a column for the governor dimension,
        # because that's a required dependency of element.
        for dimension in element.required:
            addDimensionForeignKey(tableSpec, dimension, primaryKey=True)
        # Add field for the actual skypix index.  We do this later because I
        # think we care (at least a bit) about the order in which the primary
        # key is defined, in that we want a non-summary column like this one
        # to appear after the governor dimension column.
        tableSpec.fields.add(
            ddl.FieldSpec(
                name="skypix_index",
                dtype=sqlalchemy.BigInteger,
                nullable=False,
                primaryKey=True,
            )
        )
        return tableSpec

    def enable(
        self,
        storage: TableDimensionRecordStorage,
        skypix: SkyPixDimension,
        governorValue: str,
    ) -> None:
        """Enable materialization of overlaps between a skypix dimension
        and the records of ``self.element`` with a particular governor value.

        Parameters
        ----------
        storage : `TableDimensionRecordStorage`
            Storage object for the records of ``self.element``.
        skypix : `SkyPixDimension`
            The skypix dimension (system and level) for which overlaps should
            be materialized.
        governorValue : `str`
            Value of this element's governor dimension for which overlaps
            should be materialized.  For example, if ``self.element`` is
            ``visit``, this is an instrument name; if ``self.element`` is
            ``patch``, this is a skymap name.

        Notes
        -----
        If there are existing rows for the given ``governorValue``, overlap
        rows for them will be immediately computed and inserted.  At present,
        that never happens, because we only enable overlaps with
        `DimensionUniverse.commonSkyPix`, and that happens immediately after
        each governor row is inserted (and there can't be any patch rows,
        for example, until after the corresponding skymap row is inserted).

        After calling `enable` for a particular combination, any new records
        for ``self.element`` that are inserted will automatically be
        accompanied by overlap records (via calls to `insert` made
        by `TableDimensionRecordStorage` methods).
        """
        # Because we're essentially materializing a view in Python, we
        # aggressively lock all tables we're reading and writing in order to be
        # sure nothing gets out of sync.  This may not be the most efficient
        # approach possible, but we'll focus on correct before we focus on
        # fast, and enabling a new overlap combination should be a very rare
        # operation anyway, and never one we do in parallel.
        with self._db.transaction(lock=[self._governor.table, storage._table,
                                        self._summaryTable, self._overlapTable]):
            # Only the "was a row inserted" flag matters here.
            _, inserted = self._db.sync(
                self._summaryTable,
                keys={
                    "skypix_system": skypix.system.name,
                    "skypix_level": skypix.level,
                    self._governor.element.name: governorValue,
                },
            )
            if inserted:
                _LOG.debug(
                    "Precomputing initial overlaps for %s vs %s for %s=%s",
                    skypix.name,
                    self.element.name,
                    self._governor.element.name,
                    governorValue
                )
                self._fill(storage=storage, skypix=skypix, governorValue=governorValue)
            else:
                _LOG.debug(
                    "Overlaps already precomputed for %s vs %s for %s=%s",
                    skypix.name,
                    self.element.name,
                    self._governor.element.name,
                    governorValue
                )

    def _fill(
        self, *,
        storage: TableDimensionRecordStorage,
        skypix: SkyPixDimension,
        governorValue: str,
    ) -> None:
        """Insert overlap records for a newly-enabled combination of skypix
        dimension and governor value.

        This method should only be called by `enable`.

        Parameters
        ----------
        storage : `TableDimensionRecordStorage`
            Storage object for the records of ``self.element``.
        skypix : `SkyPixDimension`
            The skypix dimension (system and level) for which overlaps should
            be materialized.
        governorValue : `str`
            Value of this element's governor dimension for which overlaps
            should be materialized.  For example, if ``self.element`` is
            ``visit``, this is an instrument name; if ``self.element`` is
            ``patch``, this is a skymap name.
        """
        overlapRecords: List[dict] = []
        # `DimensionRecordStorage.fetch` as defined by the ABC expects to be
        # given iterables of data IDs that correspond to that element's graph
        # (e.g. {instrument, visit, detector}), not just some subset of it
        # (e.g. {instrument}).  But we know the implementation of `fetch` for
        # `TableDimensionRecordStorage` will use this iterable to do exactly
        # what we want.
        governorDataId = DataCoordinate.standardize({self._governor.element.name: governorValue},
                                                    graph=self._governor.element.graph)
        for record in storage.fetch(DataCoordinateIterable.fromScalar(governorDataId)):
            if record.region is None:
                continue
            baseOverlapRecord = record.dataId.byName()
            baseOverlapRecord["skypix_system"] = skypix.system.name
            baseOverlapRecord["skypix_level"] = skypix.level
            # The envelope is iterated as (begin, end) pairs; each pair is
            # expanded to one row per index in ``range(begin, end)``.
            for begin, end in skypix.pixelization.envelope(record.region):
                overlapRecords.extend(
                    dict(baseOverlapRecord, skypix_index=index) for index in range(begin, end)
                )
        _LOG.debug(
            "Inserting %d initial overlap rows for %s vs %s for %s=%r",
            len(overlapRecords),
            skypix.name,
            self.element.name,
            self._governor.element.name,
            governorValue,
        )
        self._db.insert(self._overlapTable, *overlapRecords)

    def insert(self, records: Sequence[DimensionRecord]) -> None:
        """Insert overlaps for a sequence of ``self.element`` records that
        have just been inserted.

        This must be called by any method that inserts records for that
        element (i.e. `TableDimensionRecordStorage.insert` and
        `TableDimensionRecordStorage.sync`), within the same transaction.

        Parameters
        ----------
        records : `Sequence` [ `DimensionRecord` ]
            Records for ``self.element``.  Records with `None` regions are
            ignored.  If empty, this method does nothing.
        """
        # Group records by family.governor value.
        grouped: Dict[str, List[DimensionRecord]] = defaultdict(list)
        for record in records:
            grouped[getattr(record, self._governor.element.name)].append(record)
        if not grouped:
            # Nothing was inserted, so there is nothing to materialize; avoid
            # locking the summary table and issuing a query with an empty IN
            # clause.
            return
        _LOG.debug(
            "Precomputing new skypix overlaps for %s where %s in %s.",
            self.element.name, self._governor.element.name, grouped.keys()
        )
        # Make sure the set of combinations to materialize does not change
        # while we are materializing the ones we have, by locking the summary
        # table.  Because we aren't planning to write to the summary table,
        # this could just be a SHARED lock instead of an EXCLUSIVE one, but
        # there's no API for that right now.
        with self._db.transaction(lock=[self._summaryTable]):
            # Query for the skypix dimensions to be associated with each
            # governor value.
            gvCol = self._summaryTable.columns[self._governor.element.name]
            sysCol = self._summaryTable.columns.skypix_system
            lvlCol = self._summaryTable.columns.skypix_level
            query = sqlalchemy.sql.select(
                [gvCol, sysCol, lvlCol],
            ).select_from(
                self._summaryTable
            ).where(
                gvCol.in_(list(grouped.keys()))
            )
            # Group results by governor value, then skypix system.
            skypix: Dict[str, NamedKeyDict[SkyPixSystem, List[int]]] = {
                gv: NamedKeyDict() for gv in grouped.keys()
            }
            for summaryRow in self._db.query(query):
                system = self.element.universe.skypix[summaryRow[sysCol]]
                skypix[summaryRow[gvCol]].setdefault(system, []).append(summaryRow[lvlCol])
            overlapRecords: List[dict] = []
            # Compute overlaps for one governor value at a time, but gather
            # them all up for one insert.
            for gv, group in grouped.items():
                overlapRecords.extend(self._compute(group, skypix[gv], gv))
            _LOG.debug(
                "Inserting %d new skypix overlap rows for %s where %s in %s.",
                len(overlapRecords), self.element.name, self._governor.element.name, grouped.keys()
            )
            self._db.insert(self._overlapTable, *overlapRecords)

    def _compute(
        self,
        records: Sequence[DimensionRecord],
        skypix: NamedKeyDict[SkyPixSystem, List[int]],
        governorValue: str,
    ) -> Iterator[dict]:
        """Compute all overlap rows for a particular governor dimension value
        and all of the skypix dimensions for which its overlaps are enabled.

        This method should only be called by `insert`.

        Parameters
        ----------
        records : `Sequence` [ `DimensionRecord` ]
            Records for ``self.element``.  Records with `None` regions are
            ignored.  All must have the governor value given.
        skypix : `NamedKeyDict` [ `SkyPixSystem`, `list` [ `int` ] ]
            Mapping containing all skypix systems and levels for which overlaps
            should be computed, grouped by `SkyPixSystem`.  Not modified.
        governorValue : `str`
            Value of this element's governor dimension for which overlaps
            should be computed.  For example, if ``self.element`` is ``visit``,
            this is an instrument name; if ``self.element`` is ``patch``, this
            is a skymap name.

        Yields
        ------
        row : `dict`
            Dictionary representing an overlap row.
        """
        # Order each system's levels from finest to coarsest once, up front,
        # instead of re-sorting (and mutating the caller's lists) for every
        # record.  Systems with no enabled levels are dropped here.
        levelsBySystem = [
            (system, sorted(levels, reverse=True))
            for system, levels in skypix.items()
            if levels
        ]
        governorName = self._governor.element.name
        # Process input records one at time, computing all skypix indices for
        # each.
        for record in records:
            if record.region is None:
                continue
            assert getattr(record, governorName) == governorValue
            for system, levels in levelsBySystem:
                baseOverlapRecord = record.dataId.byName()
                baseOverlapRecord["skypix_system"] = system.name
                # Start with the first level, which is the finest-grained one.
                # Compute skypix envelope indices directly for that.
                finest = levels[0]
                indices: Dict[int, Set[int]] = {finest: set()}
                for begin, end in system[finest].pixelization.envelope(record.region):
                    indices[finest].update(range(begin, end))
                # Divide those indices by powers of 4 (and remove duplicates)
                # to work our way up to the last (coarsest) level.
                for lastLevel, nextLevel in zip(levels[:-1], levels[1:]):
                    factor = 4**(lastLevel - nextLevel)
                    indices[nextLevel] = {index//factor for index in indices[lastLevel]}
                for level in levels:
                    yield from (
                        {
                            "skypix_level": level,
                            "skypix_index": index,
                            **baseOverlapRecord,  # type: ignore
                        } for index in indices[level]
                    )

    def select(
        self,
        skypix: SkyPixDimension,
        governorValues: Union[AbstractSet[str], EllipsisType],
    ) -> sqlalchemy.sql.FromClause:
        """Construct a subquery expression containing overlaps between the
        given skypix dimension and governor values.

        Parameters
        ----------
        skypix : `SkyPixDimension`
            The skypix dimension (system and level) for which overlaps should
            be materialized.
        governorValues : `AbstractSet` [ `str` ] or ``...``
            Values of this element's governor dimension for which overlaps
            should be returned.  For example, if ``self.element`` is ``visit``,
            this is a set of instrument names; if ``self.element`` is
            ``patch``, this is a set of skymap names.  If ``...`` all values
            in the database are used (`GovernorDimensionRecordStorage.values`).

        Returns
        -------
        subquery : `sqlalchemy.sql.FromClause`
            A SELECT query with an alias, intended for use as a subquery, with
            columns equal to ``self.element.required.names`` + ``skypix.name``.

        Raises
        ------
        RuntimeError
            Raised if ``skypix`` is not the common skypix dimension and
            overlaps with it have not been materialized for one or more of
            the requested governor values.
        """
        if skypix != self.element.universe.commonSkyPix:
            # We guarantee elsewhere that we always materialize all overlaps
            # vs. commonSkyPix, but for everything else, we need to check that
            # we have materialized this combination of governor values and
            # skypix.
            summaryWhere = [
                self._summaryTable.columns.skypix_system == skypix.system.name,
                self._summaryTable.columns.skypix_level == skypix.level,
            ]
            gvCol = self._summaryTable.columns[self._governor.element.name]
            if governorValues is not Ellipsis:
                summaryWhere.append(gvCol.in_(list(governorValues)))
            summaryQuery = sqlalchemy.sql.select(
                [gvCol]
            ).select_from(
                self._summaryTable
            ).where(
                sqlalchemy.sql.and_(*summaryWhere)
            )
            materializedGovernorValues = {row[gvCol] for row in self._db.query(summaryQuery)}
            if governorValues is Ellipsis:
                missingGovernorValues = self._governor.values - materializedGovernorValues
            else:
                missingGovernorValues = governorValues - materializedGovernorValues
            if missingGovernorValues:
                raise RuntimeError(
                    f"Query requires an overlap join between {skypix.name} and {self.element.name} "
                    f"(for {self._governor.element.name} in {missingGovernorValues}), but these "
                    f"have not been materialized."
                )
        # Expose the skypix index under the skypix dimension's own name so the
        # subquery can be joined on it like any other dimension column.
        columns = [self._overlapTable.columns.skypix_index.label(skypix.name)]
        columns.extend(self._overlapTable.columns[name] for name in self.element.graph.required.names)
        overlapWhere = [
            self._overlapTable.columns.skypix_system == skypix.system.name,
            self._overlapTable.columns.skypix_level == skypix.level,
        ]
        if governorValues is not Ellipsis:
            overlapWhere.append(
                self._overlapTable.columns[self._governor.element.name].in_(list(governorValues))
            )
        overlapQuery = sqlalchemy.sql.select(
            columns
        ).select_from(
            self._overlapTable
        ).where(
            sqlalchemy.sql.and_(*overlapWhere)
        )
        return overlapQuery.alias(f"{self.element.name}_{skypix.name}_overlap")

    def digestTables(self) -> Iterable[sqlalchemy.schema.Table]:
        """Return tables used for schema digest.

        Returns
        -------
        tables : `Iterable` [ `sqlalchemy.schema.Table` ]
            Possibly empty set of tables for schema digest calculations.
        """
        return [self._summaryTable, self._overlapTable]