Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = ["TableDimensionRecordStorage"] 

24 

25from collections import defaultdict 

26import logging 

27from typing import ( 

28 AbstractSet, 

29 Dict, 

30 Iterable, 

31 Iterator, 

32 List, 

33 Optional, 

34 Sequence, 

35 Set, 

36 Union, 

37) 

38 

39import sqlalchemy 

40 

41from ...core import ( 

42 addDimensionForeignKey, 

43 Config, 

44 DatabaseDimensionElement, 

45 DataCoordinate, 

46 DataCoordinateIterable, 

47 ddl, 

48 DimensionElement, 

49 DimensionRecord, 

50 GovernorDimension, 

51 NamedKeyDict, 

52 NamedKeyMapping, 

53 NamedValueSet, 

54 REGION_FIELD_SPEC, 

55 SimpleQuery, 

56 SkyPixDimension, 

57 SkyPixSystem, 

58 TimespanDatabaseRepresentation, 

59) 

60from ..interfaces import ( 

61 Database, 

62 DatabaseDimensionOverlapStorage, 

63 DatabaseDimensionRecordStorage, 

64 GovernorDimensionRecordStorage, 

65 StaticTablesContext, 

66) 

67from ..queries import QueryBuilder 

68from ..wildcards import Ellipsis, EllipsisType 

69 

70 

# Module-level logger, named after this module.
_LOG = logging.getLogger(__name__)


# NOTE(review): this constant is not referenced anywhere in the visible part
# of this module -- confirm whether other modules import it before removing.
MAX_FETCH_CHUNK = 1000
"""Maximum number of data IDs we fetch records at a time.

Barring something database-engine-specific, this sets the size of the actual
SQL query, not just the number of result rows, because the only way to query
for multiple data IDs in a single SELECT query via SQLAlchemy is to have an OR
term in the WHERE clause for each one.
"""

82 

83 

class TableDimensionRecordStorage(DatabaseDimensionRecordStorage):
    """A record storage implementation that uses a regular database table.

    Parameters
    ----------
    db : `Database`
        Interface to the database engine and namespace that will hold these
        dimension records.
    element : `DatabaseDimensionElement`
        The element whose records this storage will manage.
    table : `sqlalchemy.schema.Table`
        The logical table for the element.
    skyPixOverlap : `_SkyPixOverlapStorage`, optional
        Object that manages the tables that hold materialized spatial overlap
        joins to skypix dimensions.  Should be `None` if (and only if)
        ``element.spatial is None``.
    """
    def __init__(self, db: Database, element: DatabaseDimensionElement, *, table: sqlalchemy.schema.Table,
                 skyPixOverlap: Optional[_SkyPixOverlapStorage] = None):
        self._db = db
        self._table = table
        self._element = element
        # Map each dimension name to the table column holding its value; used
        # by `fetch` to constrain queries on data IDs.
        self._fetchColumns: Dict[str, sqlalchemy.sql.ColumnElement] = {
            dimension.name: self._table.columns[name]
            for dimension, name in zip(self._element.dimensions,
                                       self._element.RecordClass.fields.dimensions.names)
        }
        self._skyPixOverlap = skyPixOverlap
        self._otherOverlaps: List[DatabaseDimensionOverlapStorage] = []

    @classmethod
    def initialize(
        cls,
        db: Database,
        element: DatabaseDimensionElement, *,
        context: Optional[StaticTablesContext] = None,
        config: Config,
        governors: NamedKeyMapping[GovernorDimension, GovernorDimensionRecordStorage],
    ) -> DatabaseDimensionRecordStorage:
        # Docstring inherited from DatabaseDimensionRecordStorage.
        spec = element.RecordClass.fields.makeTableSpec(tsRepr=db.getTimespanRepresentation())
        if context is not None:
            table = context.addTable(element.name, spec)
        else:
            table = db.ensureTableExists(element.name, spec)
        if element.spatial is None:
            # Non-spatial elements have no skypix overlaps to manage.
            return cls(db, element, table=table)
        governor = governors[element.spatial.governor]
        skyPixOverlap = _SkyPixOverlapStorage.initialize(
            db,
            element,
            context=context,
            governor=governor,
        )
        result = cls(db, element, table=table, skyPixOverlap=skyPixOverlap)
        # Name of the record attribute that holds the governor dimension's
        # primary key value.
        governorKeyName = element.spatial.governor.primaryKey.name

        # Whenever anyone inserts a new governor dimension value, we want
        # to enable overlaps for that value between this element and
        # commonSkyPix.
        def callback(record: DimensionRecord) -> None:
            skyPixOverlap.enable(
                result,
                element.universe.commonSkyPix,
                getattr(record, governorKeyName),
            )

        governor.registerInsertionListener(callback)
        return result

    @property
    def element(self) -> DatabaseDimensionElement:
        # Docstring inherited from DimensionRecordStorage.element.
        return self._element

    def clearCaches(self) -> None:
        # Docstring inherited from DimensionRecordStorage.clearCaches.
        pass

    def join(
        self,
        builder: QueryBuilder, *,
        regions: Optional[NamedKeyDict[DimensionElement, sqlalchemy.sql.ColumnElement]] = None,
        timespans: Optional[NamedKeyDict[DimensionElement, TimespanDatabaseRepresentation]] = None,
    ) -> None:
        # Docstring inherited from DimensionRecordStorage.
        if regions is not None:
            # Spatial joins go through the materialized overlaps with the
            # common skypix dimension.
            dimensions = NamedValueSet(self.element.required)
            dimensions.add(self.element.universe.commonSkyPix)
            assert self._skyPixOverlap is not None
            builder.joinTable(
                self._skyPixOverlap.select(self.element.universe.commonSkyPix, Ellipsis),
                dimensions,
            )
            regions[self.element] = self._table.columns[REGION_FIELD_SPEC.name]
        joinOn = builder.startJoin(self._table, self.element.dimensions,
                                   self.element.RecordClass.fields.dimensions.names)
        if timespans is not None:
            timespanInTable = self._db.getTimespanRepresentation().fromSelectable(self._table)
            # Require an overlap with every timespan already in the query.
            for timespanInQuery in timespans.values():
                joinOn.append(timespanInQuery.overlaps(timespanInTable))
            timespans[self.element] = timespanInTable
        builder.finishJoin(self._table, joinOn)
        # NOTE(review): the signature promises ``None`` but the table is
        # returned; kept as-is so existing callers are unaffected -- confirm
        # whether this return value is actually relied upon.
        return self._table

    def fetch(self, dataIds: DataCoordinateIterable) -> Iterable[DimensionRecord]:
        # Docstring inherited from DimensionRecordStorage.fetch.
        RecordClass = self.element.RecordClass
        query = SimpleQuery()
        query.columns.extend(self._table.columns[name] for name in RecordClass.fields.standard.names)
        if self.element.spatial is not None:
            # Use the shared field spec for the region column, as `join` does.
            query.columns.append(self._table.columns[REGION_FIELD_SPEC.name])
        if self.element.temporal is not None:
            tsRepr = self._db.getTimespanRepresentation()
            query.columns.extend(self._table.columns[name] for name in tsRepr.getFieldNames())
        query.join(self._table)
        dataIds.constrain(query, lambda name: self._fetchColumns[name])
        for row in self._db.query(query.combine()):
            values = dict(row)
            if self.element.temporal is not None:
                # Fold the database timespan columns into one record field.
                values[TimespanDatabaseRepresentation.NAME] = tsRepr.extract(values)
            yield RecordClass(**values)

    def insert(self, *records: DimensionRecord) -> None:
        # Docstring inherited from DimensionRecordStorage.insert.
        elementRows = [record.toDict() for record in records]
        if self.element.temporal is not None:
            # Replace the timespan entry with its database column values.
            tsRepr = self._db.getTimespanRepresentation()
            for row in elementRows:
                timespan = row.pop(TimespanDatabaseRepresentation.NAME)
                tsRepr.update(timespan, result=row)
        # Insert the records and their overlap rows in a single transaction.
        with self._db.transaction():
            self._db.insert(self._table, *elementRows)
            if self._skyPixOverlap is not None:
                self._skyPixOverlap.insert(records)

    def sync(self, record: DimensionRecord) -> bool:
        # Docstring inherited from DimensionRecordStorage.sync.
        compared = record.toDict()
        # Required fields identify the row; everything left is compared.
        keys = {name: compared.pop(name) for name in record.fields.required.names}
        if self.element.temporal is not None:
            tsRepr = self._db.getTimespanRepresentation()
            timespan = compared.pop(TimespanDatabaseRepresentation.NAME)
            tsRepr.update(timespan, result=compared)
        with self._db.transaction():
            _, inserted = self._db.sync(
                self._table,
                keys=keys,
                compared=compared,
            )
            # Overlap rows are only needed for a row that was newly inserted.
            if inserted and self._skyPixOverlap is not None:
                self._skyPixOverlap.insert([record])
        return inserted

    def digestTables(self) -> Iterable[sqlalchemy.schema.Table]:
        # Docstring inherited from DimensionRecordStorage.digestTables.
        result = [self._table]
        if self._skyPixOverlap is not None:
            result.extend(self._skyPixOverlap.digestTables())
        return result

    def connect(self, overlaps: DatabaseDimensionOverlapStorage) -> None:
        # Docstring inherited from DatabaseDimensionRecordStorage.
        self._otherOverlaps.append(overlaps)

251 

252 

class _SkyPixOverlapStorage:
    """A helper object for `TableDimensionRecordStorage` that manages its
    materialized overlaps with skypix dimensions.

    New instances should be constructed by calling `initialize`, not by calling
    the constructor directly.

    Parameters
    ----------
    db : `Database`
        Interface to the underlying database engine and namespace.
    element : `DatabaseDimensionElement`
        Dimension element whose overlaps are to be managed.
    summaryTable : `sqlalchemy.schema.Table`
        Table that records which combinations of skypix dimensions and
        governor dimension values have materialized overlap rows.
    overlapTable : `sqlalchemy.schema.Table`
        Table containing the actual materialized overlap rows.
    governor : `GovernorDimensionRecordStorage`
        Record storage backend for this element's governor dimension.

    Notes
    -----
    This class (and most importantly, the tables it relies on) can in principle
    manage overlaps with any skypix dimension, but at present it is
    only being used to manage relationships with the special ``commonSkyPix``
    dimension, because that's all the query system uses.  Eventually, we expect
    to require users to explicitly materialize all relationships they will
    want to use in queries.

    Other possible future improvements include:

     - allowing finer-grained skypix dimensions to provide overlap rows for
       coarser ones, by dividing indices by powers of 4 (and possibly doing
       ``SELECT DISTINCT`` in the subquery to remove duplicates);

     - allowing finer-grained database elements (e.g. patch) to provide overlap
       rows for coarser ones (e.g. tract), by ignoring irrelevant columns
       (e.g. the patch IDs) in the subquery (again, possible with
       ``SELECT DISTINCT``).

    But there's no point to doing any of that until the query system can
    figure out how best to ask for overlap rows when an exact match isn't
    available.
    """
    def __init__(
        self,
        db: Database,
        element: DatabaseDimensionElement,
        summaryTable: sqlalchemy.schema.Table,
        overlapTable: sqlalchemy.schema.Table,
        governor: GovernorDimensionRecordStorage,
    ):
        self._db = db
        self.element = element
        assert element.spatial is not None
        self._summaryTable = summaryTable
        self._overlapTable = overlapTable
        self._governor = governor

    @classmethod
    def initialize(
        cls,
        db: Database,
        element: DatabaseDimensionElement, *,
        context: Optional[StaticTablesContext],
        governor: GovernorDimensionRecordStorage,
    ) -> _SkyPixOverlapStorage:
        """Construct a new instance, creating tables as needed.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        element : `DatabaseDimensionElement`
            Dimension element whose overlaps are to be managed.
        context : `StaticTablesContext`, optional
            If provided, an object to use to create any new tables.  If not
            provided, ``db.ensureTableExists`` should be used instead.
        governor : `GovernorDimensionRecordStorage`
            Record storage backend for this element's governor dimension.

        Returns
        -------
        storage : `_SkyPixOverlapStorage`
            New instance of ``cls`` backed by the (possibly newly created)
            summary and overlap tables.
        """
        if context is not None:
            op = context.addTable
        else:
            op = db.ensureTableExists
        summaryTable = op(
            cls._SUMMARY_TABLE_NAME_SPEC.format(element=element),
            cls._makeSummaryTableSpec(element),
        )
        overlapTable = op(
            cls._OVERLAP_TABLE_NAME_SPEC.format(element=element),
            cls._makeOverlapTableSpec(element),
        )
        # Construct via ``cls`` so subclasses get instances of their own type.
        return cls(db, element, summaryTable=summaryTable, overlapTable=overlapTable,
                   governor=governor)

    _SUMMARY_TABLE_NAME_SPEC = "{element.name}_skypix_overlap_summary"

    @classmethod
    def _makeSummaryTableSpec(cls, element: DatabaseDimensionElement) -> ddl.TableSpec:
        """Create a specification for the table that records which combinations
        of skypix dimension and governor value have materialized overlaps.

        Parameters
        ----------
        element : `DatabaseDimensionElement`
            Dimension element whose overlaps are to be managed.

        Returns
        -------
        tableSpec : `ddl.TableSpec`
            Table specification.
        """
        assert element.spatial is not None
        tableSpec = ddl.TableSpec(
            fields=[
                ddl.FieldSpec(
                    name="skypix_system",
                    dtype=sqlalchemy.String,
                    length=16,
                    nullable=False,
                    primaryKey=True,
                ),
                ddl.FieldSpec(
                    name="skypix_level",
                    dtype=sqlalchemy.SmallInteger,
                    nullable=False,
                    primaryKey=True,
                ),
            ]
        )
        addDimensionForeignKey(tableSpec, element.spatial.governor, primaryKey=True)
        return tableSpec

    _OVERLAP_TABLE_NAME_SPEC = "{element.name}_skypix_overlap"

    @classmethod
    def _makeOverlapTableSpec(cls, element: DatabaseDimensionElement) -> ddl.TableSpec:
        """Create a specification for the table that holds materialized
        overlap rows.

        Parameters
        ----------
        element : `DatabaseDimensionElement`
            Dimension element whose overlaps are to be managed.

        Returns
        -------
        tableSpec : `ddl.TableSpec`
            Table specification.
        """
        assert element.spatial is not None
        tableSpec = ddl.TableSpec(
            fields=[
                ddl.FieldSpec(
                    name="skypix_system",
                    dtype=sqlalchemy.String,
                    length=16,
                    nullable=False,
                    primaryKey=True,
                ),
                ddl.FieldSpec(
                    name="skypix_level",
                    dtype=sqlalchemy.SmallInteger,
                    nullable=False,
                    primaryKey=True,
                ),
                # (more columns added below)
            ],
            unique=set(),
            foreignKeys=[
                # Foreign key to summary table.  This makes sure we don't
                # materialize any overlaps without remembering that we've done
                # so in the summary table, though it can't prevent the converse
                # of adding a summary row without adding overlap row (either of
                # those is a logic bug, of course, but we want to be defensive
                # about those).  Using ON DELETE CASCADE, it'd be very easy to
                # implement "disabling" an overlap materialization, because we
                # can just delete the summary row.
                # Note that the governor dimension column is added below, in
                # the call to addDimensionForeignKey.
                ddl.ForeignKeySpec(
                    cls._SUMMARY_TABLE_NAME_SPEC.format(element=element),
                    source=("skypix_system", "skypix_level", element.spatial.governor.name),
                    target=("skypix_system", "skypix_level", element.spatial.governor.name),
                    onDelete="CASCADE",
                ),
            ],
        )
        # Add fields for the standard element this class manages overlaps for.
        # This is guaranteed to add a column for the governor dimension,
        # because that's a required dependency of element.
        for dimension in element.required:
            addDimensionForeignKey(tableSpec, dimension, primaryKey=True)
        # Add field for the actual skypix index.  We do this later because I
        # think we care (at least a bit) about the order in which the primary
        # key is defined, in that we want a non-summary column like this one
        # to appear after the governor dimension column.
        tableSpec.fields.add(
            ddl.FieldSpec(
                name="skypix_index",
                dtype=sqlalchemy.BigInteger,
                nullable=False,
                primaryKey=True,
            )
        )
        return tableSpec

    def enable(
        self,
        storage: TableDimensionRecordStorage,
        skypix: SkyPixDimension,
        governorValue: str,
    ) -> None:
        """Enable materialization of overlaps between a skypix dimension
        and the records of ``self.element`` with a particular governor value.

        Parameters
        ----------
        storage : `TableDimensionRecordStorage`
            Storage object for the records of ``self.element``.
        skypix : `SkyPixDimension`
            The skypix dimension (system and level) for which overlaps should
            be materialized.
        governorValue : `str`
            Value of this element's governor dimension for which overlaps
            should be materialized.  For example, if ``self.element`` is
            ``visit``, this is an instrument name; if ``self.element`` is
            ``patch``, this is a skymap name.

        Notes
        -----
        If there are existing rows for the given ``governorValue``, overlap
        rows for them will be immediately computed and inserted.  At present,
        that never happens, because we only enable overlaps with
        `DimensionUniverse.commonSkyPix`, and that happens immediately after
        each governor row is inserted (and there can't be any patch rows,
        for example, until after the corresponding skymap row is inserted).

        After calling `enable` for a particular combination, any new records
        for ``self.element`` that are inserted will automatically be
        accompanied by overlap records (via calls to `insert` made
        by `TableDimensionRecordStorage` methods).
        """
        # Because we're essentially materializing a view in Python, we
        # aggressively lock all tables we're reading and writing in order to be
        # sure nothing gets out of sync.  This may not be the most efficient
        # approach possible, but we'll focus on correct before we focus on
        # fast, and enabling a new overlap combination should be a very rare
        # operation anyway, and never one we do in parallel.
        with self._db.transaction(lock=[self._governor.table, storage._table,
                                        self._summaryTable, self._overlapTable]):
            # Only the "was a row inserted" flag matters here.
            _, inserted = self._db.sync(
                self._summaryTable,
                keys={
                    "skypix_system": skypix.system.name,
                    "skypix_level": skypix.level,
                    self._governor.element.name: governorValue,
                },
            )
            if inserted:
                _LOG.debug(
                    "Precomputing initial overlaps for %s vs %s for %s=%s",
                    skypix.name,
                    self.element.name,
                    self._governor.element.name,
                    governorValue
                )
                self._fill(storage=storage, skypix=skypix, governorValue=governorValue)
            else:
                _LOG.debug(
                    "Overlaps already precomputed for %s vs %s for %s=%s",
                    skypix.name,
                    self.element.name,
                    self._governor.element.name,
                    governorValue
                )

    def _fill(
        self, *,
        storage: TableDimensionRecordStorage,
        skypix: SkyPixDimension,
        governorValue: str,
    ) -> None:
        """Insert overlap records for a newly-enabled combination of skypix
        dimension and governor value.

        This method should only be called by `enable`.

        Parameters
        ----------
        storage : `TableDimensionRecordStorage`
            Storage object for the records of ``self.element``.
        skypix : `SkyPixDimension`
            The skypix dimension (system and level) for which overlaps should
            be materialized.
        governorValue : `str`
            Value of this element's governor dimension for which overlaps
            should be materialized.  For example, if ``self.element`` is
            ``visit``, this is an instrument name; if ``self.element`` is
            ``patch``, this is a skymap name.
        """
        overlapRecords: List[dict] = []
        # `DimensionRecordStorage.fetch` as defined by the ABC expects to be
        # given iterables of data IDs that correspond to that element's graph
        # (e.g. {instrument, visit, detector}), not just some subset of it
        # (e.g. {instrument}).  But we know the implementation of `fetch` for
        # `TableDimensionRecordStorage` will use this iterable to do exactly
        # what we want.
        governorDataId = DataCoordinate.standardize({self._governor.element.name: governorValue},
                                                    graph=self._governor.element.graph)
        for record in storage.fetch(DataCoordinateIterable.fromScalar(governorDataId)):
            if record.region is None:  # type: ignore
                continue
            baseOverlapRecord = record.dataId.byName()
            baseOverlapRecord["skypix_system"] = skypix.system.name
            baseOverlapRecord["skypix_level"] = skypix.level
            # Each envelope entry is a half-open [begin, end) range of indices.
            for begin, end in skypix.pixelization.envelope(record.region):  # type: ignore
                overlapRecords.extend(
                    dict(baseOverlapRecord, skypix_index=index) for index in range(begin, end)
                )
        _LOG.debug(
            "Inserting %d initial overlap rows for %s vs %s for %s=%r",
            len(overlapRecords),
            skypix.name,
            self.element.name,
            self._governor.element.name,
            governorValue,
        )
        self._db.insert(self._overlapTable, *overlapRecords)

    def insert(self, records: Sequence[DimensionRecord]) -> None:
        """Insert overlaps for a sequence of ``self.element`` records that
        have just been inserted.

        This must be called by any method that inserts records for that
        element (i.e. `TableDimensionRecordStorage.insert` and
        `TableDimensionRecordStorage.sync`), within the same transaction.

        Parameters
        ----------
        records : `Sequence` [ `DimensionRecord` ]
            Records for ``self.element``.  Records with `None` regions are
            ignored.  If empty, this method does nothing.
        """
        if not records:
            # Nothing to materialize; avoid locking the summary table and
            # issuing a query with an empty IN clause.
            return
        # Group records by family.governor value.
        grouped: Dict[str, List[DimensionRecord]] = defaultdict(list)
        for record in records:
            grouped[getattr(record, self._governor.element.name)].append(record)
        _LOG.debug(
            "Precomputing new skypix overlaps for %s where %s in %s.",
            self.element.name, self._governor.element.name, grouped.keys()
        )
        # Make sure the set of combinations to materialize does not change
        # while we are materializing the ones we have, by locking the summary
        # table.  Because we aren't planning to write to the summary table,
        # this could just be a SHARED lock instead of an EXCLUSIVE one, but
        # there's no API for that right now.
        with self._db.transaction(lock=[self._summaryTable]):
            # Query for the skypix dimensions to be associated with each
            # governor value.
            gvCol = self._summaryTable.columns[self._governor.element.name]
            sysCol = self._summaryTable.columns.skypix_system
            lvlCol = self._summaryTable.columns.skypix_level
            query = sqlalchemy.sql.select(
                [gvCol, sysCol, lvlCol],
            ).select_from(
                self._summaryTable
            ).where(
                gvCol.in_(list(grouped))
            )
            # Group results by governor value, then skypix system.
            skypix: Dict[str, NamedKeyDict[SkyPixSystem, List[int]]] = {
                gv: NamedKeyDict() for gv in grouped
            }
            for summaryRow in self._db.query(query):
                system = self.element.universe.skypix[summaryRow[sysCol]]
                skypix[summaryRow[gvCol]].setdefault(system, []).append(summaryRow[lvlCol])
            overlapRecords: List[dict] = []
            # Compute overlaps for one governor value at a time, but gather
            # them all up for one insert.
            for gv, group in grouped.items():
                overlapRecords.extend(self._compute(group, skypix[gv], gv))
            _LOG.debug(
                "Inserting %d new skypix overlap rows for %s where %s in %s.",
                len(overlapRecords), self.element.name, self._governor.element.name, grouped.keys()
            )
            self._db.insert(self._overlapTable, *overlapRecords)

    def _compute(
        self,
        records: Sequence[DimensionRecord],
        skypix: NamedKeyDict[SkyPixSystem, List[int]],
        governorValue: str,
    ) -> Iterator[dict]:
        """Compute all overlap rows for a particular governor dimension value
        and all of the skypix dimensions for which its overlaps are enabled.

        This method should only be called by `insert`.

        Parameters
        ----------
        records : `Sequence` [ `DimensionRecord` ]
            Records for ``self.element``.  Records with `None` regions are
            ignored.  All must have the governor value given.
        skypix : `NamedKeyDict` [ `SkyPixSystem`, `list` [ `int` ] ]
            Mapping containing all skypix systems and levels for which overlaps
            should be computed, grouped by `SkyPixSystem`.  Not modified.
        governorValue : `str`
            Value of this element's governor dimension for which overlaps
            should be computed.  For example, if ``self.element`` is ``visit``,
            this is an instrument name; if ``self.element`` is ``patch``, this
            is a skymap name.

        Yields
        ------
        row : `dict`
            Dictionary representing an overlap row.
        """
        # Sort each system's levels once, finest-grained first, into new lists
        # so the caller's mapping is left untouched.  Systems with no levels
        # contribute nothing and are dropped here.
        levelsBySystem = [
            (system, sorted(levels, reverse=True))
            for system, levels in skypix.items()
            if levels
        ]
        governorName = self._governor.element.name
        # Process input records one at time, computing all skypix indices for
        # each.
        for record in records:
            if record.region is None:  # type: ignore
                continue
            assert getattr(record, governorName) == governorValue
            for system, levels in levelsBySystem:
                baseOverlapRecord = record.dataId.byName()
                baseOverlapRecord["skypix_system"] = system.name
                # Start with the first level, which is the finest-grained one.
                # Compute skypix envelope indices directly for that.
                finest = levels[0]
                indices: Dict[int, Set[int]] = {finest: set()}
                for begin, end in system[finest].pixelization.envelope(record.region):  # type: ignore
                    indices[finest].update(range(begin, end))
                # Divide those indices by powers of 4 (and remove duplicates)
                # to work our way up to the last (coarsest) level.
                for lastLevel, nextLevel in zip(levels[:-1], levels[1:]):
                    factor = 4**(lastLevel - nextLevel)
                    indices[nextLevel] = {index//factor for index in indices[lastLevel]}
                for level in levels:
                    yield from (
                        {
                            "skypix_level": level,
                            "skypix_index": index,
                            **baseOverlapRecord,  # type: ignore
                        } for index in indices[level]
                    )

    def select(
        self,
        skypix: SkyPixDimension,
        governorValues: Union[AbstractSet[str], EllipsisType],
    ) -> sqlalchemy.sql.FromClause:
        """Construct a subquery expression containing overlaps between the
        given skypix dimension and governor values.

        Parameters
        ----------
        skypix : `SkyPixDimension`
            The skypix dimension (system and level) for which overlaps should
            be materialized.
        governorValues : `collections.abc.Set` [ `str` ] or ``...``
            Values of this element's governor dimension for which overlaps
            should be returned.  For example, if ``self.element`` is ``visit``,
            this is a set of instrument names; if ``self.element`` is
            ``patch``, this is a set of skymap names.  If ``...`` all values
            in the database are used (`GovernorDimensionRecordStorage.values`).

        Returns
        -------
        subquery : `sqlalchemy.sql.FromClause`
            A SELECT query with an alias, intended for use as a subquery, with
            columns equal to ``self.element.required.names`` + ``skypix.name``.

        Raises
        ------
        RuntimeError
            Raised if ``skypix`` is not the common skypix dimension and
            overlaps with it have not been materialized for at least one of
            the requested governor values.
        """
        if skypix != self.element.universe.commonSkyPix:
            # We guarantee elsewhere that we always materialize all overlaps
            # vs. commonSkyPix, but for everything else, we need to check that
            # we have materialized this combination of governor values and
            # skypix.
            summaryWhere = [
                self._summaryTable.columns.skypix_system == skypix.system.name,
                self._summaryTable.columns.skypix_level == skypix.level,
            ]
            gvCol = self._summaryTable.columns[self._governor.element.name]
            if governorValues is not Ellipsis:
                summaryWhere.append(gvCol.in_(list(governorValues)))
            summaryQuery = sqlalchemy.sql.select(
                [gvCol]
            ).select_from(
                self._summaryTable
            ).where(
                sqlalchemy.sql.and_(*summaryWhere)
            )
            materializedGovernorValues = {row[gvCol] for row in self._db.query(summaryQuery)}
            if governorValues is Ellipsis:
                missingGovernorValues = self._governor.values - materializedGovernorValues
            else:
                missingGovernorValues = governorValues - materializedGovernorValues
            if missingGovernorValues:
                raise RuntimeError(
                    f"Query requires an overlap join between {skypix.name} and {self.element.name} "
                    f"(for {self._governor.element.name} in {missingGovernorValues}), but these "
                    f"have not been materialized."
                )
        # Expose the skypix index under the skypix dimension's own name so the
        # subquery can be joined on it.
        columns = [self._overlapTable.columns.skypix_index.label(skypix.name)]
        columns.extend(self._overlapTable.columns[name] for name in self.element.graph.required.names)
        overlapWhere = [
            self._overlapTable.columns.skypix_system == skypix.system.name,
            self._overlapTable.columns.skypix_level == skypix.level,
        ]
        if governorValues is not Ellipsis:
            overlapWhere.append(
                self._overlapTable.columns[self._governor.element.name].in_(list(governorValues))
            )
        overlapQuery = sqlalchemy.sql.select(
            columns
        ).select_from(
            self._overlapTable
        ).where(
            sqlalchemy.sql.and_(*overlapWhere)
        )
        return overlapQuery.alias(f"{self.element.name}_{skypix.name}_overlap")

    def digestTables(self) -> Iterable[sqlalchemy.schema.Table]:
        """Return tables used for schema digest.

        Returns
        -------
        tables : `Iterable` [ `sqlalchemy.schema.Table` ]
            Possibly empty set of tables for schema digest calculations.
        """
        return [self._summaryTable, self._overlapTable]