Coverage for python/lsst/daf/butler/registry/dimensions/table.py: 85%

228 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2022-12-01 19:54 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = ["TableDimensionRecordStorage"] 

24 

25from collections import defaultdict 

26import itertools 

27import logging 

28from typing import ( 

29 AbstractSet, 

30 Any, 

31 Dict, 

32 Iterable, 

33 Iterator, 

34 List, 

35 Mapping, 

36 Optional, 

37 Sequence, 

38 Set, 

39 Union, 

40) 

41 

42import sqlalchemy 

43 

44from ...core import ( 

45 addDimensionForeignKey, 

46 DatabaseDimensionElement, 

47 DataCoordinate, 

48 DataCoordinateIterable, 

49 ddl, 

50 DimensionElement, 

51 DimensionRecord, 

52 GovernorDimension, 

53 NamedKeyDict, 

54 NamedKeyMapping, 

55 NamedValueSet, 

56 SimpleQuery, 

57 SkyPixDimension, 

58 SkyPixSystem, 

59 SpatialRegionDatabaseRepresentation, 

60 TimespanDatabaseRepresentation, 

61) 

62from ..interfaces import ( 

63 Database, 

64 DatabaseDimensionOverlapStorage, 

65 DatabaseDimensionRecordStorage, 

66 GovernorDimensionRecordStorage, 

67 StaticTablesContext, 

68) 

69from ..queries import QueryBuilder 

70from ..wildcards import Ellipsis, EllipsisType 

71 

72 

# Module-level logger, named after this module via ``__name__``.
_LOG = logging.getLogger(__name__)


# NOTE(review): nothing in the visible part of this file references this
# constant; presumably it is consumed by other modules -- confirm before
# changing or removing it.
MAX_FETCH_CHUNK = 1000
"""Maximum number of data IDs we fetch records at a time.

Barring something database-engine-specific, this sets the size of the actual
SQL query, not just the number of result rows, because the only way to query
for multiple data IDs in a single SELECT query via SQLAlchemy is to have an OR
term in the WHERE clause for each one.
"""

84 

85 

class TableDimensionRecordStorage(DatabaseDimensionRecordStorage):
    """A record storage implementation that uses a regular database table.

    Parameters
    ----------
    db : `Database`
        Interface to the database engine and namespace that will hold these
        dimension records.
    element : `DatabaseDimensionElement`
        The element whose records this storage will manage.
    table : `sqlalchemy.schema.Table`
        The logical table for the element.
    skyPixOverlap : `_SkyPixOverlapStorage`, optional
        Object that manages the tables that hold materialized spatial overlap
        joins to skypix dimensions.  Should be `None` if (and only if)
        ``element.spatial is None``.
    """

    def __init__(
        self,
        db: Database,
        element: DatabaseDimensionElement,
        *,
        table: sqlalchemy.schema.Table,
        skyPixOverlap: Optional[_SkyPixOverlapStorage] = None,
    ):
        self._db = db
        self._table = table
        self._element = element
        # Map each dimension name of the element to the table column that
        # holds its key value; `fetch` uses this to constrain queries.
        self._fetchColumns: Dict[str, sqlalchemy.sql.ColumnElement] = {
            dimension.name: self._table.columns[name]
            for dimension, name in zip(self._element.dimensions,
                                       self._element.RecordClass.fields.dimensions.names)
        }
        self._skyPixOverlap = skyPixOverlap
        # Overlap storage objects registered later through `connect`.
        self._otherOverlaps: List[DatabaseDimensionOverlapStorage] = []

    @classmethod
    def initialize(
        cls,
        db: Database,
        element: DatabaseDimensionElement, *,
        context: Optional[StaticTablesContext] = None,
        config: Mapping[str, Any],
        governors: NamedKeyMapping[GovernorDimension, GovernorDimensionRecordStorage],
    ) -> DatabaseDimensionRecordStorage:
        # Docstring inherited from DatabaseDimensionRecordStorage.
        spec = element.RecordClass.fields.makeTableSpec(
            RegionReprClass=db.getSpatialRegionRepresentation(),
            TimespanReprClass=db.getTimespanRepresentation(),
        )
        if context is not None:
            table = context.addTable(element.name, spec)
        else:
            table = db.ensureTableExists(element.name, spec)
        if element.spatial is None:
            # Non-spatial elements have no skypix overlaps to manage.
            return cls(db, element, table=table)
        governorDimension = element.spatial.governor
        governor = governors[governorDimension]
        skyPixOverlap = _SkyPixOverlapStorage.initialize(
            db,
            element,
            context=context,
            governor=governor,
        )
        result = cls(db, element, table=table, skyPixOverlap=skyPixOverlap)

        # Whenever anyone inserts a new governor dimension value, we want
        # to enable overlaps for that value between this element and
        # commonSkyPix.
        def callback(record: DimensionRecord) -> None:
            skyPixOverlap.enable(
                result,
                element.universe.commonSkyPix,
                getattr(record, governorDimension.primaryKey.name),
            )

        governor.registerInsertionListener(callback)
        return result

    @property
    def element(self) -> DatabaseDimensionElement:
        # Docstring inherited from DimensionRecordStorage.element.
        return self._element

    def clearCaches(self) -> None:
        # Docstring inherited from DimensionRecordStorage.clearCaches.
        pass

    def join(
        self,
        builder: QueryBuilder, *,
        regions: Optional[NamedKeyDict[DimensionElement, SpatialRegionDatabaseRepresentation]] = None,
        timespans: Optional[NamedKeyDict[DimensionElement, TimespanDatabaseRepresentation]] = None,
    ) -> sqlalchemy.sql.FromClause:
        # Docstring inherited from DimensionRecordStorage.
        # NOTE(review): the return annotation previously said ``None`` even
        # though the method returns the element's table; confirm that the
        # base-class signature agrees with `sqlalchemy.sql.FromClause`.
        if regions is not None:
            # Spatial joins go through the materialized overlaps with the
            # common skypix dimension.
            dimensions = NamedValueSet(self.element.required)
            dimensions.add(self.element.universe.commonSkyPix)
            assert self._skyPixOverlap is not None
            builder.joinTable(
                self._skyPixOverlap.select(self.element.universe.commonSkyPix, Ellipsis),
                dimensions,
            )
            regionsInTable = self._db.getSpatialRegionRepresentation().fromSelectable(self._table)
            regions[self.element] = regionsInTable
        joinOn = builder.startJoin(self._table, self.element.dimensions,
                                   self.element.RecordClass.fields.dimensions.names)
        if timespans is not None:
            timespanInTable = self._db.getTimespanRepresentation().fromSelectable(self._table)
            # Require this table's timespan to overlap every timespan that is
            # already part of the query, then register our own.
            for timespanInQuery in timespans.values():
                joinOn.append(timespanInQuery.overlaps(timespanInTable))
            timespans[self.element] = timespanInTable
        builder.finishJoin(self._table, joinOn)
        return self._table

    def fetch(self, dataIds: DataCoordinateIterable) -> Iterable[DimensionRecord]:
        # Docstring inherited from DimensionRecordStorage.fetch.
        RecordClass = self.element.RecordClass
        query = SimpleQuery()
        query.columns.extend(self._table.columns[name] for name in RecordClass.fields.standard.names)
        if self.element.spatial is not None:
            query.columns.append(self._table.columns["region"])
        # Only temporal elements have timespan columns; `None` marks the
        # non-temporal case so the name is always bound.
        TimespanReprClass = None
        if self.element.temporal is not None:
            TimespanReprClass = self._db.getTimespanRepresentation()
            query.columns.extend(self._table.columns[name] for name in TimespanReprClass.getFieldNames())
        query.join(self._table)
        dataIds.constrain(query, lambda name: self._fetchColumns[name])
        for row in self._db.query(query.combine()):
            values = row._asdict()
            if TimespanReprClass is not None:
                # Collapse the database timespan columns into the single
                # timespan value the record class expects.
                values[TimespanDatabaseRepresentation.NAME] = TimespanReprClass.extract(values)
            yield RecordClass(**values)

    def insert(self, *records: DimensionRecord, replace: bool = False) -> None:
        # Docstring inherited from DimensionRecordStorage.insert.
        elementRows = [record.toDict() for record in records]
        if self.element.temporal is not None:
            TimespanReprClass = self._db.getTimespanRepresentation()
            for row in elementRows:
                # Expand the timespan value into its database column(s).
                timespan = row.pop(TimespanDatabaseRepresentation.NAME)
                TimespanReprClass.update(timespan, result=row)
        # Element rows and their overlap rows are written in one transaction
        # so they cannot get out of sync.
        with self._db.transaction():
            if replace:
                self._db.replace(self._table, *elementRows)
            else:
                self._db.insert(self._table, *elementRows)
            if self._skyPixOverlap is not None:
                self._skyPixOverlap.insert(records, replace=replace)

    def sync(self, record: DimensionRecord, update: bool = False) -> Union[bool, Dict[str, Any]]:
        # Docstring inherited from DimensionRecordStorage.sync.
        compared = record.toDict()
        # Required fields identify the row; everything left in ``compared``
        # is checked (and possibly updated) against the existing row.
        keys = {name: compared.pop(name) for name in record.fields.required.names}
        if self.element.temporal is not None:
            TimespanReprClass = self._db.getTimespanRepresentation()
            timespan = compared.pop(TimespanDatabaseRepresentation.NAME)
            TimespanReprClass.update(timespan, result=compared)
        with self._db.transaction():
            _, inserted_or_updated = self._db.sync(
                self._table,
                keys=keys,
                compared=compared,
                update=update,
            )
            if inserted_or_updated and self._skyPixOverlap is not None:
                if inserted_or_updated is True:
                    # Inserted a new row, so we just need to insert new overlap
                    # rows.
                    self._skyPixOverlap.insert([record])
                elif "region" in inserted_or_updated:  # type: ignore
                    # Updated the region, so we need to delete old overlap rows
                    # and insert new ones.
                    # (mypy should be able to tell that inserted_or_updated
                    # must be a dict if we get to this clause, but it can't)
                    self._skyPixOverlap.insert([record], replace=True)
                # Otherwise we updated something other than a region, and the
                # overlap rows are still valid.
        return inserted_or_updated

    def digestTables(self) -> Iterable[sqlalchemy.schema.Table]:
        # Docstring inherited from DimensionRecordStorage.digestTables.
        result = [self._table]
        if self._skyPixOverlap is not None:
            result.extend(self._skyPixOverlap.digestTables())
        return result

    def connect(self, overlaps: DatabaseDimensionOverlapStorage) -> None:
        # Docstring inherited from DatabaseDimensionRecordStorage.
        self._otherOverlaps.append(overlaps)

271 

272 

class _SkyPixOverlapStorage:
    """A helper object for `TableDimensionRecordStorage` that manages its
    materialized overlaps with skypix dimensions.

    New instances should be constructed by calling `initialize`, not by calling
    the constructor directly.

    Parameters
    ----------
    db : `Database`
        Interface to the underlying database engine and namespace.
    element : `DatabaseDimensionElement`
        Dimension element whose overlaps are to be managed.
    summaryTable : `sqlalchemy.schema.Table`
        Table that records which combinations of skypix dimensions and
        governor dimension values have materialized overlap rows.
    overlapTable : `sqlalchemy.schema.Table`
        Table containing the actual materialized overlap rows.
    governor : `GovernorDimensionRecordStorage`
        Record storage backend for this element's governor dimension.

    Notes
    -----
    This class (and most importantly, the tables it relies on) can in principle
    manage overlaps between this element and any skypix dimension, but at
    present it is only being used to manage relationships with the special
    ``commonSkyPix`` dimension, because that's all the query system uses.
    Eventually, we expect to require users to explicitly materialize all
    relationships they will want to use in queries.

    Other possible future improvements include:

     - allowing finer-grained skypix dimensions to provide overlap rows for
       coarser ones, by dividing indices by powers of 4 (and possibly doing
       ``SELECT DISTINCT`` in the subquery to remove duplicates);

     - allowing finer-grained database elements (e.g. patch) to provide overlap
       rows for coarser ones (e.g. tract), by ignoring irrelevant columns
       (e.g. the patch IDs) in the subquery (again, possible with
       ``SELECT DISTINCT``).

    But there's no point to doing any of that until the query system can
    figure out how best to ask for overlap rows when an exact match isn't
    available.
    """

    def __init__(
        self,
        db: Database,
        element: DatabaseDimensionElement,
        summaryTable: sqlalchemy.schema.Table,
        overlapTable: sqlalchemy.schema.Table,
        governor: GovernorDimensionRecordStorage,
    ):
        self._db = db
        self.element = element
        assert element.spatial is not None
        self._summaryTable = summaryTable
        self._overlapTable = overlapTable
        self._governor = governor

    @classmethod
    def initialize(
        cls,
        db: Database,
        element: DatabaseDimensionElement, *,
        context: Optional[StaticTablesContext],
        governor: GovernorDimensionRecordStorage,
    ) -> _SkyPixOverlapStorage:
        """Construct a new instance, creating tables as needed.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        element : `DatabaseDimensionElement`
            Dimension element whose overlaps are to be managed.
        context : `StaticTablesContext`, optional
            If provided, an object to use to create any new tables.  If not
            provided, ``db.ensureTableExists`` should be used instead.
        governor : `GovernorDimensionRecordStorage`
            Record storage backend for this element's governor dimension.

        Returns
        -------
        storage : `_SkyPixOverlapStorage`
            New instance of ``cls`` backed by the summary and overlap tables.
        """
        if context is not None:
            op = context.addTable
        else:
            op = db.ensureTableExists
        summaryTable = op(
            cls._SUMMARY_TABLE_NAME_SPEC.format(element=element),
            cls._makeSummaryTableSpec(element),
        )
        overlapTable = op(
            cls._OVERLAP_TABLE_NAME_SPEC.format(element=element),
            cls._makeOverlapTableSpec(element),
        )
        # Construct via ``cls`` (not the hard-coded class name) so that a
        # subclass calling `initialize` gets an instance of itself.
        return cls(db, element, summaryTable=summaryTable, overlapTable=overlapTable,
                   governor=governor)

    _SUMMARY_TABLE_NAME_SPEC = "{element.name}_skypix_overlap_summary"

    @classmethod
    def _makeSummaryTableSpec(cls, element: DatabaseDimensionElement) -> ddl.TableSpec:
        """Create a specification for the table that records which combinations
        of skypix dimension and governor value have materialized overlaps.

        Parameters
        ----------
        element : `DatabaseDimensionElement`
            Dimension element whose overlaps are to be managed.

        Returns
        -------
        tableSpec : `ddl.TableSpec`
            Table specification.
        """
        assert element.spatial is not None
        tableSpec = ddl.TableSpec(
            fields=[
                ddl.FieldSpec(
                    name="skypix_system",
                    dtype=sqlalchemy.String,
                    length=16,
                    nullable=False,
                    primaryKey=True,
                ),
                ddl.FieldSpec(
                    name="skypix_level",
                    dtype=sqlalchemy.SmallInteger,
                    nullable=False,
                    primaryKey=True,
                ),
            ]
        )
        addDimensionForeignKey(tableSpec, element.spatial.governor, primaryKey=True)
        return tableSpec

    _OVERLAP_TABLE_NAME_SPEC = "{element.name}_skypix_overlap"

    @classmethod
    def _makeOverlapTableSpec(cls, element: DatabaseDimensionElement) -> ddl.TableSpec:
        """Create a specification for the table that holds materialized
        overlap rows.

        Parameters
        ----------
        element : `DatabaseDimensionElement`
            Dimension element whose overlaps are to be managed.

        Returns
        -------
        tableSpec : `ddl.TableSpec`
            Table specification.
        """
        assert element.spatial is not None
        tableSpec = ddl.TableSpec(
            fields=[
                ddl.FieldSpec(
                    name="skypix_system",
                    dtype=sqlalchemy.String,
                    length=16,
                    nullable=False,
                    primaryKey=True,
                ),
                ddl.FieldSpec(
                    name="skypix_level",
                    dtype=sqlalchemy.SmallInteger,
                    nullable=False,
                    primaryKey=True,
                ),
                # (more columns added below)
            ],
            unique=set(),
            indexes={
                # This index has the same fields as the PK, in a different
                # order, to facilitate queries that know skypix_index and want
                # to find the other element.
                ("skypix_system", "skypix_level", "skypix_index",) + tuple(element.graph.required.names),
            },
            foreignKeys=[
                # Foreign key to summary table.  This makes sure we don't
                # materialize any overlaps without remembering that we've done
                # so in the summary table, though it can't prevent the converse
                # of adding a summary row without adding overlap row (either of
                # those is a logic bug, of course, but we want to be defensive
                # about those).  Using ON DELETE CASCADE, it'd be very easy to
                # implement "disabling" an overlap materialization, because we
                # can just delete the summary row.
                # Note that the governor dimension column is added below, in
                # the call to addDimensionForeignKey.
                ddl.ForeignKeySpec(
                    cls._SUMMARY_TABLE_NAME_SPEC.format(element=element),
                    source=("skypix_system", "skypix_level", element.spatial.governor.name),
                    target=("skypix_system", "skypix_level", element.spatial.governor.name),
                    onDelete="CASCADE",
                ),
            ],
        )
        # Add fields for the standard element this class manages overlaps for.
        # This is guaranteed to add a column for the governor dimension,
        # because that's a required dependency of element.
        for dimension in element.required:
            addDimensionForeignKey(tableSpec, dimension, primaryKey=True)
        # Add field for the actual skypix index.  We do this later because I
        # think we care (at least a bit) about the order in which the primary
        # key is defined, in that we want a non-summary column like this one
        # to appear after the governor dimension column.
        tableSpec.fields.add(
            ddl.FieldSpec(
                name="skypix_index",
                dtype=sqlalchemy.BigInteger,
                nullable=False,
                primaryKey=True,
            )
        )
        return tableSpec

    def enable(
        self,
        storage: TableDimensionRecordStorage,
        skypix: SkyPixDimension,
        governorValue: str,
    ) -> None:
        """Enable materialization of overlaps between a skypix dimension
        and the records of ``self.element`` with a particular governor value.

        Parameters
        ----------
        storage : `TableDimensionRecordStorage`
            Storage object for the records of ``self.element``.
        skypix : `SkyPixDimension`
            The skypix dimension (system and level) for which overlaps should
            be materialized.
        governorValue : `str`
            Value of this element's governor dimension for which overlaps
            should be materialized.  For example, if ``self.element`` is
            ``visit``, this is an instrument name; if ``self.element`` is
            ``patch``, this is a skymap name.

        Notes
        -----
        If there are existing rows for the given ``governorValue``, overlap
        rows for them will be immediately computed and inserted.  At present,
        that never happens, because we only enable overlaps with
        `DimensionUniverse.commonSkyPix`, and that happens immediately after
        each governor row is inserted (and there can't be any patch rows,
        for example, until after the corresponding skymap row is inserted).

        After calling `enable` for a particular combination, any new records
        for ``self.element`` that are inserted will automatically be
        accompanied by overlap records (via calls to `insert` made
        by `TableDimensionRecordStorage` methods).
        """
        # Because we're essentially materializing a view in Python, we
        # aggressively lock all tables we're reading and writing in order to be
        # sure nothing gets out of sync.  This may not be the most efficient
        # approach possible, but we'll focus on correct before we focus on
        # fast, and enabling a new overlap combination should be a very rare
        # operation anyway, and never one we do in parallel.
        with self._db.transaction(lock=[self._governor.table, storage._table,
                                        self._summaryTable, self._overlapTable]):
            # Only the "was a row inserted" flag matters here; the synced row
            # itself is not needed.
            _, inserted = self._db.sync(
                self._summaryTable,
                keys={
                    "skypix_system": skypix.system.name,
                    "skypix_level": skypix.level,
                    self._governor.element.name: governorValue,
                },
            )
            if inserted:
                _LOG.debug(
                    "Precomputing initial overlaps for %s vs %s for %s=%s",
                    skypix.name,
                    self.element.name,
                    self._governor.element.name,
                    governorValue
                )
                self._fill(storage=storage, skypix=skypix, governorValue=governorValue)
            else:
                _LOG.debug(
                    "Overlaps already precomputed for %s vs %s for %s=%s",
                    skypix.name,
                    self.element.name,
                    self._governor.element.name,
                    governorValue
                )

    def _fill(
        self, *,
        storage: TableDimensionRecordStorage,
        skypix: SkyPixDimension,
        governorValue: str,
    ) -> None:
        """Insert overlap records for a newly-enabled combination of skypix
        dimension and governor value.

        This method should only be called by `enable`.

        Parameters
        ----------
        storage : `TableDimensionRecordStorage`
            Storage object for the records of ``self.element``.
        skypix : `SkyPixDimension`
            The skypix dimension (system and level) for which overlaps should
            be materialized.
        governorValue : `str`
            Value of this element's governor dimension for which overlaps
            should be materialized.  For example, if ``self.element`` is
            ``visit``, this is an instrument name; if ``self.element`` is
            ``patch``, this is a skymap name.
        """
        overlapRecords: List[dict] = []
        # `DimensionRecordStorage.fetch` as defined by the ABC expects to be
        # given iterables of data IDs that correspond to that element's graph
        # (e.g. {instrument, visit, detector}), not just some subset of it
        # (e.g. {instrument}).  But we know the implementation of `fetch` for
        # `TableDimensionRecordStorage` will use this iterable to do exactly
        # what we want.
        governorDataId = DataCoordinate.standardize({self._governor.element.name: governorValue},
                                                    graph=self._governor.element.graph)
        for record in storage.fetch(DataCoordinateIterable.fromScalar(governorDataId)):
            if record.region is None:
                continue
            baseOverlapRecord = record.dataId.byName()
            baseOverlapRecord["skypix_system"] = skypix.system.name
            baseOverlapRecord["skypix_level"] = skypix.level
            # The envelope is a sequence of half-open [begin, end) index
            # ranges; emit one overlap row per index.
            for begin, end in skypix.pixelization.envelope(record.region):
                overlapRecords.extend(
                    dict(baseOverlapRecord, skypix_index=index) for index in range(begin, end)
                )
        _LOG.debug(
            "Inserting %d initial overlap rows for %s vs %s for %s=%r",
            len(overlapRecords),
            skypix.name,
            self.element.name,
            self._governor.element.name,
            governorValue,
        )
        self._db.insert(self._overlapTable, *overlapRecords)

    def insert(self, records: Sequence[DimensionRecord], replace: bool = False) -> None:
        """Insert overlaps for a sequence of ``self.element`` records that
        have just been inserted.

        This must be called by any method that inserts records for that
        element (i.e. `TableDimensionRecordStorage.insert` and
        `TableDimensionRecordStorage.sync`), within the same transaction.

        Parameters
        ----------
        records : `Sequence` [ `DimensionRecord` ]
            Records for ``self.element``.  Records with `None` regions are
            ignored.
        replace : `bool`, optional
            If `True` (`False` is default) one or more of the given records may
            already exist and is being updated, so we need to delete any
            existing overlap records first.
        """
        # Group records by family.governor value.
        grouped: Dict[str, List[DimensionRecord]] = defaultdict(list)
        for record in records:
            grouped[getattr(record, self._governor.element.name)].append(record)
        _LOG.debug(
            "Precomputing new skypix overlaps for %s where %s in %s.",
            self.element.name, self._governor.element.name, grouped.keys()
        )
        # Make sure the set of combinations to materialize does not change
        # while we are materializing the ones we have, by locking the summary
        # table.  Because we aren't planning to write to the summary table,
        # this could just be a SHARED lock instead of an EXCLUSIVE one, but
        # there's no API for that right now.
        with self._db.transaction(lock=[self._summaryTable]):
            # Query for the skypix dimensions to be associated with each
            # governor value.
            gvCol = self._summaryTable.columns[self._governor.element.name]
            sysCol = self._summaryTable.columns.skypix_system
            lvlCol = self._summaryTable.columns.skypix_level
            query = sqlalchemy.sql.select(
                gvCol, sysCol, lvlCol,
            ).select_from(
                self._summaryTable
            ).where(
                gvCol.in_(list(grouped.keys()))
            )
            # Group results by governor value, then skypix system.
            skypix: Dict[str, NamedKeyDict[SkyPixSystem, List[int]]] = {
                gv: NamedKeyDict() for gv in grouped.keys()
            }
            for summaryRow in self._db.query(query).mappings():
                system = self.element.universe.skypix[summaryRow[sysCol]]
                skypix[summaryRow[gvCol]].setdefault(system, []).append(summaryRow[lvlCol])
            if replace:
                # Construct constraints for a DELETE query as a list of dicts.
                # We include the skypix_system and skypix_level column values
                # explicitly instead of just letting the query search for all
                # of those related to the given records, because they are the
                # first columns in the primary key, and hence searching with
                # them will be way faster (and we don't want to add a new index
                # just for this operation).
                to_delete: List[Dict[str, Any]] = []
                for gv, skypix_systems in skypix.items():
                    for system, skypix_levels in skypix_systems.items():
                        to_delete.extend(
                            {"skypix_system": system.name, "skypix_level": level, **record.dataId.byName()}
                            for record, level in itertools.product(grouped[gv], skypix_levels)
                        )
                self._db.delete(
                    self._overlapTable,
                    ["skypix_system", "skypix_level"] + list(self.element.graph.required.names),
                    *to_delete,
                )
            overlapRecords: List[dict] = []
            # Compute overlaps for one governor value at a time, but gather
            # them all up for one insert.
            for gv, group in grouped.items():
                overlapRecords.extend(self._compute(group, skypix[gv], gv))
            _LOG.debug(
                "Inserting %d new skypix overlap rows for %s where %s in %s.",
                len(overlapRecords), self.element.name, self._governor.element.name, grouped.keys()
            )
            self._db.insert(self._overlapTable, *overlapRecords)

    def _compute(
        self,
        records: Sequence[DimensionRecord],
        skypix: NamedKeyDict[SkyPixSystem, List[int]],
        governorValue: str,
    ) -> Iterator[dict]:
        """Compute all overlap rows for a particular governor dimension value
        and all of the skypix dimensions for which its overlaps are enabled.

        This method should only be called by `insert`.

        Parameters
        ----------
        records : `Sequence` [ `DimensionRecord` ]
            Records for ``self.element``.  Records with `None` regions are
            ignored.  All must have the governor value given.
        skypix : `NamedKeyDict` [ `SkyPixSystem`, `list` [ `int` ] ]
            Mapping containing all skypix systems and levels for which overlaps
            should be computed, grouped by `SkyPixSystem`.  The level lists
            are not modified.
        governorValue : `str`
            Value of this element's governor dimension for which overlaps
            should be computed.  For example, if ``self.element`` is ``visit``,
            this is an instrument name; if ``self.element`` is ``patch``, this
            is a skymap name.

        Yields
        ------
        row : `dict`
            Dictionary representing an overlap row.
        """
        # Order each system's levels from finest to coarsest once, up front,
        # working on copies so the caller's lists are left untouched.  Systems
        # with no enabled levels contribute nothing and are dropped here.
        levelsBySystem = [
            (system, sorted(levels, reverse=True))
            for system, levels in skypix.items()
            if levels
        ]
        governorName = self._governor.element.name
        # Process input records one at time, computing all skypix indices for
        # each.
        for record in records:
            if record.region is None:
                continue
            assert getattr(record, governorName) == governorValue
            for system, levels in levelsBySystem:
                baseOverlapRecord = record.dataId.byName()
                baseOverlapRecord["skypix_system"] = system.name
                # Start with the first level, which is the finest-grained one.
                # Compute skypix envelope indices directly for that.
                finest = levels[0]
                indices: Dict[int, Set[int]] = {finest: set()}
                for begin, end in system[finest].pixelization.envelope(record.region):
                    indices[finest].update(range(begin, end))
                # Divide those indices by powers of 4 (and remove duplicates)
                # to work our way up to the last (coarsest) level.
                for lastLevel, nextLevel in zip(levels[:-1], levels[1:]):
                    factor = 4**(lastLevel - nextLevel)
                    indices[nextLevel] = {index//factor for index in indices[lastLevel]}
                for level in levels:
                    yield from (
                        {
                            "skypix_level": level,
                            "skypix_index": index,
                            **baseOverlapRecord,  # type: ignore
                        } for index in indices[level]
                    )

    def select(
        self,
        skypix: SkyPixDimension,
        governorValues: Union[AbstractSet[str], EllipsisType],
    ) -> sqlalchemy.sql.FromClause:
        """Construct a subquery expression containing overlaps between the
        given skypix dimension and governor values.

        Parameters
        ----------
        skypix : `SkyPixDimension`
            The skypix dimension (system and level) for which overlaps should
            be materialized.
        governorValues : `AbstractSet` [ `str` ] or ``...``
            Values of this element's governor dimension for which overlaps
            should be returned.  For example, if ``self.element`` is ``visit``,
            this is a set of instrument names; if ``self.element`` is
            ``patch``, this is a set of skymap names.  If ``...`` all values
            in the database are used (`GovernorDimensionRecordStorage.values`).

        Returns
        -------
        subquery : `sqlalchemy.sql.FromClause`
            A SELECT query with an alias, intended for use as a subquery, with
            columns equal to ``self.element.required.names`` + ``skypix.name``.

        Raises
        ------
        RuntimeError
            Raised if ``skypix`` is not the common skypix dimension and
            overlaps with it have not been materialized for all of the
            requested governor values.
        """
        if skypix != self.element.universe.commonSkyPix:
            # We guarantee elsewhere that we always materialize all overlaps
            # vs. commonSkyPix, but for everything else, we need to check that
            # we have materialized this combination of governor values and
            # skypix.
            summaryWhere = [
                self._summaryTable.columns.skypix_system == skypix.system.name,
                self._summaryTable.columns.skypix_level == skypix.level,
            ]
            gvCol = self._summaryTable.columns[self._governor.element.name]
            if governorValues is not Ellipsis:
                summaryWhere.append(gvCol.in_(list(governorValues)))
            summaryQuery = sqlalchemy.sql.select(
                gvCol
            ).select_from(
                self._summaryTable
            ).where(
                sqlalchemy.sql.and_(*summaryWhere)
            )
            materializedGovernorValues = {row._mapping[gvCol] for row in self._db.query(summaryQuery)}
            if governorValues is Ellipsis:
                missingGovernorValues = self._governor.values - materializedGovernorValues
            else:
                missingGovernorValues = governorValues - materializedGovernorValues
            if missingGovernorValues:
                raise RuntimeError(
                    f"Query requires an overlap join between {skypix.name} and {self.element.name} "
                    f"(for {self._governor.element.name} in {missingGovernorValues}), but these "
                    f"have not been materialized."
                )
        # Expose the skypix index under the skypix dimension's own name so the
        # subquery can be joined on dimension names.
        columns = [self._overlapTable.columns.skypix_index.label(skypix.name)]
        columns.extend(self._overlapTable.columns[name] for name in self.element.graph.required.names)
        overlapWhere = [
            self._overlapTable.columns.skypix_system == skypix.system.name,
            self._overlapTable.columns.skypix_level == skypix.level,
        ]
        if governorValues is not Ellipsis:
            overlapWhere.append(
                self._overlapTable.columns[self._governor.element.name].in_(list(governorValues))
            )
        overlapQuery = sqlalchemy.sql.select(
            *columns
        ).select_from(
            self._overlapTable
        ).where(
            sqlalchemy.sql.and_(*overlapWhere)
        )
        return overlapQuery.alias(f"{self.element.name}_{skypix.name}_overlap")

    def digestTables(self) -> Iterable[sqlalchemy.schema.Table]:
        """Return tables used for schema digest.

        Returns
        -------
        tables : `Iterable` [ `sqlalchemy.schema.Table` ]
            Possibly empty set of tables for schema digest calculations.
        """
        return [self._summaryTable, self._overlapTable]