Coverage for python/lsst/daf/butler/registry/dimensions/table.py: 86%

236 statements  

« prev     ^ index     » next       coverage.py v7.2.5, created at 2023-05-02 18:18 -0700

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = ["TableDimensionRecordStorage"] 

24 

25import itertools 

26import logging 

27import warnings 

28from collections import defaultdict 

29from typing import AbstractSet, Any, Dict, Iterable, Iterator, List, Mapping, Optional, Sequence, Set, Union 

30 

31import sqlalchemy 

32 

33from ...core import ( 

34 DatabaseDimensionElement, 

35 DataCoordinate, 

36 DataCoordinateIterable, 

37 DimensionElement, 

38 DimensionRecord, 

39 GovernorDimension, 

40 NamedKeyDict, 

41 NamedKeyMapping, 

42 NamedValueSet, 

43 SimpleQuery, 

44 SkyPixDimension, 

45 SkyPixSystem, 

46 SpatialRegionDatabaseRepresentation, 

47 TimespanDatabaseRepresentation, 

48 addDimensionForeignKey, 

49 ddl, 

50) 

51from ..interfaces import ( 

52 Database, 

53 DatabaseDimensionOverlapStorage, 

54 DatabaseDimensionRecordStorage, 

55 GovernorDimensionRecordStorage, 

56 StaticTablesContext, 

57) 

58from ..queries import QueryBuilder 

59from ..wildcards import Ellipsis, EllipsisType 

60 

# Module-level logger, named after this module.
_LOG = logging.getLogger(__name__)


MAX_FETCH_CHUNK = 1000
"""Maximum number of data IDs for which we fetch records at a time.

Barring something database-engine-specific, this sets the size of the actual
SQL query, not just the number of result rows, because the only way to query
for multiple data IDs in a single SELECT query via SQLAlchemy is to have an OR
term in the WHERE clause for each one.
"""

72 

73 

class TableDimensionRecordStorage(DatabaseDimensionRecordStorage):
    """A record storage implementation that uses a regular database table.

    Parameters
    ----------
    db : `Database`
        Interface to the database engine and namespace that will hold these
        dimension records.
    element : `DatabaseDimensionElement`
        The element whose records this storage will manage.
    table : `sqlalchemy.schema.Table`
        The logical table for the element.
    skyPixOverlap : `_SkyPixOverlapStorage`, optional
        Object that manages the tables that hold materialized spatial overlap
        joins to skypix dimensions.  Should be `None` if (and only if)
        ``element.spatial is None``.
    """

    def __init__(
        self,
        db: Database,
        element: DatabaseDimensionElement,
        *,
        table: sqlalchemy.schema.Table,
        skyPixOverlap: Optional[_SkyPixOverlapStorage] = None,
    ):
        self._db = db
        self._table = table
        self._element = element
        # Map each dimension name of the element to the table column that
        # holds it; `fetch` uses this to translate data ID constraints into
        # column constraints.
        self._fetchColumns: Dict[str, sqlalchemy.sql.ColumnElement] = {
            dimension.name: self._table.columns[name]
            for dimension, name in zip(
                self._element.dimensions, self._element.RecordClass.fields.dimensions.names
            )
        }
        self._skyPixOverlap = skyPixOverlap
        self._otherOverlaps: List[DatabaseDimensionOverlapStorage] = []

    @classmethod
    def initialize(
        cls,
        db: Database,
        element: DatabaseDimensionElement,
        *,
        context: Optional[StaticTablesContext] = None,
        config: Mapping[str, Any],
        governors: NamedKeyMapping[GovernorDimension, GovernorDimensionRecordStorage],
    ) -> DatabaseDimensionRecordStorage:
        # Docstring inherited from DatabaseDimensionRecordStorage.
        spec = element.RecordClass.fields.makeTableSpec(
            RegionReprClass=db.getSpatialRegionRepresentation(),
            TimespanReprClass=db.getTimespanRepresentation(),
        )
        if context is not None:
            table = context.addTable(element.name, spec)
        else:
            table = db.ensureTableExists(element.name, spec)
        spatial = element.spatial
        if spatial is None:
            # Non-spatial elements have no overlap tables to manage.
            return cls(db, element, table=table)
        governor = governors[spatial.governor]
        skyPixOverlap = _SkyPixOverlapStorage.initialize(
            db,
            element,
            context=context,
            governor=governor,
        )
        result = cls(db, element, table=table, skyPixOverlap=skyPixOverlap)
        governorKeyName = spatial.governor.primaryKey.name

        # Whenever anyone inserts a new governor dimension value, we want
        # to enable overlaps for that value between this element and
        # commonSkyPix.
        def callback(record: DimensionRecord) -> None:
            skyPixOverlap.enable(
                result,
                element.universe.commonSkyPix,
                getattr(record, governorKeyName),
            )

        governor.registerInsertionListener(callback)
        return result

    @property
    def element(self) -> DatabaseDimensionElement:
        # Docstring inherited from DimensionRecordStorage.element.
        return self._element

    def clearCaches(self) -> None:
        # Docstring inherited from DimensionRecordStorage.clearCaches.
        # This class holds no caches, so there is nothing to clear.
        pass

    def join(
        self,
        builder: QueryBuilder,
        *,
        regions: Optional[NamedKeyDict[DimensionElement, SpatialRegionDatabaseRepresentation]] = None,
        timespans: Optional[NamedKeyDict[DimensionElement, TimespanDatabaseRepresentation]] = None,
    ) -> sqlalchemy.sql.FromClause:
        # Docstring inherited from DimensionRecordStorage.
        if regions is not None:
            # A spatial join is requested: first join in the materialized
            # overlaps with the common skypix dimension, then register this
            # element's region column with the caller.
            dimensions = NamedValueSet(self.element.required)
            dimensions.add(self.element.universe.commonSkyPix)
            assert self._skyPixOverlap is not None
            builder.joinTable(
                self._skyPixOverlap.select(self.element.universe.commonSkyPix, Ellipsis),
                dimensions,
            )
            regionsInTable = self._db.getSpatialRegionRepresentation().fromSelectable(self._table)
            regions[self.element] = regionsInTable
        joinOn = builder.startJoin(
            self._table, self.element.dimensions, self.element.RecordClass.fields.dimensions.names
        )
        if timespans is not None:
            # Require this element's timespan to overlap every timespan that
            # is already part of the query, then register it as well.
            timespanInTable = self._db.getTimespanRepresentation().fromSelectable(self._table)
            for timespanInQuery in timespans.values():
                joinOn.append(timespanInQuery.overlaps(timespanInTable))
            timespans[self.element] = timespanInTable
        builder.finishJoin(self._table, joinOn)
        return self._table

    def fetch(self, dataIds: DataCoordinateIterable) -> Iterable[DimensionRecord]:
        # Docstring inherited from DimensionRecordStorage.fetch.
        RecordClass = self.element.RecordClass
        query = SimpleQuery()
        query.columns.extend(self._table.columns[name] for name in RecordClass.fields.standard.names)
        if self.element.spatial is not None:
            query.columns.append(self._table.columns["region"])
        if self.element.temporal is not None:
            TimespanReprClass = self._db.getTimespanRepresentation()
            query.columns.extend(self._table.columns[name] for name in TimespanReprClass.getFieldNames())
        query.join(self._table)
        dataIds.constrain(query, lambda name: self._fetchColumns[name])
        with warnings.catch_warnings():
            # Some of our generated queries may contain cartesian joins, this
            # is not a serious issue as it is properly constrained, so we want
            # to suppress sqlalchemy warnings.
            warnings.filterwarnings(
                "ignore",
                message="SELECT statement has a cartesian product",
                category=sqlalchemy.exc.SAWarning,
            )
            with self._db.query(query.combine()) as sql_result:
                rows = sql_result.fetchall()
        # Yield only after both context managers have exited, so that neither
        # the modified warning filters nor the query result stay active while
        # the caller is consuming this generator.
        for row in rows:
            values = row._asdict()
            if self.element.temporal is not None:
                values[TimespanDatabaseRepresentation.NAME] = TimespanReprClass.extract(values)
            yield RecordClass(**values)

    def insert(self, *records: DimensionRecord, replace: bool = False, skip_existing: bool = False) -> None:
        # Docstring inherited from DimensionRecordStorage.insert.
        elementRows = [record.toDict() for record in records]
        if self.element.temporal is not None:
            # Replace the single logical timespan entry with the column(s)
            # the database's timespan representation writes into the row.
            TimespanReprClass = self._db.getTimespanRepresentation()
            for row in elementRows:
                timespan = row.pop(TimespanDatabaseRepresentation.NAME)
                TimespanReprClass.update(timespan, result=row)
        with self._db.transaction():
            if replace:
                self._db.replace(self._table, *elementRows)
            elif skip_existing:
                self._db.ensure(self._table, *elementRows, primary_key_only=True)
            else:
                self._db.insert(self._table, *elementRows)
            if self._skyPixOverlap is not None:
                # NOTE(review): ``skip_existing`` is not forwarded here (the
                # overlap storage's ``insert`` has no such option) - confirm
                # that overlap rows for records that already existed cannot
                # conflict with rows inserted earlier.
                self._skyPixOverlap.insert(records, replace=replace)

    def sync(self, record: DimensionRecord, update: bool = False) -> Union[bool, Dict[str, Any]]:
        # Docstring inherited from DimensionRecordStorage.sync.
        compared = record.toDict()
        # Required fields identify the row; everything left in ``compared``
        # is checked against (or written to) the existing row.
        keys = {name: compared.pop(name) for name in record.fields.required.names}
        if self.element.temporal is not None:
            TimespanReprClass = self._db.getTimespanRepresentation()
            timespan = compared.pop(TimespanDatabaseRepresentation.NAME)
            TimespanReprClass.update(timespan, result=compared)
        with self._db.transaction():
            _, inserted_or_updated = self._db.sync(
                self._table,
                keys=keys,
                compared=compared,
                update=update,
            )
            if inserted_or_updated and self._skyPixOverlap is not None:
                if inserted_or_updated is True:
                    # Inserted a new row, so we just need to insert new overlap
                    # rows.
                    self._skyPixOverlap.insert([record])
                elif "region" in inserted_or_updated:
                    # Updated the region, so we need to delete old overlap rows
                    # and insert new ones.
                    # (mypy should be able to tell that inserted_or_updated
                    # must be a dict if we get to this clause, but it can't)
                    self._skyPixOverlap.insert([record], replace=True)
                # Otherwise we updated something other than a region, and no
                # overlap rows are touched.
        return inserted_or_updated

    def digestTables(self) -> Iterable[sqlalchemy.schema.Table]:
        # Docstring inherited from DimensionRecordStorage.digestTables.
        result = [self._table]
        if self._skyPixOverlap is not None:
            result.extend(self._skyPixOverlap.digestTables())
        return result

    def connect(self, overlaps: DatabaseDimensionOverlapStorage) -> None:
        # Docstring inherited from DatabaseDimensionRecordStorage.
        self._otherOverlaps.append(overlaps)

282 

283 

class _SkyPixOverlapStorage:
    """A helper object for `TableDimensionRecordStorage` that manages its
    materialized overlaps with skypix dimensions.

    New instances should be constructed by calling `initialize`, not by calling
    the constructor directly.

    Parameters
    ----------
    db : `Database`
        Interface to the underlying database engine and namespace.
    element : `DatabaseDimensionElement`
        Dimension element whose overlaps are to be managed.
    summaryTable : `sqlalchemy.schema.Table`
        Table that records which combinations of skypix dimensions and
        governor dimension values have materialized overlap rows.
    overlapTable : `sqlalchemy.schema.Table`
        Table containing the actual materialized overlap rows.
    governor : `GovernorDimensionRecordStorage`
        Record storage backend for this element's governor dimension.

    Notes
    -----
    This class (and most importantly, the tables it relies on) can in principle
    manage overlaps with any skypix dimension, but at present it is only being
    used to manage relationships with the special ``commonSkyPix`` dimension,
    because that's all the query system uses.  Eventually, we expect to
    require users to explicitly materialize all relationships they will want
    to use in queries.

    Other possible future improvements include:

     - allowing finer-grained skypix dimensions to provide overlap rows for
       coarser ones, by dividing indices by powers of 4 (and possibly doing
       ``SELECT DISTINCT`` in the subquery to remove duplicates);

     - allowing finer-grained database elements (e.g. patch) to provide overlap
       rows for coarser ones (e.g. tract), by ignoring irrelevant columns
       (e.g. the patch IDs) in the subquery (again, possible with
       ``SELECT DISTINCT``).

    But there's no point to doing any of that until the query system can
    figure out how best to ask for overlap rows when an exact match isn't
    available.
    """

    def __init__(
        self,
        db: Database,
        element: DatabaseDimensionElement,
        summaryTable: sqlalchemy.schema.Table,
        overlapTable: sqlalchemy.schema.Table,
        governor: GovernorDimensionRecordStorage,
    ):
        self._db = db
        self.element = element
        assert element.spatial is not None
        self._summaryTable = summaryTable
        self._overlapTable = overlapTable
        self._governor = governor

    @classmethod
    def initialize(
        cls,
        db: Database,
        element: DatabaseDimensionElement,
        *,
        context: Optional[StaticTablesContext],
        governor: GovernorDimensionRecordStorage,
    ) -> _SkyPixOverlapStorage:
        """Construct a new instance, creating tables as needed.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        element : `DatabaseDimensionElement`
            Dimension element whose overlaps are to be managed.
        context : `StaticTablesContext`, optional
            If provided, an object to use to create any new tables.  If not
            provided, ``db.ensureTableExists`` should be used instead.
        governor : `GovernorDimensionRecordStorage`
            Record storage backend for this element's governor dimension.

        Returns
        -------
        storage : `_SkyPixOverlapStorage`
            New instance backed by the summary and overlap tables for
            ``element``.
        """
        # Both creation callables are invoked below as (name, spec).
        if context is not None:
            op = context.addTable
        else:
            op = db.ensureTableExists
        summaryTable = op(
            cls._SUMMARY_TABLE_NAME_SPEC.format(element=element),
            cls._makeSummaryTableSpec(element),
        )
        overlapTable = op(
            cls._OVERLAP_TABLE_NAME_SPEC.format(element=element),
            cls._makeOverlapTableSpec(element),
        )
        return cls(db, element, summaryTable=summaryTable, overlapTable=overlapTable, governor=governor)

    @staticmethod
    def _makeSkyPixKeyFieldSpecs() -> List[ddl.FieldSpec]:
        """Create the ``skypix_system`` and ``skypix_level`` primary key
        field specifications shared by the summary and overlap tables.

        Returns
        -------
        fieldSpecs : `list` [ `ddl.FieldSpec` ]
            Newly-constructed field specifications, in that order.
        """
        return [
            ddl.FieldSpec(
                name="skypix_system",
                dtype=sqlalchemy.String,
                length=16,
                nullable=False,
                primaryKey=True,
            ),
            ddl.FieldSpec(
                name="skypix_level",
                dtype=sqlalchemy.SmallInteger,
                nullable=False,
                primaryKey=True,
            ),
        ]

    _SUMMARY_TABLE_NAME_SPEC = "{element.name}_skypix_overlap_summary"

    @classmethod
    def _makeSummaryTableSpec(cls, element: DatabaseDimensionElement) -> ddl.TableSpec:
        """Create a specification for the table that records which combinations
        of skypix dimension and governor value have materialized overlaps.

        Parameters
        ----------
        element : `DatabaseDimensionElement`
            Dimension element whose overlaps are to be managed.

        Returns
        -------
        tableSpec : `ddl.TableSpec`
            Table specification.
        """
        assert element.spatial is not None
        tableSpec = ddl.TableSpec(fields=cls._makeSkyPixKeyFieldSpecs())
        addDimensionForeignKey(tableSpec, element.spatial.governor, primaryKey=True)
        return tableSpec

    _OVERLAP_TABLE_NAME_SPEC = "{element.name}_skypix_overlap"

    @classmethod
    def _makeOverlapTableSpec(cls, element: DatabaseDimensionElement) -> ddl.TableSpec:
        """Create a specification for the table that holds materialized
        overlap rows.

        Parameters
        ----------
        element : `DatabaseDimensionElement`
            Dimension element whose overlaps are to be managed.

        Returns
        -------
        tableSpec : `ddl.TableSpec`
            Table specification.
        """
        assert element.spatial is not None
        tableSpec = ddl.TableSpec(
            # (more columns added below)
            fields=cls._makeSkyPixKeyFieldSpecs(),
            unique=set(),
            indexes={
                # This index has the same fields as the PK, in a different
                # order, to facilitate queries that know skypix_index and want
                # to find the other element.
                (
                    "skypix_system",
                    "skypix_level",
                    "skypix_index",
                )
                + tuple(element.graph.required.names),
            },
            foreignKeys=[
                # Foreign key to summary table.  This makes sure we don't
                # materialize any overlaps without remembering that we've done
                # so in the summary table, though it can't prevent the converse
                # of adding a summary row without adding overlap row (either of
                # those is a logic bug, of course, but we want to be defensive
                # about those).  Using ON DELETE CASCADE, it'd be very easy to
                # implement "disabling" an overlap materialization, because we
                # can just delete the summary row.
                # Note that the governor dimension column is added below, in
                # the call to addDimensionForeignKey.
                ddl.ForeignKeySpec(
                    cls._SUMMARY_TABLE_NAME_SPEC.format(element=element),
                    source=("skypix_system", "skypix_level", element.spatial.governor.name),
                    target=("skypix_system", "skypix_level", element.spatial.governor.name),
                    onDelete="CASCADE",
                ),
            ],
        )
        # Add fields for the standard element this class manages overlaps for.
        # This is guaranteed to add a column for the governor dimension,
        # because that's a required dependency of element.
        for dimension in element.required:
            addDimensionForeignKey(tableSpec, dimension, primaryKey=True)
        # Add field for the actual skypix index.  We do this later because I
        # think we care (at least a bit) about the order in which the primary
        # key is defined, in that we want a non-summary column like this one
        # to appear after the governor dimension column.
        tableSpec.fields.add(
            ddl.FieldSpec(
                name="skypix_index",
                dtype=sqlalchemy.BigInteger,
                nullable=False,
                primaryKey=True,
            )
        )
        return tableSpec

    def enable(
        self,
        storage: TableDimensionRecordStorage,
        skypix: SkyPixDimension,
        governorValue: str,
    ) -> None:
        """Enable materialization of overlaps between a skypix dimension
        and the records of ``self.element`` with a particular governor value.

        Parameters
        ----------
        storage : `TableDimensionRecordStorage`
            Storage object for the records of ``self.element``.
        skypix : `SkyPixDimension`
            The skypix dimension (system and level) for which overlaps should
            be materialized.
        governorValue : `str`
            Value of this element's governor dimension for which overlaps
            should be materialized.  For example, if ``self.element`` is
            ``visit``, this is an instrument name; if ``self.element`` is
            ``patch``, this is a skymap name.

        Notes
        -----
        If there are existing rows for the given ``governorValue``, overlap
        rows for them will be immediately computed and inserted.  At present,
        that never happens, because we only enable overlaps with
        `DimensionUniverse.commonSkyPix`, and that happens immediately after
        each governor row is inserted (and there can't be any patch rows,
        for example, until after the corresponding skymap row is inserted).

        After calling `enable` for a particular combination, any new records
        for ``self.element`` that are inserted will automatically be
        accompanied by overlap records (via calls to `insert` made
        by `TableDimensionRecordStorage` methods).
        """
        # Because we're essentially materializing a view in Python, we
        # aggressively lock all tables we're reading and writing in order to be
        # sure nothing gets out of sync.  This may not be the most efficient
        # approach possible, but we'll focus on correct before we focus on
        # fast, and enabling a new overlap combination should be a very rare
        # operation anyway, and never one we do in parallel.
        with self._db.transaction(
            lock=[self._governor.table, storage._table, self._summaryTable, self._overlapTable]
        ):
            # Only the "was a row inserted" flag matters here; the first
            # element of the returned pair is not used.
            _, inserted = self._db.sync(
                self._summaryTable,
                keys={
                    "skypix_system": skypix.system.name,
                    "skypix_level": skypix.level,
                    self._governor.element.name: governorValue,
                },
            )
            if inserted:
                _LOG.debug(
                    "Precomputing initial overlaps for %s vs %s for %s=%s",
                    skypix.name,
                    self.element.name,
                    self._governor.element.name,
                    governorValue,
                )
                self._fill(storage=storage, skypix=skypix, governorValue=governorValue)
            else:
                _LOG.debug(
                    "Overlaps already precomputed for %s vs %s for %s=%s",
                    skypix.name,
                    self.element.name,
                    self._governor.element.name,
                    governorValue,
                )

    def _fill(
        self,
        *,
        storage: TableDimensionRecordStorage,
        skypix: SkyPixDimension,
        governorValue: str,
    ) -> None:
        """Insert overlap records for a newly-enabled combination of skypix
        dimension and governor value.

        This method should only be called by `enable`.

        Parameters
        ----------
        storage : `TableDimensionRecordStorage`
            Storage object for the records of ``self.element``.
        skypix : `SkyPixDimension`
            The skypix dimension (system and level) for which overlaps should
            be materialized.
        governorValue : `str`
            Value of this element's governor dimension for which overlaps
            should be materialized.  For example, if ``self.element`` is
            ``visit``, this is an instrument name; if ``self.element`` is
            ``patch``, this is a skymap name.
        """
        overlapRecords: List[dict] = []
        # `DimensionRecordStorage.fetch` as defined by the ABC expects to be
        # given iterables of data IDs that correspond to that element's graph
        # (e.g. {instrument, visit, detector}), not just some subset of it
        # (e.g. {instrument}).  But we know the implementation of `fetch` for
        # `TableDimensionRecordStorage` will use this iterable to do exactly
        # what we want.
        governorDataId = DataCoordinate.standardize(
            {self._governor.element.name: governorValue}, graph=self._governor.element.graph
        )
        for record in storage.fetch(DataCoordinateIterable.fromScalar(governorDataId)):
            if record.region is None:
                continue
            baseOverlapRecord = record.dataId.byName()
            baseOverlapRecord["skypix_system"] = skypix.system.name
            baseOverlapRecord["skypix_level"] = skypix.level
            # The envelope is consumed as half-open [begin, end) index ranges;
            # one overlap row is written per index.
            for begin, end in skypix.pixelization.envelope(record.region):
                overlapRecords.extend(
                    dict(baseOverlapRecord, skypix_index=index) for index in range(begin, end)
                )
        _LOG.debug(
            "Inserting %d initial overlap rows for %s vs %s for %s=%r",
            len(overlapRecords),
            skypix.name,
            self.element.name,
            self._governor.element.name,
            governorValue,
        )
        self._db.insert(self._overlapTable, *overlapRecords)

    def insert(self, records: Sequence[DimensionRecord], replace: bool = False) -> None:
        """Insert overlaps for a sequence of ``self.element`` records that
        have just been inserted.

        This must be called by any method that inserts records for that
        element (i.e. `TableDimensionRecordStorage.insert` and
        `TableDimensionRecordStorage.sync`), within the same transaction.

        Parameters
        ----------
        records : `Sequence` [ `DimensionRecord` ]
            Records for ``self.element``.  Records with `None` regions are
            ignored.
        replace : `bool`, optional
            If `True` (`False` is default) one or more of the given records may
            already exist and is being updated, so we need to delete any
            existing overlap records first.
        """
        if not records:
            # Nothing to materialize, so don't bother locking the summary
            # table or querying it.
            return
        # Group records by family.governor value.
        grouped: Dict[str, List[DimensionRecord]] = defaultdict(list)
        for record in records:
            grouped[getattr(record, self._governor.element.name)].append(record)
        _LOG.debug(
            "Precomputing new skypix overlaps for %s where %s in %s.",
            self.element.name,
            self._governor.element.name,
            grouped.keys(),
        )
        # Make sure the set of combinations to materialize does not change
        # while we are materializing the ones we have, by locking the summary
        # table.  Because we aren't planning to write to the summary table,
        # this could just be a SHARED lock instead of an EXCLUSIVE one, but
        # there's no API for that right now.
        with self._db.transaction(lock=[self._summaryTable]):
            # Query for the skypix dimensions to be associated with each
            # governor value.
            gvCol = self._summaryTable.columns[self._governor.element.name]
            sysCol = self._summaryTable.columns.skypix_system
            lvlCol = self._summaryTable.columns.skypix_level
            query = (
                sqlalchemy.sql.select(
                    gvCol,
                    sysCol,
                    lvlCol,
                )
                .select_from(self._summaryTable)
                .where(gvCol.in_(list(grouped)))
            )
            # Group results by governor value, then skypix system.
            skypix: Dict[str, NamedKeyDict[SkyPixSystem, List[int]]] = {gv: NamedKeyDict() for gv in grouped}
            with self._db.query(query) as sql_result:
                for summaryRow in sql_result.mappings():
                    system = self.element.universe.skypix[summaryRow[sysCol]]
                    skypix[summaryRow[gvCol]].setdefault(system, []).append(summaryRow[lvlCol])
            if replace:
                # Construct constraints for a DELETE query as a list of dicts.
                # We include the skypix_system and skypix_level column values
                # explicitly instead of just letting the query search for all
                # of those related to the given records, because they are the
                # first columns in the primary key, and hence searching with
                # them will be way faster (and we don't want to add a new index
                # just for this operation).
                to_delete: List[Dict[str, Any]] = []
                for gv, skypix_systems in skypix.items():
                    for system, skypix_levels in skypix_systems.items():
                        to_delete.extend(
                            {"skypix_system": system.name, "skypix_level": level, **record.dataId.byName()}
                            for record, level in itertools.product(grouped[gv], skypix_levels)
                        )
                self._db.delete(
                    self._overlapTable,
                    ["skypix_system", "skypix_level"] + list(self.element.graph.required.names),
                    *to_delete,
                )
            overlapRecords: List[dict] = []
            # Compute overlaps for one governor value at a time, but gather
            # them all up for one insert.
            for gv, group in grouped.items():
                overlapRecords.extend(self._compute(group, skypix[gv], gv))
            _LOG.debug(
                "Inserting %d new skypix overlap rows for %s where %s in %s.",
                len(overlapRecords),
                self.element.name,
                self._governor.element.name,
                grouped.keys(),
            )
            self._db.insert(self._overlapTable, *overlapRecords)

    def _compute(
        self,
        records: Sequence[DimensionRecord],
        skypix: NamedKeyDict[SkyPixSystem, List[int]],
        governorValue: str,
    ) -> Iterator[dict]:
        """Compute all overlap rows for a particular governor dimension value
        and all of the skypix dimensions for which its overlaps are enabled.

        This method should only be called by `insert`.

        Parameters
        ----------
        records : `Sequence` [ `DimensionRecord` ]
            Records for ``self.element``.  Records with `None` regions are
            ignored.  All must have the governor value given.
        skypix : `NamedKeyDict` [ `SkyPixSystem`, `list` [ `int` ] ]
            Mapping containing all skypix systems and levels for which overlaps
            should be computed, grouped by `SkyPixSystem`.  The level lists
            are not modified.
        governorValue : `str`
            Value of this element's governor dimension for which overlaps
            should be computed.  For example, if ``self.element`` is ``visit``,
            this is an instrument name; if ``self.element`` is ``patch``, this
            is a skymap name.

        Yields
        ------
        row : `dict`
            Dictionary representing an overlap row.
        """
        # Sort the levels of each system once, finest-grained (largest level)
        # first, working on copies so the caller's lists are left untouched.
        # Systems with no levels contribute no rows.
        systems = [(system, sorted(levels, reverse=True)) for system, levels in skypix.items() if levels]
        # Process input records one at time, computing all skypix indices for
        # each.
        for record in records:
            if record.region is None:
                continue
            assert getattr(record, self._governor.element.name) == governorValue
            for system, levels in systems:
                baseOverlapRecord = record.dataId.byName()
                baseOverlapRecord["skypix_system"] = system.name
                # Start with the first level, which is the finest-grained one.
                # Compute skypix envelope indices directly for that.
                finest = levels[0]
                indices: Dict[int, Set[int]] = {finest: set()}
                for begin, end in system[finest].pixelization.envelope(record.region):
                    indices[finest].update(range(begin, end))
                # Divide those indices by powers of 4 (and remove duplicates)
                # to work our way up to the last (coarsest) level.
                for lastLevel, nextLevel in zip(levels[:-1], levels[1:]):
                    factor = 4 ** (lastLevel - nextLevel)
                    indices[nextLevel] = {index // factor for index in indices[lastLevel]}
                for level in levels:
                    yield from (
                        {
                            "skypix_level": level,
                            "skypix_index": index,
                            **baseOverlapRecord,  # type: ignore
                        }
                        for index in indices[level]
                    )

    def select(
        self,
        skypix: SkyPixDimension,
        governorValues: Union[AbstractSet[str], EllipsisType],
    ) -> sqlalchemy.sql.FromClause:
        """Construct a subquery expression containing overlaps between the
        given skypix dimension and governor values.

        Parameters
        ----------
        skypix : `SkyPixDimension`
            The skypix dimension (system and level) for which overlaps should
            be materialized.
        governorValues : `AbstractSet` [ `str` ] or ``...``
            Values of this element's governor dimension for which overlaps
            should be returned.  For example, if ``self.element`` is ``visit``,
            this is a set of instrument names; if ``self.element`` is
            ``patch``, this is a set of skymap names.  If ``...`` all values
            in the database are used (`GovernorDimensionRecordStorage.values`).

        Returns
        -------
        subquery : `sqlalchemy.sql.FromClause`
            A SELECT query with an alias, intended for use as a subquery, with
            columns equal to ``self.element.required.names`` + ``skypix.name``.

        Raises
        ------
        RuntimeError
            Raised if ``skypix`` is not the common skypix dimension and
            overlaps with it have not been materialized for at least one of
            the requested governor values.
        """
        if skypix != self.element.universe.commonSkyPix:
            # We guarantee elsewhere that we always materialize all overlaps
            # vs. commonSkyPix, but for everything else, we need to check that
            # we have materialized this combination of governor values and
            # skypix.
            summaryWhere = [
                self._summaryTable.columns.skypix_system == skypix.system.name,
                self._summaryTable.columns.skypix_level == skypix.level,
            ]
            gvCol = self._summaryTable.columns[self._governor.element.name]
            if governorValues is not Ellipsis:
                summaryWhere.append(gvCol.in_(list(governorValues)))
            summaryQuery = (
                sqlalchemy.sql.select(gvCol)
                .select_from(self._summaryTable)
                .where(sqlalchemy.sql.and_(*summaryWhere))
            )
            # `Database.query` is used as a context manager everywhere else in
            # this module; the result rows must be read inside the ``with``.
            with self._db.query(summaryQuery) as sql_result:
                materializedGovernorValues = {row._mapping[gvCol] for row in sql_result}
            if governorValues is Ellipsis:
                missingGovernorValues = self._governor.values - materializedGovernorValues
            else:
                missingGovernorValues = governorValues - materializedGovernorValues
            if missingGovernorValues:
                raise RuntimeError(
                    f"Query requires an overlap join between {skypix.name} and {self.element.name} "
                    f"(for {self._governor.element.name} in {missingGovernorValues}), but these "
                    f"have not been materialized."
                )
        # Expose the skypix index under the skypix dimension's own name,
        # followed by the required dimension columns of the element.
        columns = [self._overlapTable.columns.skypix_index.label(skypix.name)]
        columns.extend(self._overlapTable.columns[name] for name in self.element.graph.required.names)
        overlapWhere = [
            self._overlapTable.columns.skypix_system == skypix.system.name,
            self._overlapTable.columns.skypix_level == skypix.level,
        ]
        if governorValues is not Ellipsis:
            overlapWhere.append(
                self._overlapTable.columns[self._governor.element.name].in_(list(governorValues))
            )
        overlapQuery = (
            sqlalchemy.sql.select(*columns)
            .select_from(self._overlapTable)
            .where(sqlalchemy.sql.and_(*overlapWhere))
        )
        return overlapQuery.alias(f"{self.element.name}_{skypix.name}_overlap")

    def digestTables(self) -> Iterable[sqlalchemy.schema.Table]:
        """Return tables used for schema digest.

        Returns
        -------
        tables : `Iterable` [ `sqlalchemy.schema.Table` ]
            Possibly empty set of tables for schema digest calculations.
        """
        return [self._summaryTable, self._overlapTable]