Coverage for python/lsst/daf/butler/registry/dimensions/table.py: 86%

236 statements  

« prev     ^ index     » next       coverage.py v6.4.2, created at 2022-07-15 02:34 -0700

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = ["TableDimensionRecordStorage"] 

24 

25import itertools 

26import logging 

27import warnings 

28from collections import defaultdict 

29from typing import AbstractSet, Any, Dict, Iterable, Iterator, List, Mapping, Optional, Sequence, Set, Union 

30 

31import sqlalchemy 

32 

33from ...core import ( 

34 DatabaseDimensionElement, 

35 DataCoordinate, 

36 DataCoordinateIterable, 

37 DimensionElement, 

38 DimensionRecord, 

39 GovernorDimension, 

40 NamedKeyDict, 

41 NamedKeyMapping, 

42 NamedValueSet, 

43 SimpleQuery, 

44 SkyPixDimension, 

45 SkyPixSystem, 

46 SpatialRegionDatabaseRepresentation, 

47 TimespanDatabaseRepresentation, 

48 addDimensionForeignKey, 

49 ddl, 

50) 

51from ..interfaces import ( 

52 Database, 

53 DatabaseDimensionOverlapStorage, 

54 DatabaseDimensionRecordStorage, 

55 GovernorDimensionRecordStorage, 

56 StaticTablesContext, 

57) 

58from ..queries import QueryBuilder 

59from ..wildcards import Ellipsis, EllipsisType 

60 

# Module-level logger, named after this module.
_LOG = logging.getLogger(__name__)


MAX_FETCH_CHUNK = 1000
"""Maximum number of data IDs for which records are fetched at a time.

Barring something database-engine-specific, this sets the size of the actual
SQL query, not just the number of result rows, because the only way to query
for multiple data IDs in a single SELECT query via SQLAlchemy is to have an OR
term in the WHERE clause for each one.
"""

72 

73 

class TableDimensionRecordStorage(DatabaseDimensionRecordStorage):
    """A record storage implementation that uses a regular database table.

    Parameters
    ----------
    db : `Database`
        Interface to the database engine and namespace that will hold these
        dimension records.
    element : `DatabaseDimensionElement`
        The element whose records this storage will manage.
    table : `sqlalchemy.schema.Table`
        The logical table for the element.
    skyPixOverlap : `_SkyPixOverlapStorage`, optional
        Object that manages the tables that hold materialized spatial overlap
        joins to skypix dimensions.  Should be `None` if (and only if)
        ``element.spatial is None``.
    """

    def __init__(
        self,
        db: Database,
        element: DatabaseDimensionElement,
        *,
        table: sqlalchemy.schema.Table,
        skyPixOverlap: Optional[_SkyPixOverlapStorage] = None,
    ):
        self._db = db
        self._table = table
        self._element = element
        # Map each dimension name to the table column holding its value, so
        # `fetch` can translate data ID constraints into column expressions.
        self._fetchColumns: Dict[str, sqlalchemy.sql.ColumnElement] = {
            dimension.name: self._table.columns[name]
            for dimension, name in zip(
                self._element.dimensions, self._element.RecordClass.fields.dimensions.names
            )
        }
        self._skyPixOverlap = skyPixOverlap
        self._otherOverlaps: List[DatabaseDimensionOverlapStorage] = []

    @classmethod
    def initialize(
        cls,
        db: Database,
        element: DatabaseDimensionElement,
        *,
        context: Optional[StaticTablesContext] = None,
        config: Mapping[str, Any],
        governors: NamedKeyMapping[GovernorDimension, GovernorDimensionRecordStorage],
    ) -> DatabaseDimensionRecordStorage:
        # Docstring inherited from DatabaseDimensionRecordStorage.
        spec = element.RecordClass.fields.makeTableSpec(
            RegionReprClass=db.getSpatialRegionRepresentation(),
            TimespanReprClass=db.getTimespanRepresentation(),
        )
        if context is not None:
            table = context.addTable(element.name, spec)
        else:
            table = db.ensureTableExists(element.name, spec)
        skyPixOverlap: Optional[_SkyPixOverlapStorage]
        if element.spatial is not None:
            governor = governors[element.spatial.governor]
            skyPixOverlap = _SkyPixOverlapStorage.initialize(
                db,
                element,
                context=context,
                governor=governor,
            )
            result = cls(db, element, table=table, skyPixOverlap=skyPixOverlap)

            # Whenever anyone inserts a new governor dimension value, we want
            # to enable overlaps for that value between this element and
            # commonSkyPix.
            def callback(record: DimensionRecord) -> None:
                skyPixOverlap.enable(  # type: ignore
                    result,
                    element.universe.commonSkyPix,
                    getattr(record, element.spatial.governor.primaryKey.name),  # type: ignore
                )

            governor.registerInsertionListener(callback)
            return result
        else:
            return cls(db, element, table=table)

    @property
    def element(self) -> DatabaseDimensionElement:
        # Docstring inherited from DimensionRecordStorage.element.
        return self._element

    def clearCaches(self) -> None:
        # Docstring inherited from DimensionRecordStorage.clearCaches.
        pass

    def join(
        self,
        builder: QueryBuilder,
        *,
        regions: Optional[NamedKeyDict[DimensionElement, SpatialRegionDatabaseRepresentation]] = None,
        timespans: Optional[NamedKeyDict[DimensionElement, TimespanDatabaseRepresentation]] = None,
    ) -> sqlalchemy.sql.FromClause:
        # Docstring inherited from DimensionRecordStorage.
        # NOTE(review): this method returns the element's table, so the return
        # annotation is `FromClause` rather than `None`; confirm this agrees
        # with the signature declared on the base class.
        if regions is not None:
            # Spatial joins go through the materialized overlaps with the
            # common skypix dimension; join that subquery in first.
            dimensions = NamedValueSet(self.element.required)
            dimensions.add(self.element.universe.commonSkyPix)
            assert self._skyPixOverlap is not None
            builder.joinTable(
                self._skyPixOverlap.select(self.element.universe.commonSkyPix, Ellipsis),
                dimensions,
            )
            regionsInTable = self._db.getSpatialRegionRepresentation().fromSelectable(self._table)
            regions[self.element] = regionsInTable
        joinOn = builder.startJoin(
            self._table, self.element.dimensions, self.element.RecordClass.fields.dimensions.names
        )
        if timespans is not None:
            timespanInTable = self._db.getTimespanRepresentation().fromSelectable(self._table)
            # Require this element's timespan to overlap every timespan that
            # is already part of the query, then register our own.
            for timespanInQuery in timespans.values():
                joinOn.append(timespanInQuery.overlaps(timespanInTable))
            timespans[self.element] = timespanInTable
        builder.finishJoin(self._table, joinOn)
        return self._table

    def fetch(self, dataIds: DataCoordinateIterable) -> Iterable[DimensionRecord]:
        # Docstring inherited from DimensionRecordStorage.fetch.
        RecordClass = self.element.RecordClass
        query = SimpleQuery()
        query.columns.extend(self._table.columns[name] for name in RecordClass.fields.standard.names)
        if self.element.spatial is not None:
            query.columns.append(self._table.columns["region"])
        if self.element.temporal is not None:
            TimespanReprClass = self._db.getTimespanRepresentation()
            query.columns.extend(self._table.columns[name] for name in TimespanReprClass.getFieldNames())
        query.join(self._table)
        dataIds.constrain(query, lambda name: self._fetchColumns[name])
        with warnings.catch_warnings():
            # Some of our generated queries may contain cartesian joins, this
            # is not a serious issue as it is properly constrained, so we want
            # to suppress sqlalchemy warnings.
            warnings.filterwarnings(
                "ignore",
                message="SELECT statement has a cartesian product",
                category=sqlalchemy.exc.SAWarning,
            )
            for row in self._db.query(query.combine()):
                values = row._asdict()
                if self.element.temporal is not None:
                    # Collapse the database-specific timespan columns into the
                    # single timespan value the record class expects.
                    values[TimespanDatabaseRepresentation.NAME] = TimespanReprClass.extract(values)
                yield RecordClass(**values)

    def insert(self, *records: DimensionRecord, replace: bool = False, skip_existing: bool = False) -> None:
        # Docstring inherited from DimensionRecordStorage.insert.
        elementRows = [record.toDict() for record in records]
        if self.element.temporal is not None:
            # Expand each record's timespan into the database-specific
            # column(s) used to store it.
            TimespanReprClass = self._db.getTimespanRepresentation()
            for row in elementRows:
                timespan = row.pop(TimespanDatabaseRepresentation.NAME)
                TimespanReprClass.update(timespan, result=row)
        with self._db.transaction():
            if replace:
                self._db.replace(self._table, *elementRows)
            elif skip_existing:
                self._db.ensure(self._table, *elementRows, primary_key_only=True)
            else:
                self._db.insert(self._table, *elementRows)
            # Overlap rows must be written in the same transaction as the
            # records they describe.
            if self._skyPixOverlap is not None:
                self._skyPixOverlap.insert(records, replace=replace, skip_existing=skip_existing)

    def sync(self, record: DimensionRecord, update: bool = False) -> Union[bool, Dict[str, Any]]:
        # Docstring inherited from DimensionRecordStorage.sync.
        compared = record.toDict()
        # Split the record into the required (key) fields and the fields to
        # compare against any existing row.
        keys = {}
        for name in record.fields.required.names:
            keys[name] = compared.pop(name)
        if self.element.temporal is not None:
            TimespanReprClass = self._db.getTimespanRepresentation()
            timespan = compared.pop(TimespanDatabaseRepresentation.NAME)
            TimespanReprClass.update(timespan, result=compared)
        with self._db.transaction():
            _, inserted_or_updated = self._db.sync(
                self._table,
                keys=keys,
                compared=compared,
                update=update,
            )
            if inserted_or_updated and self._skyPixOverlap is not None:
                if inserted_or_updated is True:
                    # Inserted a new row, so we just need to insert new overlap
                    # rows.
                    self._skyPixOverlap.insert([record])
                elif "region" in inserted_or_updated:
                    # Updated the region, so we need to delete old overlap rows
                    # and insert new ones.
                    # (mypy should be able to tell that inserted_or_updated
                    # must be a dict if we get to this clause, but it can't)
                    self._skyPixOverlap.insert([record], replace=True)
                # Otherwise we updated something other than a region, and the
                # overlap rows do not need to change.
        return inserted_or_updated

    def digestTables(self) -> Iterable[sqlalchemy.schema.Table]:
        # Docstring inherited from DimensionRecordStorage.digestTables.
        result = [self._table]
        if self._skyPixOverlap is not None:
            result.extend(self._skyPixOverlap.digestTables())
        return result

    def connect(self, overlaps: DatabaseDimensionOverlapStorage) -> None:
        # Docstring inherited from DatabaseDimensionRecordStorage.
        self._otherOverlaps.append(overlaps)

281 

282 

class _SkyPixOverlapStorage:
    """A helper object for `TableDimensionRecordStorage` that manages its
    materialized overlaps with skypix dimensions.

    New instances should be constructed by calling `initialize`, not by calling
    the constructor directly.

    Parameters
    ----------
    db : `Database`
        Interface to the underlying database engine and namespace.
    element : `DatabaseDimensionElement`
        Dimension element whose overlaps are to be managed.
    summaryTable : `sqlalchemy.schema.Table`
        Table that records which combinations of skypix dimensions and
        governor dimension values have materialized overlap rows.
    overlapTable : `sqlalchemy.schema.Table`
        Table containing the actual materialized overlap rows.
    governor : `GovernorDimensionRecordStorage`
        Record storage backend for this element's governor dimension.

    Notes
    -----
    This class (and most importantly, the tables it relies on) can in principle
    manage overlaps with any skypix dimension, but at present it is only being
    used to manage relationships with the special ``commonSkyPix`` dimension,
    because that's all the query system uses.  Eventually, we expect to
    require users to explicitly materialize all relationships they will want
    to use in queries.

    Other possible future improvements include:

     - allowing finer-grained skypix dimensions to provide overlap rows for
       coarser ones, by dividing indices by powers of 4 (and possibly doing
       ``SELECT DISTINCT`` in the subquery to remove duplicates);

     - allowing finer-grained database elements (e.g. patch) to provide overlap
       rows for coarser ones (e.g. tract), by ignoring irrelevant columns
       (e.g. the patch IDs) in the subquery (again, possible with
       ``SELECT DISTINCT``).

    But there's no point to doing any of that until the query system can
    figure out how best to ask for overlap rows when an exact match isn't
    available.
    """

    def __init__(
        self,
        db: Database,
        element: DatabaseDimensionElement,
        summaryTable: sqlalchemy.schema.Table,
        overlapTable: sqlalchemy.schema.Table,
        governor: GovernorDimensionRecordStorage,
    ):
        self._db = db
        self.element = element
        assert element.spatial is not None
        self._summaryTable = summaryTable
        self._overlapTable = overlapTable
        self._governor = governor

    @classmethod
    def initialize(
        cls,
        db: Database,
        element: DatabaseDimensionElement,
        *,
        context: Optional[StaticTablesContext],
        governor: GovernorDimensionRecordStorage,
    ) -> _SkyPixOverlapStorage:
        """Construct a new instance, creating tables as needed.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        element : `DatabaseDimensionElement`
            Dimension element whose overlaps are to be managed.
        context : `StaticTablesContext`, optional
            If provided, an object to use to create any new tables.  If not
            provided, ``db.ensureTableExists`` should be used instead.
        governor : `GovernorDimensionRecordStorage`
            Record storage backend for this element's governor dimension.

        Returns
        -------
        storage : `_SkyPixOverlapStorage`
            New instance backed by the summary and overlap tables.
        """
        if context is not None:
            op = context.addTable
        else:
            op = db.ensureTableExists
        summaryTable = op(
            cls._SUMMARY_TABLE_NAME_SPEC.format(element=element),
            cls._makeSummaryTableSpec(element),
        )
        overlapTable = op(
            cls._OVERLAP_TABLE_NAME_SPEC.format(element=element),
            cls._makeOverlapTableSpec(element),
        )
        # Construct via ``cls`` (not the hard-coded class name) so that the
        # alternate constructor honors the class it was invoked on.
        return cls(db, element, summaryTable=summaryTable, overlapTable=overlapTable, governor=governor)

    _SUMMARY_TABLE_NAME_SPEC = "{element.name}_skypix_overlap_summary"

    @classmethod
    def _makeSummaryTableSpec(cls, element: DatabaseDimensionElement) -> ddl.TableSpec:
        """Create a specification for the table that records which combinations
        of skypix dimension and governor value have materialized overlaps.

        Parameters
        ----------
        element : `DatabaseDimensionElement`
            Dimension element whose overlaps are to be managed.

        Returns
        -------
        tableSpec : `ddl.TableSpec`
            Table specification.
        """
        assert element.spatial is not None
        tableSpec = ddl.TableSpec(
            fields=[
                ddl.FieldSpec(
                    name="skypix_system",
                    dtype=sqlalchemy.String,
                    length=16,
                    nullable=False,
                    primaryKey=True,
                ),
                ddl.FieldSpec(
                    name="skypix_level",
                    dtype=sqlalchemy.SmallInteger,
                    nullable=False,
                    primaryKey=True,
                ),
            ]
        )
        addDimensionForeignKey(tableSpec, element.spatial.governor, primaryKey=True)
        return tableSpec

    _OVERLAP_TABLE_NAME_SPEC = "{element.name}_skypix_overlap"

    @classmethod
    def _makeOverlapTableSpec(cls, element: DatabaseDimensionElement) -> ddl.TableSpec:
        """Create a specification for the table that holds materialized
        overlap rows.

        Parameters
        ----------
        element : `DatabaseDimensionElement`
            Dimension element whose overlaps are to be managed.

        Returns
        -------
        tableSpec : `ddl.TableSpec`
            Table specification.
        """
        assert element.spatial is not None
        tableSpec = ddl.TableSpec(
            fields=[
                ddl.FieldSpec(
                    name="skypix_system",
                    dtype=sqlalchemy.String,
                    length=16,
                    nullable=False,
                    primaryKey=True,
                ),
                ddl.FieldSpec(
                    name="skypix_level",
                    dtype=sqlalchemy.SmallInteger,
                    nullable=False,
                    primaryKey=True,
                ),
                # (more columns added below)
            ],
            unique=set(),
            indexes={
                # This index has the same fields as the PK, in a different
                # order, to facilitate queries that know skypix_index and want
                # to find the other element.
                (
                    "skypix_system",
                    "skypix_level",
                    "skypix_index",
                )
                + tuple(element.graph.required.names),
            },
            foreignKeys=[
                # Foreign key to summary table.  This makes sure we don't
                # materialize any overlaps without remembering that we've done
                # so in the summary table, though it can't prevent the converse
                # of adding a summary row without adding overlap row (either of
                # those is a logic bug, of course, but we want to be defensive
                # about those).  Using ON DELETE CASCADE, it'd be very easy to
                # implement "disabling" an overlap materialization, because we
                # can just delete the summary row.
                # Note that the governor dimension column is added below, in
                # the call to addDimensionForeignKey.
                ddl.ForeignKeySpec(
                    cls._SUMMARY_TABLE_NAME_SPEC.format(element=element),
                    source=("skypix_system", "skypix_level", element.spatial.governor.name),
                    target=("skypix_system", "skypix_level", element.spatial.governor.name),
                    onDelete="CASCADE",
                ),
            ],
        )
        # Add fields for the standard element this class manages overlaps for.
        # This is guaranteed to add a column for the governor dimension,
        # because that's a required dependency of element.
        for dimension in element.required:
            addDimensionForeignKey(tableSpec, dimension, primaryKey=True)
        # Add field for the actual skypix index.  We do this later because I
        # think we care (at least a bit) about the order in which the primary
        # key is defined, in that we want a non-summary column like this one
        # to appear after the governor dimension column.
        tableSpec.fields.add(
            ddl.FieldSpec(
                name="skypix_index",
                dtype=sqlalchemy.BigInteger,
                nullable=False,
                primaryKey=True,
            )
        )
        return tableSpec

    def enable(
        self,
        storage: TableDimensionRecordStorage,
        skypix: SkyPixDimension,
        governorValue: str,
    ) -> None:
        """Enable materialization of overlaps between a skypix dimension
        and the records of ``self.element`` with a particular governor value.

        Parameters
        ----------
        storage : `TableDimensionRecordStorage`
            Storage object for the records of ``self.element``.
        skypix : `SkyPixDimension`
            The skypix dimension (system and level) for which overlaps should
            be materialized.
        governorValue : `str`
            Value of this element's governor dimension for which overlaps
            should be materialized.  For example, if ``self.element`` is
            ``visit``, this is an instrument name; if ``self.element`` is
            ``patch``, this is a skymap name.

        Notes
        -----
        If there are existing rows for the given ``governorValue``, overlap
        rows for them will be immediately computed and inserted.  At present,
        that never happens, because we only enable overlaps with
        `DimensionUniverse.commonSkyPix`, and that happens immediately after
        each governor row is inserted (and there can't be any patch rows,
        for example, until after the corresponding skymap row is inserted).

        After calling `enable` for a particular combination, any new records
        for ``self.element`` that are inserted will automatically be
        accompanied by overlap records (via calls to `insert` made
        by `TableDimensionRecordStorage` methods).
        """
        # Because we're essentially materializing a view in Python, we
        # aggressively lock all tables we're reading and writing in order to be
        # sure nothing gets out of sync.  This may not be the most efficient
        # approach possible, but we'll focus on correct before we focus on
        # fast, and enabling a new overlap combination should be a very rare
        # operation anyway, and never one we do in parallel.
        with self._db.transaction(
            lock=[self._governor.table, storage._table, self._summaryTable, self._overlapTable]
        ):
            result, inserted = self._db.sync(
                self._summaryTable,
                keys={
                    "skypix_system": skypix.system.name,
                    "skypix_level": skypix.level,
                    self._governor.element.name: governorValue,
                },
            )
            if inserted:
                _LOG.debug(
                    "Precomputing initial overlaps for %s vs %s for %s=%s",
                    skypix.name,
                    self.element.name,
                    self._governor.element.name,
                    governorValue,
                )
                self._fill(storage=storage, skypix=skypix, governorValue=governorValue)
            else:
                _LOG.debug(
                    "Overlaps already precomputed for %s vs %s for %s=%s",
                    skypix.name,
                    self.element.name,
                    self._governor.element.name,
                    governorValue,
                )

    def _fill(
        self,
        *,
        storage: TableDimensionRecordStorage,
        skypix: SkyPixDimension,
        governorValue: str,
    ) -> None:
        """Insert overlap records for a newly-enabled combination of skypix
        dimension and governor value.

        This method should only be called by `enable`.

        Parameters
        ----------
        storage : `TableDimensionRecordStorage`
            Storage object for the records of ``self.element``.
        skypix : `SkyPixDimension`
            The skypix dimension (system and level) for which overlaps should
            be materialized.
        governorValue : `str`
            Value of this element's governor dimension for which overlaps
            should be materialized.  For example, if ``self.element`` is
            ``visit``, this is an instrument name; if ``self.element`` is
            ``patch``, this is a skymap name.
        """
        overlapRecords: List[dict] = []
        # `DimensionRecordStorage.fetch` as defined by the ABC expects to be
        # given iterables of data IDs that correspond to that element's graph
        # (e.g. {instrument, visit, detector}), not just some subset of it
        # (e.g. {instrument}).  But we know the implementation of `fetch` for
        # `TableDimensionRecordStorage` will use this iterable to do exactly
        # what we want.
        governorDataId = DataCoordinate.standardize(
            {self._governor.element.name: governorValue}, graph=self._governor.element.graph
        )
        for record in storage.fetch(DataCoordinateIterable.fromScalar(governorDataId)):
            if record.region is None:
                continue
            baseOverlapRecord = record.dataId.byName()
            baseOverlapRecord["skypix_system"] = skypix.system.name
            baseOverlapRecord["skypix_level"] = skypix.level
            # The envelope is a sequence of half-open [begin, end) index
            # ranges; emit one overlap row per index.
            for begin, end in skypix.pixelization.envelope(record.region):
                overlapRecords.extend(
                    dict(baseOverlapRecord, skypix_index=index) for index in range(begin, end)
                )
        _LOG.debug(
            "Inserting %d initial overlap rows for %s vs %s for %s=%r",
            len(overlapRecords),
            skypix.name,
            self.element.name,
            self._governor.element.name,
            governorValue,
        )
        self._db.insert(self._overlapTable, *overlapRecords)

    def insert(
        self, records: Sequence[DimensionRecord], replace: bool = False, skip_existing: bool = False
    ) -> None:
        """Insert overlaps for a sequence of ``self.element`` records that
        have just been inserted.

        This must be called by any method that inserts records for that
        element (i.e.  `TableDimensionRecordStorage.insert` and
        `TableDimensionRecordStorage.sync`), within the same transaction.

        Parameters
        ----------
        records : `Sequence` [ `DimensionRecord` ]
            Records for ``self.element``.  Records with `None` regions are
            ignored.
        replace : `bool`, optional
            If `True` (`False` is default) one or more of the given records may
            already exist and is being updated, so we need to delete any
            existing overlap records first.
        skip_existing : `bool`, optional
            If `True` (`False` is default), skip insertion if a record with
            the same primary key values already exists.
        """
        # Group records by family.governor value.
        grouped: Dict[str, List[DimensionRecord]] = defaultdict(list)
        for record in records:
            grouped[getattr(record, self._governor.element.name)].append(record)
        _LOG.debug(
            "Precomputing new skypix overlaps for %s where %s in %s.",
            self.element.name,
            self._governor.element.name,
            grouped.keys(),
        )
        # Make sure the set of combinations to materialize does not change
        # while we are materializing the ones we have, by locking the summary
        # table.  Because we aren't planning to write to the summary table,
        # this could just be a SHARED lock instead of an EXCLUSIVE one, but
        # there's no API for that right now.
        with self._db.transaction(lock=[self._summaryTable]):
            # Query for the skypix dimensions to be associated with each
            # governor value.
            gvCol = self._summaryTable.columns[self._governor.element.name]
            sysCol = self._summaryTable.columns.skypix_system
            lvlCol = self._summaryTable.columns.skypix_level
            query = (
                sqlalchemy.sql.select(
                    gvCol,
                    sysCol,
                    lvlCol,
                )
                .select_from(self._summaryTable)
                .where(gvCol.in_(list(grouped.keys())))
            )
            # Group results by governor value, then skypix system.
            skypix: Dict[str, NamedKeyDict[SkyPixSystem, List[int]]] = {
                gv: NamedKeyDict() for gv in grouped.keys()
            }
            for summaryRow in self._db.query(query).mappings():
                system = self.element.universe.skypix[summaryRow[sysCol]]
                skypix[summaryRow[gvCol]].setdefault(system, []).append(summaryRow[lvlCol])
            if replace:
                # Construct constraints for a DELETE query as a list of dicts.
                # We include the skypix_system and skypix_level column values
                # explicitly instead of just letting the query search for all
                # of those related to the given records, because they are the
                # first columns in the primary key, and hence searching with
                # them will be way faster (and we don't want to add a new index
                # just for this operation).
                to_delete: List[Dict[str, Any]] = []
                for gv, skypix_systems in skypix.items():
                    for system, skypix_levels in skypix_systems.items():
                        to_delete.extend(
                            {"skypix_system": system.name, "skypix_level": level, **record.dataId.byName()}
                            for record, level in itertools.product(grouped[gv], skypix_levels)
                        )
                self._db.delete(
                    self._overlapTable,
                    ["skypix_system", "skypix_level"] + list(self.element.graph.required.names),
                    *to_delete,
                )
            overlapRecords: List[dict] = []
            # Compute overlaps for one governor value at a time, but gather
            # them all up for one insert.
            for gv, group in grouped.items():
                overlapRecords.extend(self._compute(group, skypix[gv], gv))
            _LOG.debug(
                "Inserting %d new skypix overlap rows for %s where %s in %s.",
                len(overlapRecords),
                self.element.name,
                self._governor.element.name,
                grouped.keys(),
            )
            if skip_existing:
                self._db.ensure(self._overlapTable, *overlapRecords, primary_key_only=True)
            else:
                self._db.insert(self._overlapTable, *overlapRecords)

    def _compute(
        self,
        records: Sequence[DimensionRecord],
        skypix: NamedKeyDict[SkyPixSystem, List[int]],
        governorValue: str,
    ) -> Iterator[dict]:
        """Compute all overlap rows for a particular governor dimension value
        and all of the skypix dimensions for which its overlaps are enabled.

        This method should only be called by `insert`.

        Parameters
        ----------
        records : `Sequence` [ `DimensionRecord` ]
            Records for ``self.element``.  Records with `None` regions are
            ignored.  All must have the governor value given.
        skypix : `NamedKeyDict` [ `SkyPixSystem`, `list` [ `int` ] ]
            Mapping containing all skypix systems and levels for which overlaps
            should be computed, grouped by `SkyPixSystem`.  The level lists
            are not modified.
        governorValue : `str`
            Value of this element's governor dimension for which overlaps
            should be computed.  For example, if ``self.element`` is ``visit``,
            this is an instrument name; if ``self.element`` is ``patch``, this
            is a skymap name.

        Yields
        ------
        row : `dict`
            Dictionary representing an overlap row.
        """
        # Order each system's levels from finest to coarsest once, up front,
        # into new lists: the ordering does not depend on the record, and
        # sorting a copy leaves the caller's lists untouched.  Systems with no
        # levels contribute no rows, so they are dropped here.
        levelsBySystem = [
            (system, sorted(levels, reverse=True)) for system, levels in skypix.items() if levels
        ]
        governorName = self._governor.element.name
        # Process input records one at a time, computing all skypix indices
        # for each.
        for record in records:
            if record.region is None:
                continue
            assert getattr(record, governorName) == governorValue
            for system, levels in levelsBySystem:
                baseOverlapRecord = record.dataId.byName()
                baseOverlapRecord["skypix_system"] = system.name
                # Start with the first level, which is the finest-grained one.
                # Compute skypix envelope indices directly for that.
                finest = levels[0]
                indices: Dict[int, Set[int]] = {finest: set()}
                for begin, end in system[finest].pixelization.envelope(record.region):
                    indices[finest].update(range(begin, end))
                # Divide those indices by powers of 4 (and remove duplicates)
                # to work our way up to the last (coarsest) level.
                for lastLevel, nextLevel in zip(levels[:-1], levels[1:]):
                    factor = 4 ** (lastLevel - nextLevel)
                    indices[nextLevel] = {index // factor for index in indices[lastLevel]}
                for level in levels:
                    yield from (
                        {
                            "skypix_level": level,
                            "skypix_index": index,
                            **baseOverlapRecord,  # type: ignore
                        }
                        for index in indices[level]
                    )

    def select(
        self,
        skypix: SkyPixDimension,
        governorValues: Union[AbstractSet[str], EllipsisType],
    ) -> sqlalchemy.sql.FromClause:
        """Construct a subquery expression containing overlaps between the
        given skypix dimension and governor values.

        Parameters
        ----------
        skypix : `SkyPixDimension`
            The skypix dimension (system and level) for which overlaps should
            be returned.
        governorValues : `AbstractSet` [ `str` ] or ``...``
            Values of this element's governor dimension for which overlaps
            should be returned.  For example, if ``self.element`` is ``visit``,
            this is a set of instrument names; if ``self.element`` is
            ``patch``, this is a set of skymap names.  If ``...`` all values
            in the database are used (`GovernorDimensionRecordStorage.values`).

        Returns
        -------
        subquery : `sqlalchemy.sql.FromClause`
            A SELECT query with an alias, intended for use as a subquery, with
            columns equal to ``self.element.required.names`` + ``skypix.name``.

        Raises
        ------
        RuntimeError
            Raised if ``skypix`` is not the common skypix dimension and
            overlaps with it have not been materialized for one or more of
            the requested governor values.
        """
        if skypix != self.element.universe.commonSkyPix:
            # We guarantee elsewhere that we always materialize all overlaps
            # vs. commonSkyPix, but for everything else, we need to check that
            # we have materialized this combination of governor values and
            # skypix.
            summaryWhere = [
                self._summaryTable.columns.skypix_system == skypix.system.name,
                self._summaryTable.columns.skypix_level == skypix.level,
            ]
            gvCol = self._summaryTable.columns[self._governor.element.name]
            if governorValues is not Ellipsis:
                summaryWhere.append(gvCol.in_(list(governorValues)))
            summaryQuery = (
                sqlalchemy.sql.select(gvCol)
                .select_from(self._summaryTable)
                .where(sqlalchemy.sql.and_(*summaryWhere))
            )
            materializedGovernorValues = {row._mapping[gvCol] for row in self._db.query(summaryQuery)}
            if governorValues is Ellipsis:
                missingGovernorValues = self._governor.values - materializedGovernorValues
            else:
                missingGovernorValues = governorValues - materializedGovernorValues
            if missingGovernorValues:
                raise RuntimeError(
                    f"Query requires an overlap join between {skypix.name} and {self.element.name} "
                    f"(for {self._governor.element.name} in {missingGovernorValues}), but these "
                    f"have not been materialized."
                )
        # Expose the skypix index under the skypix dimension's own name, next
        # to the element's required dimension columns.
        columns = [self._overlapTable.columns.skypix_index.label(skypix.name)]
        columns.extend(self._overlapTable.columns[name] for name in self.element.graph.required.names)
        overlapWhere = [
            self._overlapTable.columns.skypix_system == skypix.system.name,
            self._overlapTable.columns.skypix_level == skypix.level,
        ]
        if governorValues is not Ellipsis:
            overlapWhere.append(
                self._overlapTable.columns[self._governor.element.name].in_(list(governorValues))
            )
        overlapQuery = (
            sqlalchemy.sql.select(*columns)
            .select_from(self._overlapTable)
            .where(sqlalchemy.sql.and_(*overlapWhere))
        )
        return overlapQuery.alias(f"{self.element.name}_{skypix.name}_overlap")

    def digestTables(self) -> Iterable[sqlalchemy.schema.Table]:
        """Return tables used for schema digest.

        Returns
        -------
        tables : `Iterable` [ `sqlalchemy.schema.Table` ]
            Possibly empty set of tables for schema digest calculations.
        """
        return [self._summaryTable, self._overlapTable]