Coverage for python/lsst/daf/butler/registry/dimensions/table.py: 86%

234 statements  

« prev     ^ index     » next       coverage.py v6.4.1, created at 2022-06-09 09:42 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = ["TableDimensionRecordStorage"] 

24 

25import itertools 

26import logging 

27import warnings 

28from collections import defaultdict 

29from typing import AbstractSet, Any, Dict, Iterable, Iterator, List, Mapping, Optional, Sequence, Set, Union 

30 

31import sqlalchemy 

32 

33from ...core import ( 

34 DatabaseDimensionElement, 

35 DataCoordinate, 

36 DataCoordinateIterable, 

37 DimensionElement, 

38 DimensionRecord, 

39 GovernorDimension, 

40 NamedKeyDict, 

41 NamedKeyMapping, 

42 NamedValueSet, 

43 SimpleQuery, 

44 SkyPixDimension, 

45 SkyPixSystem, 

46 SpatialRegionDatabaseRepresentation, 

47 TimespanDatabaseRepresentation, 

48 addDimensionForeignKey, 

49 ddl, 

50) 

51from ..interfaces import ( 

52 Database, 

53 DatabaseDimensionOverlapStorage, 

54 DatabaseDimensionRecordStorage, 

55 GovernorDimensionRecordStorage, 

56 StaticTablesContext, 

57) 

58from ..queries import QueryBuilder 

59from ..wildcards import Ellipsis, EllipsisType 

60 

_LOG = logging.getLogger(__name__)


MAX_FETCH_CHUNK = 1000
"""Maximum number of data IDs whose records are fetched in a single query.

Barring something database-engine-specific, this sets the size of the actual
SQL query, not just the number of result rows, because the only way to query
for multiple data IDs in a single SELECT query via SQLAlchemy is to have an OR
term in the WHERE clause for each one.
"""

72 

73 

class TableDimensionRecordStorage(DatabaseDimensionRecordStorage):
    """Dimension record storage backed by a regular database table.

    Parameters
    ----------
    db : `Database`
        Interface to the database engine and namespace that will hold these
        dimension records.
    element : `DatabaseDimensionElement`
        The element whose records this storage will manage.
    table : `sqlalchemy.schema.Table`
        The logical table for the element.
    skyPixOverlap : `_SkyPixOverlapStorage`, optional
        Object that manages the tables that hold materialized spatial overlap
        joins to skypix dimensions.  Should be `None` if (and only if)
        ``element.spatial is None``.
    """

    def __init__(
        self,
        db: Database,
        element: DatabaseDimensionElement,
        *,
        table: sqlalchemy.schema.Table,
        skyPixOverlap: Optional[_SkyPixOverlapStorage] = None,
    ):
        self._db = db
        self._table = table
        self._element = element
        # Map each dimension name of the element to the table column holding
        # it; `fetch` hands this lookup to ``dataIds.constrain``.
        columnNames = element.RecordClass.fields.dimensions.names
        self._fetchColumns: Dict[str, sqlalchemy.sql.ColumnElement] = {}
        for dimension, columnName in zip(element.dimensions, columnNames):
            self._fetchColumns[dimension.name] = table.columns[columnName]
        self._skyPixOverlap = skyPixOverlap
        # Overlap storages registered through `connect`.
        self._otherOverlaps: List[DatabaseDimensionOverlapStorage] = []

    @classmethod
    def initialize(
        cls,
        db: Database,
        element: DatabaseDimensionElement,
        *,
        context: Optional[StaticTablesContext] = None,
        config: Mapping[str, Any],
        governors: NamedKeyMapping[GovernorDimension, GovernorDimensionRecordStorage],
    ) -> DatabaseDimensionRecordStorage:
        # Docstring inherited from DatabaseDimensionRecordStorage.
        spec = element.RecordClass.fields.makeTableSpec(
            RegionReprClass=db.getSpatialRegionRepresentation(),
            TimespanReprClass=db.getTimespanRepresentation(),
        )
        # Declare the table through the static-tables context when we have
        # one; otherwise create it on demand.
        makeTable = db.ensureTableExists if context is None else context.addTable
        table = makeTable(element.name, spec)

        if element.spatial is None:
            # Non-spatial elements have no skypix overlaps to manage.
            return cls(db, element, table=table)

        governor = governors[element.spatial.governor]
        skyPixOverlap = _SkyPixOverlapStorage.initialize(
            db,
            element,
            context=context,
            governor=governor,
        )
        storage = cls(db, element, table=table, skyPixOverlap=skyPixOverlap)

        # Whenever anyone inserts a new governor dimension value, we want
        # to enable overlaps for that value between this element and
        # commonSkyPix.
        def callback(record: DimensionRecord) -> None:
            governorKey = element.spatial.governor.primaryKey.name  # type: ignore
            skyPixOverlap.enable(
                storage,
                element.universe.commonSkyPix,
                getattr(record, governorKey),
            )

        governor.registerInsertionListener(callback)
        return storage

    @property
    def element(self) -> DatabaseDimensionElement:
        # Docstring inherited from DimensionRecordStorage.element.
        return self._element

    def clearCaches(self) -> None:
        # Docstring inherited from DimensionRecordStorage.clearCaches.
        # Nothing is cached by this class, so there is nothing to clear.
        pass

    def join(
        self,
        builder: QueryBuilder,
        *,
        regions: Optional[NamedKeyDict[DimensionElement, SpatialRegionDatabaseRepresentation]] = None,
        timespans: Optional[NamedKeyDict[DimensionElement, TimespanDatabaseRepresentation]] = None,
    ) -> None:
        # Docstring inherited from DimensionRecordStorage.
        table = self._table
        element = self.element
        if regions is not None:
            # Spatial join requested: bring in the materialized overlaps with
            # the common skypix dimension and publish this table's region.
            commonSkyPix = element.universe.commonSkyPix
            overlapDimensions = NamedValueSet(element.required)
            overlapDimensions.add(commonSkyPix)
            assert self._skyPixOverlap is not None
            builder.joinTable(self._skyPixOverlap.select(commonSkyPix, Ellipsis), overlapDimensions)
            regions[element] = self._db.getSpatialRegionRepresentation().fromSelectable(table)
        joinOn = builder.startJoin(table, element.dimensions, element.RecordClass.fields.dimensions.names)
        if timespans is not None:
            # Temporal join requested: require overlap with every timespan
            # already in the query, then publish this table's timespan.
            ownTimespan = self._db.getTimespanRepresentation().fromSelectable(table)
            for otherTimespan in timespans.values():
                joinOn.append(otherTimespan.overlaps(ownTimespan))
            timespans[element] = ownTimespan
        builder.finishJoin(table, joinOn)
        # NOTE(review): the signature is annotated ``-> None`` but the table
        # is returned; confirm against the base-class contract.
        return table

    def fetch(self, dataIds: DataCoordinateIterable) -> Iterable[DimensionRecord]:
        # Docstring inherited from DimensionRecordStorage.fetch.
        RecordClass = self.element.RecordClass
        tableColumns = self._table.columns
        isTemporal = self.element.temporal is not None

        query = SimpleQuery()
        query.columns.extend([tableColumns[name] for name in RecordClass.fields.standard.names])
        if self.element.spatial is not None:
            query.columns.append(tableColumns["region"])
        if isTemporal:
            TimespanReprClass = self._db.getTimespanRepresentation()
            query.columns.extend([tableColumns[name] for name in TimespanReprClass.getFieldNames()])
        query.join(self._table)
        dataIds.constrain(query, lambda name: self._fetchColumns[name])

        with warnings.catch_warnings():
            # Some of our generated queries may contain cartesian joins, this
            # is not a serious issue as it is properly constrained, so we want
            # to suppress sqlalchemy warnings.
            warnings.filterwarnings(
                "ignore",
                message="SELECT statement has a cartesian product",
                category=sqlalchemy.exc.SAWarning,
            )
            for row in self._db.query(query.combine()):
                values = row._asdict()
                if isTemporal:
                    # Fold the database timespan columns into one value.
                    values[TimespanDatabaseRepresentation.NAME] = TimespanReprClass.extract(values)
                yield RecordClass(**values)

    def insert(self, *records: DimensionRecord, replace: bool = False, skip_existing: bool = False) -> None:
        # Docstring inherited from DimensionRecordStorage.insert.
        elementRows = [record.toDict() for record in records]
        if self.element.temporal is not None:
            # Replace the logical timespan entry with its database columns.
            TimespanReprClass = self._db.getTimespanRepresentation()
            for elementRow in elementRows:
                TimespanReprClass.update(elementRow.pop(TimespanDatabaseRepresentation.NAME), result=elementRow)
        with self._db.transaction():
            # ``replace`` takes precedence over ``skip_existing``.
            if not replace and not skip_existing:
                self._db.insert(self._table, *elementRows)
            elif replace:
                self._db.replace(self._table, *elementRows)
            else:
                self._db.ensure(self._table, *elementRows, primary_key_only=True)
            if self._skyPixOverlap is not None:
                # NOTE(review): ``skip_existing`` is not forwarded, so overlap
                # rows for records that already existed are inserted again
                # with a plain insert; presumably that can conflict with the
                # existing overlap rows - confirm.
                self._skyPixOverlap.insert(records, replace=replace)

    def sync(self, record: DimensionRecord, update: bool = False) -> Union[bool, Dict[str, Any]]:
        # Docstring inherited from DimensionRecordStorage.sync.
        compared = record.toDict()
        # Required fields become the ``keys`` passed to `Database.sync`;
        # whatever is left is passed as ``compared``.
        keys = {name: compared.pop(name) for name in record.fields.required.names}
        if self.element.temporal is not None:
            TimespanReprClass = self._db.getTimespanRepresentation()
            TimespanReprClass.update(compared.pop(TimespanDatabaseRepresentation.NAME), result=compared)
        with self._db.transaction():
            _, inserted_or_updated = self._db.sync(
                self._table,
                keys=keys,
                compared=compared,
                update=update,
            )
            overlaps = self._skyPixOverlap
            if overlaps is not None and inserted_or_updated:
                if inserted_or_updated is True:
                    # Inserted a new row, so we just need to insert new
                    # overlap rows.
                    overlaps.insert([record])
                elif "region" in inserted_or_updated:  # type: ignore
                    # Updated the region, so we need to delete old overlap
                    # rows and insert new ones.
                    # (mypy should be able to tell that inserted_or_updated
                    # must be a dict if we get to this clause, but it can't)
                    overlaps.insert([record], replace=True)
                # Otherwise we updated something other than a region, and the
                # overlap rows are left alone.
        return inserted_or_updated

    def digestTables(self) -> Iterable[sqlalchemy.schema.Table]:
        # Docstring inherited from DimensionRecordStorage.digestTables.
        tables = [self._table]
        if self._skyPixOverlap is not None:
            tables += list(self._skyPixOverlap.digestTables())
        return tables

    def connect(self, overlaps: DatabaseDimensionOverlapStorage) -> None:
        # Docstring inherited from DatabaseDimensionRecordStorage.
        self._otherOverlaps.append(overlaps)

281 

282 

class _SkyPixOverlapStorage:
    """A helper object for `TableDimensionRecordStorage` that manages its
    materialized overlaps with skypix dimensions.

    New instances should be constructed by calling `initialize`, not by calling
    the constructor directly.

    Parameters
    ----------
    db : `Database`
        Interface to the underlying database engine and namespace.
    element : `DatabaseDimensionElement`
        Dimension element whose overlaps are to be managed.
    summaryTable : `sqlalchemy.schema.Table`
        Table that records which combinations of skypix dimensions and
        governor dimension values have materialized overlap rows.
    overlapTable : `sqlalchemy.schema.Table`
        Table containing the actual materialized overlap rows.
    governor : `GovernorDimensionRecordStorage`
        Record storage backend for this element's governor dimension.

    Notes
    -----
    This class (and most importantly, the tables it relies on) can in principle
    manage overlaps with any skypix dimension, but at present it is only being
    used to manage relationships with the special ``commonSkyPix`` dimension,
    because that's all the query system uses.  Eventually, we expect to
    require users to explicitly materialize all relationships they will want
    to use in queries.

    Other possible future improvements include:

     - allowing finer-grained skypix dimensions to provide overlap rows for
       coarser ones, by dividing indices by powers of 4 (and possibly doing
       ``SELECT DISTINCT`` in the subquery to remove duplicates);

     - allowing finer-grained database elements (e.g. patch) to provide overlap
       rows for coarser ones (e.g. tract), by ignoring irrelevant columns
       (e.g. the patch IDs) in the subquery (again, possible with
       ``SELECT DISTINCT``).

    But there's no point to doing any of that until the query system can
    figure out how best to ask for overlap rows when an exact match isn't
    available.
    """

    # Name templates for the two tables; formatted with ``element=<element>``.
    _SUMMARY_TABLE_NAME_SPEC = "{element.name}_skypix_overlap_summary"

    _OVERLAP_TABLE_NAME_SPEC = "{element.name}_skypix_overlap"

    def __init__(
        self,
        db: Database,
        element: DatabaseDimensionElement,
        summaryTable: sqlalchemy.schema.Table,
        overlapTable: sqlalchemy.schema.Table,
        governor: GovernorDimensionRecordStorage,
    ):
        self._db = db
        self.element = element
        assert element.spatial is not None
        self._summaryTable = summaryTable
        self._overlapTable = overlapTable
        self._governor = governor

    @classmethod
    def initialize(
        cls,
        db: Database,
        element: DatabaseDimensionElement,
        *,
        context: Optional[StaticTablesContext],
        governor: GovernorDimensionRecordStorage,
    ) -> _SkyPixOverlapStorage:
        """Construct a new instance, creating tables as needed.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        element : `DatabaseDimensionElement`
            Dimension element whose overlaps are to be managed.
        context : `StaticTablesContext`, optional
            If provided, an object to use to create any new tables.  If not
            provided, ``db.ensureTableExists`` should be used instead.
        governor : `GovernorDimensionRecordStorage`
            Record storage backend for this element's governor dimension.

        Returns
        -------
        storage : `_SkyPixOverlapStorage`
            New instance wired to the summary and overlap tables.
        """
        if context is not None:
            op = context.addTable
        else:
            op = db.ensureTableExists
        summaryTable = op(
            cls._SUMMARY_TABLE_NAME_SPEC.format(element=element),
            cls._makeSummaryTableSpec(element),
        )
        overlapTable = op(
            cls._OVERLAP_TABLE_NAME_SPEC.format(element=element),
            cls._makeOverlapTableSpec(element),
        )
        # Construct through ``cls`` (not the hard-coded class name) so the
        # alternate constructor behaves like a normal classmethod.
        return cls(db, element, summaryTable=summaryTable, overlapTable=overlapTable, governor=governor)

    @classmethod
    def _makeSummaryTableSpec(cls, element: DatabaseDimensionElement) -> ddl.TableSpec:
        """Create a specification for the table that records which combinations
        of skypix dimension and governor value have materialized overlaps.

        Parameters
        ----------
        element : `DatabaseDimensionElement`
            Dimension element whose overlaps are to be managed.

        Returns
        -------
        tableSpec : `ddl.TableSpec`
            Table specification.
        """
        assert element.spatial is not None
        tableSpec = ddl.TableSpec(
            fields=[
                ddl.FieldSpec(
                    name="skypix_system",
                    dtype=sqlalchemy.String,
                    length=16,
                    nullable=False,
                    primaryKey=True,
                ),
                ddl.FieldSpec(
                    name="skypix_level",
                    dtype=sqlalchemy.SmallInteger,
                    nullable=False,
                    primaryKey=True,
                ),
            ]
        )
        # The governor dimension column completes the primary key.
        addDimensionForeignKey(tableSpec, element.spatial.governor, primaryKey=True)
        return tableSpec

    @classmethod
    def _makeOverlapTableSpec(cls, element: DatabaseDimensionElement) -> ddl.TableSpec:
        """Create a specification for the table that holds materialized
        overlap rows.

        Parameters
        ----------
        element : `DatabaseDimensionElement`
            Dimension element whose overlaps are to be managed.

        Returns
        -------
        tableSpec : `ddl.TableSpec`
            Table specification.
        """
        assert element.spatial is not None
        governorName = element.spatial.governor.name
        tableSpec = ddl.TableSpec(
            fields=[
                ddl.FieldSpec(
                    name="skypix_system",
                    dtype=sqlalchemy.String,
                    length=16,
                    nullable=False,
                    primaryKey=True,
                ),
                ddl.FieldSpec(
                    name="skypix_level",
                    dtype=sqlalchemy.SmallInteger,
                    nullable=False,
                    primaryKey=True,
                ),
                # (more columns added below)
            ],
            unique=set(),
            indexes={
                # This index has the same fields as the PK, in a different
                # order, to facilitate queries that know skypix_index and want
                # to find the other element.
                (
                    "skypix_system",
                    "skypix_level",
                    "skypix_index",
                )
                + tuple(element.graph.required.names),
            },
            foreignKeys=[
                # Foreign key to summary table.  This makes sure we don't
                # materialize any overlaps without remembering that we've done
                # so in the summary table, though it can't prevent the converse
                # of adding a summary row without adding overlap row (either of
                # those is a logic bug, of course, but we want to be defensive
                # about those).  Using ON DELETE CASCADE, it'd be very easy to
                # implement "disabling" an overlap materialization, because we
                # can just delete the summary row.
                # Note that the governor dimension column is added below, in
                # the call to addDimensionForeignKey.
                ddl.ForeignKeySpec(
                    cls._SUMMARY_TABLE_NAME_SPEC.format(element=element),
                    source=("skypix_system", "skypix_level", governorName),
                    target=("skypix_system", "skypix_level", governorName),
                    onDelete="CASCADE",
                ),
            ],
        )
        # Add fields for the standard element this class manages overlaps for.
        # This is guaranteed to add a column for the governor dimension,
        # because that's a required dependency of element.
        for dimension in element.required:
            addDimensionForeignKey(tableSpec, dimension, primaryKey=True)
        # Add field for the actual skypix index.  We do this later because I
        # think we care (at least a bit) about the order in which the primary
        # key is defined, in that we want a non-summary column like this one
        # to appear after the governor dimension column.
        tableSpec.fields.add(
            ddl.FieldSpec(
                name="skypix_index",
                dtype=sqlalchemy.BigInteger,
                nullable=False,
                primaryKey=True,
            )
        )
        return tableSpec

    def enable(
        self,
        storage: TableDimensionRecordStorage,
        skypix: SkyPixDimension,
        governorValue: str,
    ) -> None:
        """Enable materialization of overlaps between a skypix dimension
        and the records of ``self.element`` with a particular governor value.

        Parameters
        ----------
        storage : `TableDimensionRecordStorage`
            Storage object for the records of ``self.element``.
        skypix : `SkyPixDimension`
            The skypix dimension (system and level) for which overlaps should
            be materialized.
        governorValue : `str`
            Value of this element's governor dimension for which overlaps
            should be materialized.  For example, if ``self.element`` is
            ``visit``, this is an instrument name; if ``self.element`` is
            ``patch``, this is a skymap name.

        Notes
        -----
        If there are existing rows for the given ``governorValue``, overlap
        rows for them will be immediately computed and inserted.  At present,
        that never happens, because we only enable overlaps with
        `DimensionUniverse.commonSkyPix`, and that happens immediately after
        each governor row is inserted (and there can't be any patch rows,
        for example, until after the corresponding skymap row is inserted).

        After calling `enable` for a particular combination, any new records
        for ``self.element`` that are inserted will automatically be
        accompanied by overlap records (via calls to `insert` made
        by `TableDimensionRecordStorage` methods).
        """
        # Because we're essentially materializing a view in Python, we
        # aggressively lock all tables we're reading and writing in order to be
        # sure nothing gets out of sync.  This may not be the most efficient
        # approach possible, but we'll focus on correct before we focus on
        # fast, and enabling a new overlap combination should be a very rare
        # operation anyway, and never one we do in parallel.
        with self._db.transaction(
            lock=[self._governor.table, storage._table, self._summaryTable, self._overlapTable]
        ):
            # Only the "was a row inserted" flag matters here; the first
            # element of the returned pair is unused.
            _, inserted = self._db.sync(
                self._summaryTable,
                keys={
                    "skypix_system": skypix.system.name,
                    "skypix_level": skypix.level,
                    self._governor.element.name: governorValue,
                },
            )
            if inserted:
                _LOG.debug(
                    "Precomputing initial overlaps for %s vs %s for %s=%s",
                    skypix.name,
                    self.element.name,
                    self._governor.element.name,
                    governorValue,
                )
                self._fill(storage=storage, skypix=skypix, governorValue=governorValue)
            else:
                _LOG.debug(
                    "Overlaps already precomputed for %s vs %s for %s=%s",
                    skypix.name,
                    self.element.name,
                    self._governor.element.name,
                    governorValue,
                )

    def _fill(
        self,
        *,
        storage: TableDimensionRecordStorage,
        skypix: SkyPixDimension,
        governorValue: str,
    ) -> None:
        """Insert overlap records for a newly-enabled combination of skypix
        dimension and governor value.

        This method should only be called by `enable`.

        Parameters
        ----------
        storage : `TableDimensionRecordStorage`
            Storage object for the records of ``self.element``.
        skypix : `SkyPixDimension`
            The skypix dimension (system and level) for which overlaps should
            be materialized.
        governorValue : `str`
            Value of this element's governor dimension for which overlaps
            should be materialized.  For example, if ``self.element`` is
            ``visit``, this is an instrument name; if ``self.element`` is
            ``patch``, this is a skymap name.
        """
        overlapRecords: List[dict] = []
        # `DimensionRecordStorage.fetch` as defined by the ABC expects to be
        # given iterables of data IDs that correspond to that element's graph
        # (e.g. {instrument, visit, detector}), not just some subset of it
        # (e.g. {instrument}).  But we know the implementation of `fetch` for
        # `TableDimensionRecordStorage` will use this iterable to do exactly
        # what we want.
        governorDataId = DataCoordinate.standardize(
            {self._governor.element.name: governorValue}, graph=self._governor.element.graph
        )
        for record in storage.fetch(DataCoordinateIterable.fromScalar(governorDataId)):
            if record.region is None:
                continue
            baseOverlapRecord = record.dataId.byName()
            baseOverlapRecord["skypix_system"] = skypix.system.name
            baseOverlapRecord["skypix_level"] = skypix.level
            # ``envelope`` yields half-open ``(begin, end)`` index ranges;
            # expand each into one overlap row per index.
            for begin, end in skypix.pixelization.envelope(record.region):
                overlapRecords.extend(
                    dict(baseOverlapRecord, skypix_index=index) for index in range(begin, end)
                )
        _LOG.debug(
            "Inserting %d initial overlap rows for %s vs %s for %s=%r",
            len(overlapRecords),
            skypix.name,
            self.element.name,
            self._governor.element.name,
            governorValue,
        )
        self._db.insert(self._overlapTable, *overlapRecords)

    def insert(self, records: Sequence[DimensionRecord], replace: bool = False) -> None:
        """Insert overlaps for a sequence of ``self.element`` records that
        have just been inserted.

        This must be called by any method that inserts records for that
        element (i.e. `TableDimensionRecordStorage.insert` and
        `TableDimensionRecordStorage.sync`), within the same transaction.

        Parameters
        ----------
        records : `Sequence` [ `DimensionRecord` ]
            Records for ``self.element``.  Records with `None` regions are
            ignored.
        replace : `bool`, optional
            If `True` (`False` is default) one or more of the given records may
            already exist and is being updated, so we need to delete any
            existing overlap records first.
        """
        governorName = self._governor.element.name
        # Group records by family.governor value.
        grouped: Dict[str, List[DimensionRecord]] = defaultdict(list)
        for record in records:
            grouped[getattr(record, governorName)].append(record)
        _LOG.debug(
            "Precomputing new skypix overlaps for %s where %s in %s.",
            self.element.name,
            governorName,
            grouped.keys(),
        )
        # Make sure the set of combinations to materialize does not change
        # while we are materializing the ones we have, by locking the summary
        # table.  Because we aren't planning to write to the summary table,
        # this could just be a SHARED lock instead of an EXCLUSIVE one, but
        # there's no API for that right now.
        with self._db.transaction(lock=[self._summaryTable]):
            # Query for the skypix dimensions to be associated with each
            # governor value.
            gvCol = self._summaryTable.columns[governorName]
            sysCol = self._summaryTable.columns.skypix_system
            lvlCol = self._summaryTable.columns.skypix_level
            query = (
                sqlalchemy.sql.select(
                    gvCol,
                    sysCol,
                    lvlCol,
                )
                .select_from(self._summaryTable)
                .where(gvCol.in_(list(grouped.keys())))
            )
            # Group results by governor value, then skypix system.
            skypix: Dict[str, NamedKeyDict[SkyPixSystem, List[int]]] = {
                gv: NamedKeyDict() for gv in grouped.keys()
            }
            for summaryRow in self._db.query(query).mappings():
                system = self.element.universe.skypix[summaryRow[sysCol]]
                skypix[summaryRow[gvCol]].setdefault(system, []).append(summaryRow[lvlCol])
            if replace:
                # Construct constraints for a DELETE query as a list of dicts.
                # We include the skypix_system and skypix_level column values
                # explicitly instead of just letting the query search for all
                # of those related to the given records, because they are the
                # first columns in the primary key, and hence searching with
                # them will be way faster (and we don't want to add a new index
                # just for this operation).
                to_delete: List[Dict[str, Any]] = []
                for gv, skypix_systems in skypix.items():
                    for system, skypix_levels in skypix_systems.items():
                        to_delete.extend(
                            {"skypix_system": system.name, "skypix_level": level, **record.dataId.byName()}
                            for record, level in itertools.product(grouped[gv], skypix_levels)
                        )
                self._db.delete(
                    self._overlapTable,
                    ["skypix_system", "skypix_level"] + list(self.element.graph.required.names),
                    *to_delete,
                )
            overlapRecords: List[dict] = []
            # Compute overlaps for one governor value at a time, but gather
            # them all up for one insert.
            for gv, group in grouped.items():
                overlapRecords.extend(self._compute(group, skypix[gv], gv))
            _LOG.debug(
                "Inserting %d new skypix overlap rows for %s where %s in %s.",
                len(overlapRecords),
                self.element.name,
                governorName,
                grouped.keys(),
            )
            self._db.insert(self._overlapTable, *overlapRecords)

    def _compute(
        self,
        records: Sequence[DimensionRecord],
        skypix: NamedKeyDict[SkyPixSystem, List[int]],
        governorValue: str,
    ) -> Iterator[dict]:
        """Compute all overlap rows for a particular governor dimension value
        and all of the skypix dimensions for which its overlaps are enabled.

        This method should only be called by `insert`.

        Parameters
        ----------
        records : `Sequence` [ `DimensionRecord` ]
            Records for ``self.element``.  Records with `None` regions are
            ignored.  All must have the governor value given.
        skypix : `NamedKeyDict` [ `SkyPixSystem`, `list` [ `int` ] ]
            Mapping containing all skypix systems and levels for which overlaps
            should be computed, grouped by `SkyPixSystem`.  Systems with no
            levels are skipped.  The level lists are not modified.
        governorValue : `str`
            Value of this element's governor dimension for which overlaps
            should be computed.  For example, if ``self.element`` is ``visit``,
            this is an instrument name; if ``self.element`` is ``patch``, this
            is a skymap name.

        Yields
        ------
        row : `dict`
            Dictionary representing an overlap row.
        """
        governorName = self._governor.element.name
        # Order each system's levels from finest to coarsest once, up front.
        # Using `sorted` (rather than an in-place ``list.sort`` inside the
        # record loop) keeps the caller's lists untouched and avoids repeating
        # the same sort for every record.
        levelsBySystem = [
            (system, sorted(levels, reverse=True)) for system, levels in skypix.items() if levels
        ]
        # Process input records one at time, computing all skypix indices for
        # each.
        for record in records:
            if record.region is None:
                continue
            assert getattr(record, governorName) == governorValue
            for system, levels in levelsBySystem:
                baseOverlapRecord = record.dataId.byName()
                baseOverlapRecord["skypix_system"] = system.name
                # Start with the first level, which is the finest-grained one.
                # Compute skypix envelope indices directly for that.
                finest = levels[0]
                indices: Dict[int, Set[int]] = {finest: set()}
                for begin, end in system[finest].pixelization.envelope(record.region):
                    indices[finest].update(range(begin, end))
                # Divide those indices by powers of 4 (and remove duplicates)
                # to work our way up to the last (coarsest) level.
                for lastLevel, nextLevel in zip(levels[:-1], levels[1:]):
                    factor = 4 ** (lastLevel - nextLevel)
                    indices[nextLevel] = {index // factor for index in indices[lastLevel]}
                for level in levels:
                    yield from (
                        {
                            "skypix_level": level,
                            "skypix_index": index,
                            **baseOverlapRecord,  # type: ignore
                        }
                        for index in indices[level]
                    )

    def select(
        self,
        skypix: SkyPixDimension,
        governorValues: Union[AbstractSet[str], EllipsisType],
    ) -> sqlalchemy.sql.FromClause:
        """Construct a subquery expression containing overlaps between the
        given skypix dimension and governor values.

        Parameters
        ----------
        skypix : `SkyPixDimension`
            The skypix dimension (system and level) for which overlaps should
            be returned.
        governorValues : `AbstractSet` [ `str` ] or ``...``
            Values of this element's governor dimension for which overlaps
            should be returned.  For example, if ``self.element`` is ``visit``,
            this is a set of instrument names; if ``self.element`` is
            ``patch``, this is a set of skymap names.  If ``...`` all values
            in the database are used (`GovernorDimensionRecordStorage.values`).

        Returns
        -------
        subquery : `sqlalchemy.sql.FromClause`
            A SELECT query with an alias, intended for use as a subquery, with
            columns equal to ``self.element.required.names`` + ``skypix.name``.

        Raises
        ------
        RuntimeError
            Raised if ``skypix`` is not the common skypix dimension and
            overlaps for some requested governor value have not been
            materialized.
        """
        if skypix != self.element.universe.commonSkyPix:
            # We guarantee elsewhere that we always materialize all overlaps
            # vs. commonSkyPix, but for everything else, we need to check that
            # we have materialized this combination of governor values and
            # skypix.
            summaryWhere = [
                self._summaryTable.columns.skypix_system == skypix.system.name,
                self._summaryTable.columns.skypix_level == skypix.level,
            ]
            gvCol = self._summaryTable.columns[self._governor.element.name]
            if governorValues is not Ellipsis:
                summaryWhere.append(gvCol.in_(list(governorValues)))
            summaryQuery = (
                sqlalchemy.sql.select(gvCol)
                .select_from(self._summaryTable)
                .where(sqlalchemy.sql.and_(*summaryWhere))
            )
            materializedGovernorValues = {row._mapping[gvCol] for row in self._db.query(summaryQuery)}
            if governorValues is Ellipsis:
                missingGovernorValues = self._governor.values - materializedGovernorValues
            else:
                missingGovernorValues = governorValues - materializedGovernorValues
            if missingGovernorValues:
                raise RuntimeError(
                    f"Query requires an overlap join between {skypix.name} and {self.element.name} "
                    f"(for {self._governor.element.name} in {missingGovernorValues}), but these "
                    f"have not been materialized."
                )
        # The skypix index column is exposed under the skypix dimension's own
        # name, followed by the element's required dimension columns.
        columns = [self._overlapTable.columns.skypix_index.label(skypix.name)]
        columns.extend(self._overlapTable.columns[name] for name in self.element.graph.required.names)
        overlapWhere = [
            self._overlapTable.columns.skypix_system == skypix.system.name,
            self._overlapTable.columns.skypix_level == skypix.level,
        ]
        if governorValues is not Ellipsis:
            overlapWhere.append(
                self._overlapTable.columns[self._governor.element.name].in_(list(governorValues))
            )
        overlapQuery = (
            sqlalchemy.sql.select(*columns)
            .select_from(self._overlapTable)
            .where(sqlalchemy.sql.and_(*overlapWhere))
        )
        return overlapQuery.alias(f"{self.element.name}_{skypix.name}_overlap")

    def digestTables(self) -> Iterable[sqlalchemy.schema.Table]:
        """Return tables used for schema digest.

        Returns
        -------
        tables : `Iterable` [ `sqlalchemy.schema.Table` ]
            The summary table followed by the overlap table.
        """
        return [self._summaryTable, self._overlapTable]