Coverage for python/lsst/daf/butler/registry/dimensions/table.py: 86%

236 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2022-09-22 02:04 -0700

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = ["TableDimensionRecordStorage"] 

24 

25import itertools 

26import logging 

27import warnings 

28from collections import defaultdict 

29from typing import AbstractSet, Any, Dict, Iterable, Iterator, List, Mapping, Optional, Sequence, Set, Union 

30 

31import sqlalchemy 

32from lsst.utils.ellipsis import Ellipsis, EllipsisType 

33 

34from ...core import ( 

35 DatabaseDimensionElement, 

36 DataCoordinate, 

37 DataCoordinateIterable, 

38 DimensionElement, 

39 DimensionRecord, 

40 GovernorDimension, 

41 NamedKeyDict, 

42 NamedKeyMapping, 

43 NamedValueSet, 

44 SimpleQuery, 

45 SkyPixDimension, 

46 SkyPixSystem, 

47 TimespanDatabaseRepresentation, 

48 addDimensionForeignKey, 

49 ddl, 

50) 

51from ..interfaces import ( 

52 Database, 

53 DatabaseDimensionOverlapStorage, 

54 DatabaseDimensionRecordStorage, 

55 GovernorDimensionRecordStorage, 

56 StaticTablesContext, 

57) 

58from ..queries import QueryBuilder 

59 

# Module-level logger named after this module; used for the debug messages
# emitted while overlap rows are materialized.
_LOG = logging.getLogger(__name__)


# NOTE(review): this constant is not referenced anywhere in the visible part
# of this file -- confirm whether other modules import it before changing or
# removing it.
MAX_FETCH_CHUNK = 1000
"""Maximum number of data IDs we fetch records at a time.

Barring something database-engine-specific, this sets the size of the actual
SQL query, not just the number of result rows, because the only way to query
for multiple data IDs in a single SELECT query via SQLAlchemy is to have an OR
term in the WHERE clause for each one.
"""

71 

72 

class TableDimensionRecordStorage(DatabaseDimensionRecordStorage):
    """A record storage implementation that uses a regular database table.

    Parameters
    ----------
    db : `Database`
        Interface to the database engine and namespace that will hold these
        dimension records.
    element : `DatabaseDimensionElement`
        The element whose records this storage will manage.
    table : `sqlalchemy.schema.Table`
        The logical table for the element.
    skyPixOverlap : `_SkyPixOverlapStorage`, optional
        Object that manages the tables that hold materialized spatial overlap
        joins to skypix dimensions.  Should be `None` if (and only if)
        ``element.spatial is None``.
    """

    def __init__(
        self,
        db: Database,
        element: DatabaseDimensionElement,
        *,
        table: sqlalchemy.schema.Table,
        skyPixOverlap: Optional[_SkyPixOverlapStorage] = None,
    ):
        self._db = db
        self._table = table
        self._element = element
        # Table column to constrain for each of the element's dimensions,
        # keyed by dimension name.  The column names come from the record
        # class's dimension fields, paired positionally with
        # ``element.dimensions``.
        self._fetchColumns: Dict[str, sqlalchemy.sql.ColumnElement] = {
            dimension.name: self._table.columns[name]
            for dimension, name in zip(
                self._element.dimensions, self._element.RecordClass.fields.dimensions.names
            )
        }
        self._skyPixOverlap = skyPixOverlap
        # Filled in by `connect`.
        self._otherOverlaps: List[DatabaseDimensionOverlapStorage] = []

    @classmethod
    def initialize(
        cls,
        db: Database,
        element: DatabaseDimensionElement,
        *,
        context: Optional[StaticTablesContext] = None,
        config: Mapping[str, Any],
        governors: NamedKeyMapping[GovernorDimension, GovernorDimensionRecordStorage],
    ) -> DatabaseDimensionRecordStorage:
        # Docstring inherited from DatabaseDimensionRecordStorage.
        spec = element.RecordClass.fields.makeTableSpec(TimespanReprClass=db.getTimespanRepresentation())
        if context is not None:
            table = context.addTable(element.name, spec)
        else:
            table = db.ensureTableExists(element.name, spec)
        if element.spatial is None:
            # Non-spatial elements get no skypix overlap helper.
            return cls(db, element, table=table)
        # Capture the governor dimension in a local so the callback below does
        # not have to re-narrow ``element.spatial`` from Optional.
        governorDimension = element.spatial.governor
        governor = governors[governorDimension]
        skyPixOverlap = _SkyPixOverlapStorage.initialize(
            db,
            element,
            context=context,
            governor=governor,
        )
        result = cls(db, element, table=table, skyPixOverlap=skyPixOverlap)

        # Whenever anyone inserts a new governor dimension value, we want
        # to enable overlaps for that value between this element and
        # commonSkyPix.
        def callback(record: DimensionRecord) -> None:
            skyPixOverlap.enable(
                result,
                element.universe.commonSkyPix,
                getattr(record, governorDimension.primaryKey.name),
            )

        governor.registerInsertionListener(callback)
        return result

    @property
    def element(self) -> DatabaseDimensionElement:
        # Docstring inherited from DimensionRecordStorage.element.
        return self._element

    def clearCaches(self) -> None:
        # Docstring inherited from DimensionRecordStorage.clearCaches.
        # Nothing is cached by this class, so there is nothing to clear.
        pass

    def join(
        self,
        builder: QueryBuilder,
        *,
        regions: Optional[NamedKeyDict[DimensionElement, sqlalchemy.sql.ColumnElement]] = None,
        timespans: Optional[NamedKeyDict[DimensionElement, TimespanDatabaseRepresentation]] = None,
    ) -> sqlalchemy.sql.FromClause:
        # Docstring inherited from DimensionRecordStorage.
        #
        # The return annotation reflects what the method actually returns
        # (the element's table); it was previously annotated as ``None``.
        if regions is not None:
            # Spatial join requested: bring in the materialized overlaps with
            # the common skypix dimension and publish this element's region
            # column to the caller-provided mapping.
            dimensions = NamedValueSet(self.element.required)
            dimensions.add(self.element.universe.commonSkyPix)
            assert self._skyPixOverlap is not None
            builder.joinTable(
                self._skyPixOverlap.select(self.element.universe.commonSkyPix, Ellipsis),
                dimensions,
            )
            regionsInTable = self._table.columns["region"]
            regions[self.element] = regionsInTable
        joinOn = builder.startJoin(
            self._table, self.element.dimensions, self.element.RecordClass.fields.dimensions.names
        )
        if timespans is not None:
            # Temporal join requested: require overlap with every timespan
            # already in the query, then publish this element's timespan.
            timespanInTable = self._db.getTimespanRepresentation().from_columns(self._table.columns)
            for timespanInQuery in timespans.values():
                joinOn.append(timespanInQuery.overlaps(timespanInTable))
            timespans[self.element] = timespanInTable
        builder.finishJoin(self._table, joinOn)
        return self._table

    def fetch(self, dataIds: DataCoordinateIterable) -> Iterable[DimensionRecord]:
        # Docstring inherited from DimensionRecordStorage.fetch.
        RecordClass = self.element.RecordClass
        query = SimpleQuery()
        query.columns.extend(self._table.columns[name] for name in RecordClass.fields.standard.names)
        if self.element.spatial is not None:
            query.columns.append(self._table.columns["region"])
        if self.element.temporal is not None:
            TimespanReprClass = self._db.getTimespanRepresentation()
            query.columns.extend(self._table.columns[name] for name in TimespanReprClass.getFieldNames())
        query.join(self._table)
        dataIds.constrain(query, lambda name: self._fetchColumns[name])
        # NOTE(review): this is a generator, so the warning filter below stays
        # installed for as long as the caller is still iterating.
        with warnings.catch_warnings():
            # Some of our generated queries may contain cartesian joins, this
            # is not a serious issue as it is properly constrained, so we want
            # to suppress sqlalchemy warnings.
            warnings.filterwarnings(
                "ignore",
                message="SELECT statement has a cartesian product",
                category=sqlalchemy.exc.SAWarning,
            )
            for row in self._db.query(query.combine()):
                values = row._asdict()
                if self.element.temporal is not None:
                    # Collapse the database timespan columns into the single
                    # timespan value the record class expects.
                    values[TimespanDatabaseRepresentation.NAME] = TimespanReprClass.extract(values)
                yield RecordClass(**values)

    def insert(self, *records: DimensionRecord, replace: bool = False, skip_existing: bool = False) -> None:
        # Docstring inherited from DimensionRecordStorage.insert.
        elementRows = [record.toDict() for record in records]
        if self.element.temporal is not None:
            # Expand each record's timespan into its database column(s).
            TimespanReprClass = self._db.getTimespanRepresentation()
            for row in elementRows:
                timespan = row.pop(TimespanDatabaseRepresentation.NAME)
                TimespanReprClass.update(timespan, result=row)
        # Element rows and their overlap rows are written in one transaction.
        with self._db.transaction():
            if replace:
                self._db.replace(self._table, *elementRows)
            elif skip_existing:
                self._db.ensure(self._table, *elementRows, primary_key_only=True)
            else:
                self._db.insert(self._table, *elementRows)
            if self._skyPixOverlap is not None:
                self._skyPixOverlap.insert(records, replace=replace, skip_existing=skip_existing)

    def sync(self, record: DimensionRecord, update: bool = False) -> Union[bool, Dict[str, Any]]:
        # Docstring inherited from DimensionRecordStorage.sync.
        compared = record.toDict()
        # Required fields identify the row; everything else is compared.
        keys = {}
        for name in record.fields.required.names:
            keys[name] = compared.pop(name)
        if self.element.temporal is not None:
            TimespanReprClass = self._db.getTimespanRepresentation()
            timespan = compared.pop(TimespanDatabaseRepresentation.NAME)
            TimespanReprClass.update(timespan, result=compared)
        with self._db.transaction():
            _, inserted_or_updated = self._db.sync(
                self._table,
                keys=keys,
                compared=compared,
                update=update,
            )
            if inserted_or_updated and self._skyPixOverlap is not None:
                if inserted_or_updated is True:
                    # Inserted a new row, so we just need to insert new overlap
                    # rows.
                    self._skyPixOverlap.insert([record])
                elif "region" in inserted_or_updated:
                    # Updated the region, so we need to delete old overlap rows
                    # and insert new ones.
                    # (mypy should be able to tell that inserted_or_updated
                    # must be a dict if we get to this clause, but it can't)
                    self._skyPixOverlap.insert([record], replace=True)
                # Otherwise we updated something other than a region, and the
                # overlap rows are still valid.
        return inserted_or_updated

    def digestTables(self) -> Iterable[sqlalchemy.schema.Table]:
        # Docstring inherited from DimensionRecordStorage.digestTables.
        result = [self._table]
        if self._skyPixOverlap is not None:
            result.extend(self._skyPixOverlap.digestTables())
        return result

    def connect(self, overlaps: DatabaseDimensionOverlapStorage) -> None:
        # Docstring inherited from DatabaseDimensionRecordStorage.
        self._otherOverlaps.append(overlaps)

277 

278 

class _SkyPixOverlapStorage:
    """A helper object for `TableDimensionRecordStorage` that manages its
    materialized overlaps with skypix dimensions.

    New instances should be constructed by calling `initialize`, not by calling
    the constructor directly.

    Parameters
    ----------
    db : `Database`
        Interface to the underlying database engine and namespace.
    element : `DatabaseDimensionElement`
        Dimension element whose overlaps are to be managed.
    summaryTable : `sqlalchemy.schema.Table`
        Table that records which combinations of skypix dimensions and
        governor dimension values have materialized overlap rows.
    overlapTable : `sqlalchemy.schema.Table`
        Table containing the actual materialized overlap rows.
    governor : `GovernorDimensionRecordStorage`
        Record storage backend for this element's governor dimension.

    Notes
    -----
    This class (and most importantly, the tables it relies on) can in principle
    manage overlaps with any skypix dimension, but at present it is
    only being used to manage relationships with the special ``commonSkyPix``
    dimension, because that's all the query system uses.  Eventually, we expect
    to require users to explicitly materialize all relationships they will
    want to use in queries.

    Other possible future improvements include:

     - allowing finer-grained skypix dimensions to provide overlap rows for
       coarser ones, by dividing indices by powers of 4 (and possibly doing
       ``SELECT DISTINCT`` in the subquery to remove duplicates);

     - allowing finer-grained database elements (e.g. patch) to provide overlap
       rows for coarser ones (e.g. tract), by ignoring irrelevant columns
       (e.g. the patch IDs) in the subquery (again, possible with
       ``SELECT DISTINCT``).

    But there's no point to doing any of that until the query system can
    figure out how best to ask for overlap rows when an exact match isn't
    available.
    """

    def __init__(
        self,
        db: Database,
        element: DatabaseDimensionElement,
        summaryTable: sqlalchemy.schema.Table,
        overlapTable: sqlalchemy.schema.Table,
        governor: GovernorDimensionRecordStorage,
    ):
        self._db = db
        self.element = element
        assert element.spatial is not None
        self._summaryTable = summaryTable
        self._overlapTable = overlapTable
        self._governor = governor

    @classmethod
    def initialize(
        cls,
        db: Database,
        element: DatabaseDimensionElement,
        *,
        context: Optional[StaticTablesContext],
        governor: GovernorDimensionRecordStorage,
    ) -> _SkyPixOverlapStorage:
        """Construct a new instance, creating tables as needed.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        element : `DatabaseDimensionElement`
            Dimension element whose overlaps are to be managed.
        context : `StaticTablesContext`, optional
            If provided, an object to use to create any new tables.  If not
            provided, ``db.ensureTableExists`` should be used instead.
        governor : `GovernorDimensionRecordStorage`
            Record storage backend for this element's governor dimension.

        Returns
        -------
        storage : `_SkyPixOverlapStorage`
            New instance of ``cls`` backed by the summary and overlap tables.
        """
        if context is not None:
            op = context.addTable
        else:
            op = db.ensureTableExists
        summaryTable = op(
            cls._SUMMARY_TABLE_NAME_SPEC.format(element=element),
            cls._makeSummaryTableSpec(element),
        )
        overlapTable = op(
            cls._OVERLAP_TABLE_NAME_SPEC.format(element=element),
            cls._makeOverlapTableSpec(element),
        )
        # Construct via ``cls`` rather than naming the class, as is
        # conventional for alternate constructors.
        return cls(db, element, summaryTable=summaryTable, overlapTable=overlapTable, governor=governor)

    # Format template for the summary table name; formatted with ``element``.
    _SUMMARY_TABLE_NAME_SPEC = "{element.name}_skypix_overlap_summary"

    @classmethod
    def _makeSummaryTableSpec(cls, element: DatabaseDimensionElement) -> ddl.TableSpec:
        """Create a specification for the table that records which combinations
        of skypix dimension and governor value have materialized overlaps.

        Parameters
        ----------
        element : `DatabaseDimensionElement`
            Dimension element whose overlaps are to be managed.

        Returns
        -------
        tableSpec : `ddl.TableSpec`
            Table specification.
        """
        assert element.spatial is not None
        tableSpec = ddl.TableSpec(
            fields=[
                ddl.FieldSpec(
                    name="skypix_system",
                    dtype=sqlalchemy.String,
                    length=16,
                    nullable=False,
                    primaryKey=True,
                ),
                ddl.FieldSpec(
                    name="skypix_level",
                    dtype=sqlalchemy.SmallInteger,
                    nullable=False,
                    primaryKey=True,
                ),
            ]
        )
        addDimensionForeignKey(tableSpec, element.spatial.governor, primaryKey=True)
        return tableSpec

    # Format template for the overlap table name; formatted with ``element``.
    _OVERLAP_TABLE_NAME_SPEC = "{element.name}_skypix_overlap"

    @classmethod
    def _makeOverlapTableSpec(cls, element: DatabaseDimensionElement) -> ddl.TableSpec:
        """Create a specification for the table that holds materialized
        overlap rows.

        Parameters
        ----------
        element : `DatabaseDimensionElement`
            Dimension element whose overlaps are to be managed.

        Returns
        -------
        tableSpec : `ddl.TableSpec`
            Table specification.
        """
        assert element.spatial is not None
        tableSpec = ddl.TableSpec(
            fields=[
                ddl.FieldSpec(
                    name="skypix_system",
                    dtype=sqlalchemy.String,
                    length=16,
                    nullable=False,
                    primaryKey=True,
                ),
                ddl.FieldSpec(
                    name="skypix_level",
                    dtype=sqlalchemy.SmallInteger,
                    nullable=False,
                    primaryKey=True,
                ),
                # (more columns added below)
            ],
            unique=set(),
            indexes={
                # This index has the same fields as the PK, in a different
                # order, to facilitate queries that know skypix_index and want
                # to find the other element.
                (
                    "skypix_system",
                    "skypix_level",
                    "skypix_index",
                )
                + tuple(element.graph.required.names),
            },
            foreignKeys=[
                # Foreign key to summary table.  This makes sure we don't
                # materialize any overlaps without remembering that we've done
                # so in the summary table, though it can't prevent the converse
                # of adding a summary row without adding overlap row (either of
                # those is a logic bug, of course, but we want to be defensive
                # about those).  Using ON DELETE CASCADE, it'd be very easy to
                # implement "disabling" an overlap materialization, because we
                # can just delete the summary row.
                # Note that the governor dimension column is added below, in
                # the call to addDimensionForeignKey.
                ddl.ForeignKeySpec(
                    cls._SUMMARY_TABLE_NAME_SPEC.format(element=element),
                    source=("skypix_system", "skypix_level", element.spatial.governor.name),
                    target=("skypix_system", "skypix_level", element.spatial.governor.name),
                    onDelete="CASCADE",
                ),
            ],
        )
        # Add fields for the standard element this class manages overlaps for.
        # This is guaranteed to add a column for the governor dimension,
        # because that's a required dependency of element.
        for dimension in element.required:
            addDimensionForeignKey(tableSpec, dimension, primaryKey=True)
        # Add field for the actual skypix index.  We do this later because I
        # think we care (at least a bit) about the order in which the primary
        # key is defined, in that we want a non-summary column like this one
        # to appear after the governor dimension column.
        tableSpec.fields.add(
            ddl.FieldSpec(
                name="skypix_index",
                dtype=sqlalchemy.BigInteger,
                nullable=False,
                primaryKey=True,
            )
        )
        return tableSpec

    def enable(
        self,
        storage: TableDimensionRecordStorage,
        skypix: SkyPixDimension,
        governorValue: str,
    ) -> None:
        """Enable materialization of overlaps between a skypix dimension
        and the records of ``self.element`` with a particular governor value.

        Parameters
        ----------
        storage : `TableDimensionRecordStorage`
            Storage object for the records of ``self.element``.
        skypix : `SkyPixDimension`
            The skypix dimension (system and level) for which overlaps should
            be materialized.
        governorValue : `str`
            Value of this element's governor dimension for which overlaps
            should be materialized.  For example, if ``self.element`` is
            ``visit``, this is an instrument name; if ``self.element`` is
            ``patch``, this is a skymap name.

        Notes
        -----
        If there are existing rows for the given ``governorValue``, overlap
        rows for them will be immediately computed and inserted.  At present,
        that never happens, because we only enable overlaps with
        `DimensionUniverse.commonSkyPix`, and that happens immediately after
        each governor row is inserted (and there can't be any patch rows,
        for example, until after the corresponding skymap row is inserted).

        After calling `enable` for a particular combination, any new records
        for ``self.element`` that are inserted will automatically be
        accompanied by overlap records (via calls to `insert` made
        by `TableDimensionRecordStorage` methods).
        """
        # Because we're essentially materializing a view in Python, we
        # aggressively lock all tables we're reading and writing in order to be
        # sure nothing gets out of sync.  This may not be the most efficient
        # approach possible, but we'll focus on correct before we focus on
        # fast, and enabling a new overlap combination should be a very rare
        # operation anyway, and never one we do in parallel.
        with self._db.transaction(
            lock=[self._governor.table, storage._table, self._summaryTable, self._overlapTable]
        ):
            # Only the "was a row inserted" flag is needed here.
            _, inserted = self._db.sync(
                self._summaryTable,
                keys={
                    "skypix_system": skypix.system.name,
                    "skypix_level": skypix.level,
                    self._governor.element.name: governorValue,
                },
            )
            if inserted:
                _LOG.debug(
                    "Precomputing initial overlaps for %s vs %s for %s=%s",
                    skypix.name,
                    self.element.name,
                    self._governor.element.name,
                    governorValue,
                )
                self._fill(storage=storage, skypix=skypix, governorValue=governorValue)
            else:
                _LOG.debug(
                    "Overlaps already precomputed for %s vs %s for %s=%s",
                    skypix.name,
                    self.element.name,
                    self._governor.element.name,
                    governorValue,
                )

    def _fill(
        self,
        *,
        storage: TableDimensionRecordStorage,
        skypix: SkyPixDimension,
        governorValue: str,
    ) -> None:
        """Insert overlap records for a newly-enabled combination of skypix
        dimension and governor value.

        This method should only be called by `enable`.

        Parameters
        ----------
        storage : `TableDimensionRecordStorage`
            Storage object for the records of ``self.element``.
        skypix : `SkyPixDimension`
            The skypix dimension (system and level) for which overlaps should
            be materialized.
        governorValue : `str`
            Value of this element's governor dimension for which overlaps
            should be materialized.  For example, if ``self.element`` is
            ``visit``, this is an instrument name; if ``self.element`` is
            ``patch``, this is a skymap name.
        """
        overlapRecords: List[dict] = []
        # `DimensionRecordStorage.fetch` as defined by the ABC expects to be
        # given iterables of data IDs that correspond to that element's graph
        # (e.g. {instrument, visit, detector}), not just some subset of it
        # (e.g. {instrument}).  But we know the implementation of `fetch` for
        # `TableDimensionRecordStorage` will use this iterable to do exactly
        # what we want.
        governorDataId = DataCoordinate.standardize(
            {self._governor.element.name: governorValue}, graph=self._governor.element.graph
        )
        for record in storage.fetch(DataCoordinateIterable.fromScalar(governorDataId)):
            if record.region is None:
                continue
            baseOverlapRecord = record.dataId.byName()
            baseOverlapRecord["skypix_system"] = skypix.system.name
            baseOverlapRecord["skypix_level"] = skypix.level
            # One overlap row per skypix index in each (begin, end) range of
            # the region's envelope; ``end`` is excluded.
            for begin, end in skypix.pixelization.envelope(record.region):
                overlapRecords.extend(
                    dict(baseOverlapRecord, skypix_index=index) for index in range(begin, end)
                )
        _LOG.debug(
            "Inserting %d initial overlap rows for %s vs %s for %s=%r",
            len(overlapRecords),
            skypix.name,
            self.element.name,
            self._governor.element.name,
            governorValue,
        )
        self._db.insert(self._overlapTable, *overlapRecords)

    def insert(
        self, records: Sequence[DimensionRecord], replace: bool = False, skip_existing: bool = False
    ) -> None:
        """Insert overlaps for a sequence of ``self.element`` records that
        have just been inserted.

        This must be called by any method that inserts records for that
        element (i.e. `TableDimensionRecordStorage.insert` and
        `TableDimensionRecordStorage.sync`), within the same transaction.

        Parameters
        ----------
        records : `Sequence` [ `DimensionRecord` ]
            Records for ``self.element``.  Records with `None` regions are
            ignored.
        replace : `bool`, optional
            If `True` (`False` is default) one or more of the given records may
            already exist and is being updated, so we need to delete any
            existing overlap records first.
        skip_existing : `bool`, optional
            If `True` (`False` is default), skip insertion if a record with
            the same primary key values already exists.
        """
        # Group records by family.governor value.
        grouped: Dict[str, List[DimensionRecord]] = defaultdict(list)
        for record in records:
            grouped[getattr(record, self._governor.element.name)].append(record)
        _LOG.debug(
            "Precomputing new skypix overlaps for %s where %s in %s.",
            self.element.name,
            self._governor.element.name,
            grouped.keys(),
        )
        # Make sure the set of combinations to materialize does not change
        # while we are materializing the ones we have, by locking the summary
        # table.  Because we aren't planning to write to the summary table,
        # this could just be a SHARED lock instead of an EXCLUSIVE one, but
        # there's no API for that right now.
        with self._db.transaction(lock=[self._summaryTable]):
            # Query for the skypix dimensions to be associated with each
            # governor value.
            gvCol = self._summaryTable.columns[self._governor.element.name]
            sysCol = self._summaryTable.columns.skypix_system
            lvlCol = self._summaryTable.columns.skypix_level
            query = (
                sqlalchemy.sql.select(
                    gvCol,
                    sysCol,
                    lvlCol,
                )
                .select_from(self._summaryTable)
                .where(gvCol.in_(list(grouped.keys())))
            )
            # Group results by governor value, then skypix system.
            skypix: Dict[str, NamedKeyDict[SkyPixSystem, List[int]]] = {
                gv: NamedKeyDict() for gv in grouped.keys()
            }
            for summaryRow in self._db.query(query).mappings():
                system = self.element.universe.skypix[summaryRow[sysCol]]
                skypix[summaryRow[gvCol]].setdefault(system, []).append(summaryRow[lvlCol])
            if replace:
                # Construct constraints for a DELETE query as a list of dicts.
                # We include the skypix_system and skypix_level column values
                # explicitly instead of just letting the query search for all
                # of those related to the given records, because they are the
                # first columns in the primary key, and hence searching with
                # them will be way faster (and we don't want to add a new index
                # just for this operation).
                to_delete: List[Dict[str, Any]] = []
                for gv, skypix_systems in skypix.items():
                    for system, skypix_levels in skypix_systems.items():
                        to_delete.extend(
                            {"skypix_system": system.name, "skypix_level": level, **record.dataId.byName()}
                            for record, level in itertools.product(grouped[gv], skypix_levels)
                        )
                self._db.delete(
                    self._overlapTable,
                    ["skypix_system", "skypix_level"] + list(self.element.graph.required.names),
                    *to_delete,
                )
            overlapRecords: List[dict] = []
            # Compute overlaps for one governor value at a time, but gather
            # them all up for one insert.
            for gv, group in grouped.items():
                overlapRecords.extend(self._compute(group, skypix[gv], gv))
            _LOG.debug(
                "Inserting %d new skypix overlap rows for %s where %s in %s.",
                len(overlapRecords),
                self.element.name,
                self._governor.element.name,
                grouped.keys(),
            )
            if skip_existing:
                self._db.ensure(self._overlapTable, *overlapRecords, primary_key_only=True)
            else:
                self._db.insert(self._overlapTable, *overlapRecords)

    def _compute(
        self,
        records: Sequence[DimensionRecord],
        skypix: NamedKeyDict[SkyPixSystem, List[int]],
        governorValue: str,
    ) -> Iterator[dict]:
        """Compute all overlap rows for a particular governor dimension value
        and all of the skypix dimensions for which its overlaps are enabled.

        This method should only be called by `insert`.

        Parameters
        ----------
        records : `Sequence` [ `DimensionRecord` ]
            Records for ``self.element``.  Records with `None` regions are
            ignored.  All must have the governor value given.
        skypix : `NamedKeyDict` [ `SkyPixSystem`, `list` [ `int` ] ]
            Mapping containing all skypix systems and levels for which overlaps
            should be computed, grouped by `SkyPixSystem`.  The level lists
            are not modified.
        governorValue : `str`
            Value of this element's governor dimension for which overlaps
            should be computed.  For example, if ``self.element`` is ``visit``,
            this is an instrument name; if ``self.element`` is ``patch``, this
            is a skymap name.

        Yields
        ------
        row : `dict`
            Dictionary representing an overlap row.
        """
        # Order each system's levels from finest (largest) to coarsest once,
        # up front, working on copies so the caller's lists are left alone.
        # Systems with no levels contribute no rows and are dropped here.
        systemLevels = [
            (system, sorted(levels, reverse=True)) for system, levels in skypix.items() if levels
        ]
        # Process input records one at a time, computing all skypix indices
        # for each.
        for record in records:
            if record.region is None:
                continue
            assert getattr(record, self._governor.element.name) == governorValue
            for system, levels in systemLevels:
                baseOverlapRecord = record.dataId.byName()
                baseOverlapRecord["skypix_system"] = system.name
                # Start with the first level, which is the finest-grained one.
                # Compute skypix envelope indices directly for that.
                finest = levels[0]
                indices: Dict[int, Set[int]] = {finest: set()}
                for begin, end in system[finest].pixelization.envelope(record.region):
                    indices[finest].update(range(begin, end))
                # Divide those indices by powers of 4 (and remove duplicates)
                # to work our way up to the last (coarsest) level.
                for lastLevel, nextLevel in zip(levels[:-1], levels[1:]):
                    factor = 4 ** (lastLevel - nextLevel)
                    indices[nextLevel] = {index // factor for index in indices[lastLevel]}
                for level in levels:
                    yield from (
                        {
                            "skypix_level": level,
                            "skypix_index": index,
                            **baseOverlapRecord,  # type: ignore
                        }
                        for index in indices[level]
                    )

    def select(
        self,
        skypix: SkyPixDimension,
        governorValues: Union[AbstractSet[str], EllipsisType],
    ) -> sqlalchemy.sql.FromClause:
        """Construct a subquery expression containing overlaps between the
        given skypix dimension and governor values.

        Parameters
        ----------
        skypix : `SkyPixDimension`
            The skypix dimension (system and level) for which overlaps should
            be materialized.
        governorValues : `AbstractSet` [ `str` ] or ``...``
            Values of this element's governor dimension for which overlaps
            should be returned.  For example, if ``self.element`` is ``visit``,
            this is a set of instrument names; if ``self.element`` is
            ``patch``, this is a set of skymap names.  If ``...`` all values
            in the database are used (`GovernorDimensionRecordStorage.values`).

        Returns
        -------
        subquery : `sqlalchemy.sql.FromClause`
            A SELECT query with an alias, intended for use as a subquery, with
            columns equal to ``self.element.required.names`` + ``skypix.name``.

        Raises
        ------
        RuntimeError
            Raised if ``skypix`` is not the common skypix dimension and
            overlaps with it have not been materialized for one or more of
            the requested governor values.
        """
        if skypix != self.element.universe.commonSkyPix:
            # We guarantee elsewhere that we always materialize all overlaps
            # vs. commonSkyPix, but for everything else, we need to check that
            # we have materialized this combination of governor values and
            # skypix.
            summaryWhere = [
                self._summaryTable.columns.skypix_system == skypix.system.name,
                self._summaryTable.columns.skypix_level == skypix.level,
            ]
            gvCol = self._summaryTable.columns[self._governor.element.name]
            if governorValues is not Ellipsis:
                summaryWhere.append(gvCol.in_(list(governorValues)))
            summaryQuery = (
                sqlalchemy.sql.select(gvCol)
                .select_from(self._summaryTable)
                .where(sqlalchemy.sql.and_(*summaryWhere))
            )
            materializedGovernorValues = {row._mapping[gvCol] for row in self._db.query(summaryQuery)}
            if governorValues is Ellipsis:
                missingGovernorValues = self._governor.values - materializedGovernorValues
            else:
                missingGovernorValues = governorValues - materializedGovernorValues
            if missingGovernorValues:
                raise RuntimeError(
                    f"Query requires an overlap join between {skypix.name} and {self.element.name} "
                    f"(for {self._governor.element.name} in {missingGovernorValues}), but these "
                    f"have not been materialized."
                )
        # The skypix index column is relabeled with the skypix dimension's
        # name so the subquery's columns are named after dimensions.
        columns = [self._overlapTable.columns.skypix_index.label(skypix.name)]
        columns.extend(self._overlapTable.columns[name] for name in self.element.graph.required.names)
        overlapWhere = [
            self._overlapTable.columns.skypix_system == skypix.system.name,
            self._overlapTable.columns.skypix_level == skypix.level,
        ]
        if governorValues is not Ellipsis:
            overlapWhere.append(
                self._overlapTable.columns[self._governor.element.name].in_(list(governorValues))
            )
        overlapQuery = (
            sqlalchemy.sql.select(*columns)
            .select_from(self._overlapTable)
            .where(sqlalchemy.sql.and_(*overlapWhere))
        )
        return overlapQuery.alias(f"{self.element.name}_{skypix.name}_overlap")

    def digestTables(self) -> Iterable[sqlalchemy.schema.Table]:
        """Return tables used for schema digest.

        Returns
        -------
        tables : `Iterable` [ `sqlalchemy.schema.Table` ]
            Possibly empty set of tables for schema digest calculations.
        """
        return [self._summaryTable, self._overlapTable]