Coverage for python/lsst/daf/butler/registry/dimensions/table.py: 85%

Shortcuts on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

231 statements  

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = ["TableDimensionRecordStorage"] 

24 

25from collections import defaultdict 

26import itertools 

27import logging 

28from typing import ( 

29 AbstractSet, 

30 Any, 

31 Dict, 

32 Iterable, 

33 Iterator, 

34 List, 

35 Mapping, 

36 Optional, 

37 Sequence, 

38 Set, 

39 Union, 

40) 

41import warnings 

42 

43import sqlalchemy 

44 

45from ...core import ( 

46 addDimensionForeignKey, 

47 DatabaseDimensionElement, 

48 DataCoordinate, 

49 DataCoordinateIterable, 

50 ddl, 

51 DimensionElement, 

52 DimensionRecord, 

53 GovernorDimension, 

54 NamedKeyDict, 

55 NamedKeyMapping, 

56 NamedValueSet, 

57 SimpleQuery, 

58 SkyPixDimension, 

59 SkyPixSystem, 

60 SpatialRegionDatabaseRepresentation, 

61 TimespanDatabaseRepresentation, 

62) 

63from ..interfaces import ( 

64 Database, 

65 DatabaseDimensionOverlapStorage, 

66 DatabaseDimensionRecordStorage, 

67 GovernorDimensionRecordStorage, 

68 StaticTablesContext, 

69) 

70from ..queries import QueryBuilder 

71from ..wildcards import Ellipsis, EllipsisType 

72 

73 

# Module-level logger, named after this module.
_LOG = logging.getLogger(__name__)


MAX_FETCH_CHUNK = 1000
"""Maximum number of data IDs for which records are fetched in one query.

Barring something database-engine-specific, this sets the size of the actual
SQL query, not just the number of result rows, because the only way to query
for multiple data IDs in a single SELECT query via SQLAlchemy is to have an OR
term in the WHERE clause for each one.
"""

85 

86 

class TableDimensionRecordStorage(DatabaseDimensionRecordStorage):
    """A record storage implementation that uses a regular database table.

    Parameters
    ----------
    db : `Database`
        Interface to the database engine and namespace that will hold these
        dimension records.
    element : `DatabaseDimensionElement`
        The element whose records this storage will manage.
    table : `sqlalchemy.schema.Table`
        The logical table for the element.
    skyPixOverlap : `_SkyPixOverlapStorage`, optional
        Object that manages the tables that hold materialized spatial overlap
        joins to skypix dimensions.  Should be `None` if (and only if)
        ``element.spatial is None``.
    """
    def __init__(self, db: Database, element: DatabaseDimensionElement, *, table: sqlalchemy.schema.Table,
                 skyPixOverlap: Optional[_SkyPixOverlapStorage] = None):
        self._db = db
        self._table = table
        self._element = element
        # Map each dimension name of the element to the table column that
        # holds its value; used by `fetch` to constrain queries by data ID.
        self._fetchColumns: Dict[str, sqlalchemy.sql.ColumnElement] = {
            dimension.name: self._table.columns[name]
            for dimension, name in zip(self._element.dimensions,
                                       self._element.RecordClass.fields.dimensions.names)
        }
        self._skyPixOverlap = skyPixOverlap
        # Overlap storage objects registered later via `connect`.
        self._otherOverlaps: List[DatabaseDimensionOverlapStorage] = []

    @classmethod
    def initialize(
        cls,
        db: Database,
        element: DatabaseDimensionElement, *,
        context: Optional[StaticTablesContext] = None,
        config: Mapping[str, Any],
        governors: NamedKeyMapping[GovernorDimension, GovernorDimensionRecordStorage],
    ) -> DatabaseDimensionRecordStorage:
        # Docstring inherited from DatabaseDimensionRecordStorage.
        # (``config`` is accepted for interface compatibility; it is not used
        # by this implementation.)
        spec = element.RecordClass.fields.makeTableSpec(
            RegionReprClass=db.getSpatialRegionRepresentation(),
            TimespanReprClass=db.getTimespanRepresentation(),
        )
        if context is not None:
            table = context.addTable(element.name, spec)
        else:
            table = db.ensureTableExists(element.name, spec)
        skyPixOverlap: Optional[_SkyPixOverlapStorage]
        if element.spatial is not None:
            governor = governors[element.spatial.governor]
            skyPixOverlap = _SkyPixOverlapStorage.initialize(
                db,
                element,
                context=context,
                governor=governor,
            )
            result = cls(db, element, table=table, skyPixOverlap=skyPixOverlap)

            # Whenever anyone inserts a new governor dimension value, we want
            # to enable overlaps for that value between this element and
            # commonSkyPix.
            def callback(record: DimensionRecord) -> None:
                skyPixOverlap.enable(  # type: ignore
                    result,
                    element.universe.commonSkyPix,
                    getattr(record, element.spatial.governor.primaryKey.name),  # type: ignore
                )

            governor.registerInsertionListener(callback)
            return result
        else:
            return cls(db, element, table=table)

    @property
    def element(self) -> DatabaseDimensionElement:
        # Docstring inherited from DimensionRecordStorage.element.
        return self._element

    def clearCaches(self) -> None:
        # Docstring inherited from DimensionRecordStorage.clearCaches.
        # This implementation keeps no caches, so there is nothing to clear.
        pass

    def join(
        self,
        builder: QueryBuilder, *,
        regions: Optional[NamedKeyDict[DimensionElement, SpatialRegionDatabaseRepresentation]] = None,
        timespans: Optional[NamedKeyDict[DimensionElement, TimespanDatabaseRepresentation]] = None,
    ) -> sqlalchemy.sql.FromClause:
        # Docstring inherited from DimensionRecordStorage.
        # The return annotation reflects what this method actually returns
        # (the element's table), rather than `None`.
        if regions is not None:
            # Spatial joins go through the materialized overlaps with the
            # common skypix dimension: the overlap subquery is joined on this
            # element's required dimensions plus commonSkyPix.
            dimensions = NamedValueSet(self.element.required)
            dimensions.add(self.element.universe.commonSkyPix)
            assert self._skyPixOverlap is not None
            builder.joinTable(
                self._skyPixOverlap.select(self.element.universe.commonSkyPix, Ellipsis),
                dimensions,
            )
            regionsInTable = self._db.getSpatialRegionRepresentation().fromSelectable(self._table)
            regions[self.element] = regionsInTable
        joinOn = builder.startJoin(self._table, self.element.dimensions,
                                   self.element.RecordClass.fields.dimensions.names)
        if timespans is not None:
            timespanInTable = self._db.getTimespanRepresentation().fromSelectable(self._table)
            # Require this element's timespan to overlap every timespan
            # already in the query, then register our own for later joins.
            for timespanInQuery in timespans.values():
                joinOn.append(timespanInQuery.overlaps(timespanInTable))
            timespans[self.element] = timespanInTable
        builder.finishJoin(self._table, joinOn)
        return self._table

    def fetch(self, dataIds: DataCoordinateIterable) -> Iterable[DimensionRecord]:
        # Docstring inherited from DimensionRecordStorage.fetch.
        RecordClass = self.element.RecordClass
        query = SimpleQuery()
        query.columns.extend(self._table.columns[name] for name in RecordClass.fields.standard.names)
        if self.element.spatial is not None:
            query.columns.append(self._table.columns["region"])
        if self.element.temporal is not None:
            TimespanReprClass = self._db.getTimespanRepresentation()
            query.columns.extend(self._table.columns[name] for name in TimespanReprClass.getFieldNames())
        query.join(self._table)
        dataIds.constrain(query, lambda name: self._fetchColumns[name])
        with warnings.catch_warnings():
            # Some of our generated queries may contain cartesian joins, this
            # is not a serious issue as it is properly constrained, so we want
            # to suppress sqlalchemy warnings.
            warnings.filterwarnings("ignore", message="SELECT statement has a cartesian product",
                                    category=sqlalchemy.exc.SAWarning)
            for row in self._db.query(query.combine()):
                values = row._asdict()
                if self.element.temporal is not None:
                    # Collapse the database representation's columns back
                    # into a single timespan value for the record.
                    values[TimespanDatabaseRepresentation.NAME] = TimespanReprClass.extract(values)
                yield RecordClass(**values)

    def insert(self, *records: DimensionRecord, replace: bool = False) -> None:
        # Docstring inherited from DimensionRecordStorage.insert.
        elementRows = [record.toDict() for record in records]
        if self.element.temporal is not None:
            # Replace the single timespan entry in each row with the
            # column(s) used by the database's timespan representation.
            TimespanReprClass = self._db.getTimespanRepresentation()
            for row in elementRows:
                timespan = row.pop(TimespanDatabaseRepresentation.NAME)
                TimespanReprClass.update(timespan, result=row)
        # Element rows and their overlap rows are written in one transaction.
        with self._db.transaction():
            if replace:
                self._db.replace(self._table, *elementRows)
            else:
                self._db.insert(self._table, *elementRows)
            if self._skyPixOverlap is not None:
                self._skyPixOverlap.insert(records, replace=replace)

    def sync(self, record: DimensionRecord, update: bool = False) -> Union[bool, Dict[str, Any]]:
        # Docstring inherited from DimensionRecordStorage.sync.
        compared = record.toDict()
        # Split the record into key fields (the required fields) and the
        # remaining fields, which are compared against any existing row.
        keys = {}
        for name in record.fields.required.names:
            keys[name] = compared.pop(name)
        if self.element.temporal is not None:
            TimespanReprClass = self._db.getTimespanRepresentation()
            timespan = compared.pop(TimespanDatabaseRepresentation.NAME)
            TimespanReprClass.update(timespan, result=compared)
        with self._db.transaction():
            _, inserted_or_updated = self._db.sync(
                self._table,
                keys=keys,
                compared=compared,
                update=update,
            )
            if inserted_or_updated and self._skyPixOverlap is not None:
                if inserted_or_updated is True:
                    # Inserted a new row, so we just need to insert new overlap
                    # rows.
                    self._skyPixOverlap.insert([record])
                elif "region" in inserted_or_updated:  # type: ignore
                    # Updated the region, so we need to delete old overlap rows
                    # and insert new ones.
                    # (mypy should be able to tell that inserted_or_updated
                    # must be a dict if we get to this clause, but it can't)
                    self._skyPixOverlap.insert([record], replace=True)
                # Otherwise we updated something other than a region, and the
                # existing overlap rows remain valid.
        return inserted_or_updated

    def digestTables(self) -> Iterable[sqlalchemy.schema.Table]:
        # Docstring inherited from DimensionRecordStorage.digestTables.
        result = [self._table]
        if self._skyPixOverlap is not None:
            result.extend(self._skyPixOverlap.digestTables())
        return result

    def connect(self, overlaps: DatabaseDimensionOverlapStorage) -> None:
        # Docstring inherited from DatabaseDimensionRecordStorage.
        self._otherOverlaps.append(overlaps)

278 

279 

class _SkyPixOverlapStorage:
    """A helper object for `TableDimensionRecordStorage` that manages its
    materialized overlaps with skypix dimensions.

    New instances should be constructed by calling `initialize`, not by calling
    the constructor directly.

    Parameters
    ----------
    db : `Database`
        Interface to the underlying database engine and namespace.
    element : `DatabaseDimensionElement`
        Dimension element whose overlaps are to be managed.
    summaryTable : `sqlalchemy.schema.Table`
        Table that records which combinations of skypix dimensions and
        governor dimension values have materialized overlap rows.
    overlapTable : `sqlalchemy.schema.Table`
        Table containing the actual materialized overlap rows.
    governor : `GovernorDimensionRecordStorage`
        Record storage backend for this element's governor dimension.

    Notes
    -----
    This class (and most importantly, the tables it relies on) can in principle
    manage overlaps with any skypix dimension, but at present it is
    only being used to manage relationships with the special ``commonSkyPix``
    dimension, because that's all the query system uses.  Eventually, we expect
    to require users to explicitly materialize all relationships they will
    want to use in queries.

    Other possible future improvements include:

     - allowing finer-grained skypix dimensions to provide overlap rows for
       coarser ones, by dividing indices by powers of 4 (and possibly doing
       ``SELECT DISTINCT`` in the subquery to remove duplicates);

     - allowing finer-grained database elements (e.g. patch) to provide overlap
       rows for coarser ones (e.g. tract), by ignoring irrelevant columns
       (e.g. the patch IDs) in the subquery (again, possible with
       ``SELECT DISTINCT``).

    But there's no point to doing any of that until the query system can
    figure out how best to ask for overlap rows when an exact match isn't
    available.
    """
    def __init__(
        self,
        db: Database,
        element: DatabaseDimensionElement,
        summaryTable: sqlalchemy.schema.Table,
        overlapTable: sqlalchemy.schema.Table,
        governor: GovernorDimensionRecordStorage,
    ):
        self._db = db
        self.element = element
        assert element.spatial is not None
        self._summaryTable = summaryTable
        self._overlapTable = overlapTable
        self._governor = governor

    @classmethod
    def initialize(
        cls,
        db: Database,
        element: DatabaseDimensionElement, *,
        context: Optional[StaticTablesContext],
        governor: GovernorDimensionRecordStorage,
    ) -> _SkyPixOverlapStorage:
        """Construct a new instance, creating tables as needed.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        element : `DatabaseDimensionElement`
            Dimension element whose overlaps are to be managed.
        context : `StaticTablesContext`, optional
            If provided, an object to use to create any new tables.  If not
            provided, ``db.ensureTableExists`` should be used instead.
        governor : `GovernorDimensionRecordStorage`
            Record storage backend for this element's governor dimension.

        Returns
        -------
        storage : `_SkyPixOverlapStorage`
            New instance of ``cls`` backed by the summary and overlap tables.
        """
        if context is not None:
            op = context.addTable
        else:
            op = db.ensureTableExists
        summaryTable = op(
            cls._SUMMARY_TABLE_NAME_SPEC.format(element=element),
            cls._makeSummaryTableSpec(element),
        )
        overlapTable = op(
            cls._OVERLAP_TABLE_NAME_SPEC.format(element=element),
            cls._makeOverlapTableSpec(element),
        )
        # Construct via ``cls`` (not the hard-coded class name) so that
        # subclasses calling this alternate constructor get their own type.
        return cls(db, element, summaryTable=summaryTable, overlapTable=overlapTable,
                   governor=governor)

    # Format template for the summary table's name.
    _SUMMARY_TABLE_NAME_SPEC = "{element.name}_skypix_overlap_summary"

    @classmethod
    def _makeSummaryTableSpec(cls, element: DatabaseDimensionElement) -> ddl.TableSpec:
        """Create a specification for the table that records which combinations
        of skypix dimension and governor value have materialized overlaps.

        Parameters
        ----------
        element : `DatabaseDimensionElement`
            Dimension element whose overlaps are to be managed.

        Returns
        -------
        tableSpec : `ddl.TableSpec`
            Table specification.
        """
        assert element.spatial is not None
        tableSpec = ddl.TableSpec(
            fields=[
                ddl.FieldSpec(
                    name="skypix_system",
                    dtype=sqlalchemy.String,
                    length=16,
                    nullable=False,
                    primaryKey=True,
                ),
                ddl.FieldSpec(
                    name="skypix_level",
                    dtype=sqlalchemy.SmallInteger,
                    nullable=False,
                    primaryKey=True,
                ),
            ]
        )
        addDimensionForeignKey(tableSpec, element.spatial.governor, primaryKey=True)
        return tableSpec

    # Format template for the overlap table's name.
    _OVERLAP_TABLE_NAME_SPEC = "{element.name}_skypix_overlap"

    @classmethod
    def _makeOverlapTableSpec(cls, element: DatabaseDimensionElement) -> ddl.TableSpec:
        """Create a specification for the table that holds materialized
        overlap rows.

        Parameters
        ----------
        element : `DatabaseDimensionElement`
            Dimension element whose overlaps are to be managed.

        Returns
        -------
        tableSpec : `ddl.TableSpec`
            Table specification.
        """
        assert element.spatial is not None
        tableSpec = ddl.TableSpec(
            fields=[
                ddl.FieldSpec(
                    name="skypix_system",
                    dtype=sqlalchemy.String,
                    length=16,
                    nullable=False,
                    primaryKey=True,
                ),
                ddl.FieldSpec(
                    name="skypix_level",
                    dtype=sqlalchemy.SmallInteger,
                    nullable=False,
                    primaryKey=True,
                ),
                # (more columns added below)
            ],
            unique=set(),
            indexes={
                # This index has the same fields as the PK, in a different
                # order, to facilitate queries that know skypix_index and want
                # to find the other element.
                ("skypix_system", "skypix_level", "skypix_index",) + tuple(element.graph.required.names),
            },
            foreignKeys=[
                # Foreign key to summary table.  This makes sure we don't
                # materialize any overlaps without remembering that we've done
                # so in the summary table, though it can't prevent the converse
                # of adding a summary row without adding overlap row (either of
                # those is a logic bug, of course, but we want to be defensive
                # about those).  Using ON DELETE CASCADE, it'd be very easy to
                # implement "disabling" an overlap materialization, because we
                # can just delete the summary row.
                # Note that the governor dimension column is added below, in
                # the call to addDimensionForeignKey.
                ddl.ForeignKeySpec(
                    cls._SUMMARY_TABLE_NAME_SPEC.format(element=element),
                    source=("skypix_system", "skypix_level", element.spatial.governor.name),
                    target=("skypix_system", "skypix_level", element.spatial.governor.name),
                    onDelete="CASCADE",
                ),
            ],
        )
        # Add fields for the standard element this class manages overlaps for.
        # This is guaranteed to add a column for the governor dimension,
        # because that's a required dependency of element.
        for dimension in element.required:
            addDimensionForeignKey(tableSpec, dimension, primaryKey=True)
        # Add field for the actual skypix index.  We do this later because I
        # think we care (at least a bit) about the order in which the primary
        # key is defined, in that we want a non-summary column like this one
        # to appear after the governor dimension column.
        tableSpec.fields.add(
            ddl.FieldSpec(
                name="skypix_index",
                dtype=sqlalchemy.BigInteger,
                nullable=False,
                primaryKey=True,
            )
        )
        return tableSpec

    def enable(
        self,
        storage: TableDimensionRecordStorage,
        skypix: SkyPixDimension,
        governorValue: str,
    ) -> None:
        """Enable materialization of overlaps between a skypix dimension
        and the records of ``self.element`` with a particular governor value.

        Parameters
        ----------
        storage : `TableDimensionRecordStorage`
            Storage object for the records of ``self.element``.
        skypix : `SkyPixDimension`
            The skypix dimension (system and level) for which overlaps should
            be materialized.
        governorValue : `str`
            Value of this element's governor dimension for which overlaps
            should be materialized.  For example, if ``self.element`` is
            ``visit``, this is an instrument name; if ``self.element`` is
            ``patch``, this is a skymap name.

        Notes
        -----
        If there are existing rows for the given ``governorValue``, overlap
        rows for them will be immediately computed and inserted.  At present,
        that never happens, because we only enable overlaps with
        `DimensionUniverse.commonSkyPix`, and that happens immediately after
        each governor row is inserted (and there can't be any patch rows,
        for example, until after the corresponding skymap row is inserted).

        After calling `enable` for a particular combination, any new records
        for ``self.element`` that are inserted will automatically be
        accompanied by overlap records (via calls to `insert` made
        by `TableDimensionRecordStorage` methods).
        """
        # Because we're essentially materializing a view in Python, we
        # aggressively lock all tables we're reading and writing in order to be
        # sure nothing gets out of sync.  This may not be the most efficient
        # approach possible, but we'll focus on correct before we focus on
        # fast, and enabling a new overlap combination should be a very rare
        # operation anyway, and never one we do in parallel.
        with self._db.transaction(lock=[self._governor.table, storage._table,
                                        self._summaryTable, self._overlapTable]):
            # Only the "was a row inserted" flag matters here; the synced row
            # itself is not needed.
            _, inserted = self._db.sync(
                self._summaryTable,
                keys={
                    "skypix_system": skypix.system.name,
                    "skypix_level": skypix.level,
                    self._governor.element.name: governorValue,
                },
            )
            if inserted:
                _LOG.debug(
                    "Precomputing initial overlaps for %s vs %s for %s=%s",
                    skypix.name,
                    self.element.name,
                    self._governor.element.name,
                    governorValue
                )
                self._fill(storage=storage, skypix=skypix, governorValue=governorValue)
            else:
                _LOG.debug(
                    "Overlaps already precomputed for %s vs %s for %s=%s",
                    skypix.name,
                    self.element.name,
                    self._governor.element.name,
                    governorValue
                )

    def _fill(
        self, *,
        storage: TableDimensionRecordStorage,
        skypix: SkyPixDimension,
        governorValue: str,
    ) -> None:
        """Insert overlap records for a newly-enabled combination of skypix
        dimension and governor value.

        This method should only be called by `enable`.

        Parameters
        ----------
        storage : `TableDimensionRecordStorage`
            Storage object for the records of ``self.element``.
        skypix : `SkyPixDimension`
            The skypix dimension (system and level) for which overlaps should
            be materialized.
        governorValue : `str`
            Value of this element's governor dimension for which overlaps
            should be materialized.  For example, if ``self.element`` is
            ``visit``, this is an instrument name; if ``self.element`` is
            ``patch``, this is a skymap name.
        """
        overlapRecords: List[dict] = []
        # `DimensionRecordStorage.fetch` as defined by the ABC expects to be
        # given iterables of data IDs that correspond to that element's graph
        # (e.g. {instrument, visit, detector}), not just some subset of it
        # (e.g. {instrument}).  But we know the implementation of `fetch` for
        # `TableDimensionRecordStorage` will use this iterable to do exactly
        # what we want.
        governorDataId = DataCoordinate.standardize({self._governor.element.name: governorValue},
                                                    graph=self._governor.element.graph)
        for record in storage.fetch(DataCoordinateIterable.fromScalar(governorDataId)):
            if record.region is None:
                continue
            baseOverlapRecord = record.dataId.byName()
            baseOverlapRecord["skypix_system"] = skypix.system.name
            baseOverlapRecord["skypix_level"] = skypix.level
            # The envelope is a sequence of half-open [begin, end) index
            # ranges; emit one overlap row per index.
            for begin, end in skypix.pixelization.envelope(record.region):
                overlapRecords.extend(
                    dict(baseOverlapRecord, skypix_index=index) for index in range(begin, end)
                )
        _LOG.debug(
            "Inserting %d initial overlap rows for %s vs %s for %s=%r",
            len(overlapRecords),
            skypix.name,
            self.element.name,
            self._governor.element.name,
            governorValue,
        )
        self._db.insert(self._overlapTable, *overlapRecords)

    def insert(self, records: Sequence[DimensionRecord], replace: bool = False) -> None:
        """Insert overlaps for a sequence of ``self.element`` records that
        have just been inserted.

        This must be called by any method that inserts records for that
        element (i.e. `TableDimensionRecordStorage.insert` and
        `TableDimensionRecordStorage.sync`), within the same transaction.

        Parameters
        ----------
        records : `Sequence` [ `DimensionRecord` ]
            Records for ``self.element``.  Records with `None` regions are
            ignored.  If empty, this method does nothing.
        replace : `bool`, optional
            If `True` (`False` is default) one or more of the given records may
            already exist and is being updated, so we need to delete any
            existing overlap records first.
        """
        if not records:
            # Nothing to materialize or delete; avoid locking the summary
            # table and issuing a query with an empty IN clause.
            return
        # Group records by family.governor value.
        grouped: Dict[str, List[DimensionRecord]] = defaultdict(list)
        for record in records:
            grouped[getattr(record, self._governor.element.name)].append(record)
        _LOG.debug(
            "Precomputing new skypix overlaps for %s where %s in %s.",
            self.element.name, self._governor.element.name, grouped.keys()
        )
        # Make sure the set of combinations to materialize does not change
        # while we are materializing the ones we have, by locking the summary
        # table.  Because we aren't planning to write to the summary table,
        # this could just be a SHARED lock instead of an EXCLUSIVE one, but
        # there's no API for that right now.
        with self._db.transaction(lock=[self._summaryTable]):
            # Query for the skypix dimensions to be associated with each
            # governor value.
            gvCol = self._summaryTable.columns[self._governor.element.name]
            sysCol = self._summaryTable.columns.skypix_system
            lvlCol = self._summaryTable.columns.skypix_level
            query = sqlalchemy.sql.select(
                gvCol, sysCol, lvlCol,
            ).select_from(
                self._summaryTable
            ).where(
                gvCol.in_(list(grouped.keys()))
            )
            # Group results by governor value, then skypix system.
            skypix: Dict[str, NamedKeyDict[SkyPixSystem, List[int]]] = {
                gv: NamedKeyDict() for gv in grouped.keys()
            }
            for summaryRow in self._db.query(query).mappings():
                system = self.element.universe.skypix[summaryRow[sysCol]]
                skypix[summaryRow[gvCol]].setdefault(system, []).append(summaryRow[lvlCol])
            if replace:
                # Construct constraints for a DELETE query as a list of dicts.
                # We include the skypix_system and skypix_level column values
                # explicitly instead of just letting the query search for all
                # of those related to the given records, because they are the
                # first columns in the primary key, and hence searching with
                # them will be way faster (and we don't want to add a new index
                # just for this operation).
                to_delete: List[Dict[str, Any]] = []
                for gv, skypix_systems in skypix.items():
                    for system, skypix_levels in skypix_systems.items():
                        to_delete.extend(
                            {"skypix_system": system.name, "skypix_level": level, **record.dataId.byName()}
                            for record, level in itertools.product(grouped[gv], skypix_levels)
                        )
                self._db.delete(
                    self._overlapTable,
                    ["skypix_system", "skypix_level"] + list(self.element.graph.required.names),
                    *to_delete,
                )
            overlapRecords: List[dict] = []
            # Compute overlaps for one governor value at a time, but gather
            # them all up for one insert.
            for gv, group in grouped.items():
                overlapRecords.extend(self._compute(group, skypix[gv], gv))
            _LOG.debug(
                "Inserting %d new skypix overlap rows for %s where %s in %s.",
                len(overlapRecords), self.element.name, self._governor.element.name, grouped.keys()
            )
            self._db.insert(self._overlapTable, *overlapRecords)

    def _compute(
        self,
        records: Sequence[DimensionRecord],
        skypix: NamedKeyDict[SkyPixSystem, List[int]],
        governorValue: str,
    ) -> Iterator[dict]:
        """Compute all overlap rows for a particular governor dimension value
        and all of the skypix dimensions for which its overlaps are enabled.

        This method should only be called by `insert`.

        Parameters
        ----------
        records : `Sequence` [ `DimensionRecord` ]
            Records for ``self.element``.  Records with `None` regions are
            ignored.  All must have the governor value given.
        skypix : `NamedKeyDict` [ `SkyPixSystem`, `list` [ `int` ] ]
            Mapping containing all skypix systems and levels for which overlaps
            should be computed, grouped by `SkyPixSystem`.  Neither the
            mapping nor its level lists are modified.
        governorValue : `str`
            Value of this element's governor dimension for which overlaps
            should be computed.  For example, if ``self.element`` is ``visit``,
            this is an instrument name; if ``self.element`` is ``patch``, this
            is a skymap name.

        Yields
        ------
        row : `dict`
            Dictionary representing an overlap row.
        """
        # Order each system's levels from finest to coarsest once, up front,
        # on a copy (so the caller's lists are left untouched), and drop
        # systems that have no levels at all.
        levelsBySystem = [
            (system, sorted(levels, reverse=True))
            for system, levels in skypix.items()
            if levels
        ]
        governorName = self._governor.element.name
        # Process input records one at time, computing all skypix indices for
        # each.
        for record in records:
            if record.region is None:
                continue
            assert getattr(record, governorName) == governorValue
            for system, levels in levelsBySystem:
                baseOverlapRecord = record.dataId.byName()
                baseOverlapRecord["skypix_system"] = system.name
                # Start with the first level, which is the finest-grained one.
                # Compute skypix envelope indices directly for that.
                finest = levels[0]
                indices: Dict[int, Set[int]] = {finest: set()}
                for begin, end in system[finest].pixelization.envelope(record.region):
                    indices[finest].update(range(begin, end))
                # Divide those indices by powers of 4 (and remove duplicates)
                # to work our way up to the last (coarsest) level.
                for lastLevel, nextLevel in zip(levels[:-1], levels[1:]):
                    factor = 4**(lastLevel - nextLevel)
                    indices[nextLevel] = {index//factor for index in indices[lastLevel]}
                for level in levels:
                    yield from (
                        {
                            "skypix_level": level,
                            "skypix_index": index,
                            **baseOverlapRecord,  # type: ignore
                        } for index in indices[level]
                    )

    def select(
        self,
        skypix: SkyPixDimension,
        governorValues: Union[AbstractSet[str], EllipsisType],
    ) -> sqlalchemy.sql.FromClause:
        """Construct a subquery expression containing overlaps between the
        given skypix dimension and governor values.

        Parameters
        ----------
        skypix : `SkyPixDimension`
            The skypix dimension (system and level) for which overlaps should
            be materialized.
        governorValues : `~collections.abc.Set` [ `str` ] or ``...``
            Values of this element's governor dimension for which overlaps
            should be returned.  For example, if ``self.element`` is ``visit``,
            this is a set of instrument names; if ``self.element`` is
            ``patch``, this is a set of skymap names.  If ``...`` all values
            in the database are used (`GovernorDimensionRecordStorage.values`).

        Returns
        -------
        subquery : `sqlalchemy.sql.FromClause`
            A SELECT query with an alias, intended for use as a subquery, with
            columns equal to ``self.element.required.names`` + ``skypix.name``.

        Raises
        ------
        RuntimeError
            Raised if ``skypix`` is not the common skypix dimension and
            overlaps with it have not been materialized for one or more of
            the requested governor values.
        """
        if skypix != self.element.universe.commonSkyPix:
            # We guarantee elsewhere that we always materialize all overlaps
            # vs. commonSkyPix, but for everything else, we need to check that
            # we have materialized this combination of governor values and
            # skypix.
            summaryWhere = [
                self._summaryTable.columns.skypix_system == skypix.system.name,
                self._summaryTable.columns.skypix_level == skypix.level,
            ]
            gvCol = self._summaryTable.columns[self._governor.element.name]
            if governorValues is not Ellipsis:
                summaryWhere.append(gvCol.in_(list(governorValues)))
            summaryQuery = sqlalchemy.sql.select(
                gvCol
            ).select_from(
                self._summaryTable
            ).where(
                sqlalchemy.sql.and_(*summaryWhere)
            )
            materializedGovernorValues = {row._mapping[gvCol] for row in self._db.query(summaryQuery)}
            if governorValues is Ellipsis:
                missingGovernorValues = self._governor.values - materializedGovernorValues
            else:
                missingGovernorValues = governorValues - materializedGovernorValues
            if missingGovernorValues:
                raise RuntimeError(
                    f"Query requires an overlap join between {skypix.name} and {self.element.name} "
                    f"(for {self._governor.element.name} in {missingGovernorValues}), but these "
                    f"have not been materialized."
                )
        # Expose the skypix index under the skypix dimension's own name, plus
        # the element's required dimension columns.
        columns = [self._overlapTable.columns.skypix_index.label(skypix.name)]
        columns.extend(self._overlapTable.columns[name] for name in self.element.graph.required.names)
        overlapWhere = [
            self._overlapTable.columns.skypix_system == skypix.system.name,
            self._overlapTable.columns.skypix_level == skypix.level,
        ]
        if governorValues is not Ellipsis:
            overlapWhere.append(
                self._overlapTable.columns[self._governor.element.name].in_(list(governorValues))
            )
        overlapQuery = sqlalchemy.sql.select(
            *columns
        ).select_from(
            self._overlapTable
        ).where(
            sqlalchemy.sql.and_(*overlapWhere)
        )
        return overlapQuery.alias(f"{self.element.name}_{skypix.name}_overlap")

    def digestTables(self) -> Iterable[sqlalchemy.schema.Table]:
        """Return tables used for schema digest.

        Returns
        -------
        tables : `Iterable` [ `sqlalchemy.schema.Table` ]
            Possibly empty set of tables for schema digest calculations.
        """
        return [self._summaryTable, self._overlapTable]