Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = ["TableDimensionRecordStorage"] 

24 

25from collections import defaultdict 

26import itertools 

27import logging 

28from typing import ( 

29 AbstractSet, 

30 Any, 

31 Dict, 

32 Iterable, 

33 Iterator, 

34 List, 

35 Mapping, 

36 Optional, 

37 Sequence, 

38 Set, 

39 Union, 

40) 

41 

42import sqlalchemy 

43 

44from ...core import ( 

45 addDimensionForeignKey, 

46 DatabaseDimensionElement, 

47 DataCoordinate, 

48 DataCoordinateIterable, 

49 ddl, 

50 DimensionElement, 

51 DimensionRecord, 

52 GovernorDimension, 

53 NamedKeyDict, 

54 NamedKeyMapping, 

55 NamedValueSet, 

56 SimpleQuery, 

57 SkyPixDimension, 

58 SkyPixSystem, 

59 SpatialRegionDatabaseRepresentation, 

60 TimespanDatabaseRepresentation, 

61) 

62from ..interfaces import ( 

63 Database, 

64 DatabaseDimensionOverlapStorage, 

65 DatabaseDimensionRecordStorage, 

66 GovernorDimensionRecordStorage, 

67 StaticTablesContext, 

68) 

69from ..queries import QueryBuilder 

70from ..wildcards import Ellipsis, EllipsisType 

71 

72 

# Module-level logger; used by `_SkyPixOverlapStorage` to emit debug messages
# while materializing skypix overlap rows.
_LOG = logging.getLogger(__name__)


# NOTE(review): this constant is not referenced anywhere in the visible part
# of this module -- confirm whether it is still used by callers elsewhere.
MAX_FETCH_CHUNK = 1000
"""Maximum number of data IDs we fetch records at a time.

Barring something database-engine-specific, this sets the size of the actual
SQL query, not just the number of result rows, because the only way to query
for multiple data IDs in a single SELECT query via SQLAlchemy is to have an OR
term in the WHERE clause for each one.
"""

84 

85 

class TableDimensionRecordStorage(DatabaseDimensionRecordStorage):
    """A record storage implementation that uses a regular database table.

    Parameters
    ----------
    db : `Database`
        Interface to the database engine and namespace that will hold these
        dimension records.
    element : `DatabaseDimensionElement`
        The element whose records this storage will manage.
    table : `sqlalchemy.schema.Table`
        The logical table for the element.
    skyPixOverlap : `_SkyPixOverlapStorage`, optional
        Object that manages the tables that hold materialized spatial overlap
        joins to skypix dimensions.  Should be `None` if (and only if)
        ``element.spatial is None``.
    """

    def __init__(self, db: Database, element: DatabaseDimensionElement, *, table: sqlalchemy.schema.Table,
                 skyPixOverlap: Optional[_SkyPixOverlapStorage] = None):
        self._db = db
        self._table = table
        self._element = element
        # Map each dimension name to the table column that holds its value;
        # `fetch` uses this to translate data ID constraints into WHERE terms.
        self._fetchColumns: Dict[str, sqlalchemy.sql.ColumnElement] = {
            dimension.name: self._table.columns[name]
            for dimension, name in zip(self._element.dimensions,
                                       self._element.RecordClass.fields.dimensions.names)
        }
        self._skyPixOverlap = skyPixOverlap
        # Overlap storage objects registered via `connect`.
        self._otherOverlaps: List[DatabaseDimensionOverlapStorage] = []

    @classmethod
    def initialize(
        cls,
        db: Database,
        element: DatabaseDimensionElement, *,
        context: Optional[StaticTablesContext] = None,
        config: Mapping[str, Any],
        governors: NamedKeyMapping[GovernorDimension, GovernorDimensionRecordStorage],
    ) -> DatabaseDimensionRecordStorage:
        # Docstring inherited from DatabaseDimensionRecordStorage.
        spec = element.RecordClass.fields.makeTableSpec(
            RegionReprClass=db.getSpatialRegionRepresentation(),
            TimespanReprClass=db.getTimespanRepresentation(),
        )
        if context is not None:
            table = context.addTable(element.name, spec)
        else:
            table = db.ensureTableExists(element.name, spec)
        if element.spatial is None:
            # Non-spatial elements have no overlap tables to manage.
            return cls(db, element, table=table)
        governor = governors[element.spatial.governor]
        skyPixOverlap = _SkyPixOverlapStorage.initialize(
            db,
            element,
            context=context,
            governor=governor,
        )
        result = cls(db, element, table=table, skyPixOverlap=skyPixOverlap)

        # Whenever anyone inserts a new governor dimension value, we want
        # to enable overlaps for that value between this element and
        # commonSkyPix.
        def callback(record: DimensionRecord) -> None:
            skyPixOverlap.enable(
                result,
                element.universe.commonSkyPix,
                getattr(record, element.spatial.governor.primaryKey.name),  # type: ignore
            )

        governor.registerInsertionListener(callback)
        return result

    @property
    def element(self) -> DatabaseDimensionElement:
        # Docstring inherited from DimensionRecordStorage.element.
        return self._element

    def clearCaches(self) -> None:
        # Docstring inherited from DimensionRecordStorage.clearCaches.
        pass

    def join(
        self,
        builder: QueryBuilder, *,
        regions: Optional[NamedKeyDict[DimensionElement, SpatialRegionDatabaseRepresentation]] = None,
        timespans: Optional[NamedKeyDict[DimensionElement, TimespanDatabaseRepresentation]] = None,
    ) -> sqlalchemy.sql.FromClause:
        # Docstring inherited from DimensionRecordStorage.
        if regions is not None:
            # Spatial joins go through the materialized overlaps with the
            # common skypix dimension rather than comparing regions directly.
            dimensions = NamedValueSet(self.element.required)
            dimensions.add(self.element.universe.commonSkyPix)
            assert self._skyPixOverlap is not None
            builder.joinTable(
                self._skyPixOverlap.select(self.element.universe.commonSkyPix, Ellipsis),
                dimensions,
            )
            regionsInTable = self._db.getSpatialRegionRepresentation().fromSelectable(self._table)
            regions[self.element] = regionsInTable
        joinOn = builder.startJoin(self._table, self.element.dimensions,
                                   self.element.RecordClass.fields.dimensions.names)
        if timespans is not None:
            timespanInTable = self._db.getTimespanRepresentation().fromSelectable(self._table)
            # Require a temporal overlap with every timespan already in the
            # query, then register this table's timespan for later joins.
            for timespanInQuery in timespans.values():
                joinOn.append(timespanInQuery.overlaps(timespanInTable))
            timespans[self.element] = timespanInTable
        builder.finishJoin(self._table, joinOn)
        return self._table

    def fetch(self, dataIds: DataCoordinateIterable) -> Iterable[DimensionRecord]:
        # Docstring inherited from DimensionRecordStorage.fetch.
        RecordClass = self.element.RecordClass
        query = SimpleQuery()
        query.columns.extend(self._table.columns[name] for name in RecordClass.fields.standard.names)
        if self.element.spatial is not None:
            query.columns.append(self._table.columns["region"])
        if self.element.temporal is not None:
            TimespanReprClass = self._db.getTimespanRepresentation()
            query.columns.extend(self._table.columns[name] for name in TimespanReprClass.getFieldNames())
        query.join(self._table)
        dataIds.constrain(query, lambda name: self._fetchColumns[name])
        for row in self._db.query(query.combine()):
            values = row._asdict()
            if self.element.temporal is not None:
                # Collapse the database representation's column(s) into the
                # single timespan value the record class expects.
                values[TimespanDatabaseRepresentation.NAME] = TimespanReprClass.extract(values)
            yield RecordClass(**values)

    def insert(self, *records: DimensionRecord, replace: bool = False) -> None:
        # Docstring inherited from DimensionRecordStorage.insert.
        elementRows = [record.toDict() for record in records]
        if self.element.temporal is not None:
            TimespanReprClass = self._db.getTimespanRepresentation()
            for row in elementRows:
                # Expand the timespan into the column(s) used by the database
                # representation.
                timespan = row.pop(TimespanDatabaseRepresentation.NAME)
                TimespanReprClass.update(timespan, result=row)
        # Element rows and their overlap rows go in together or not at all.
        with self._db.transaction():
            if replace:
                self._db.replace(self._table, *elementRows)
            else:
                self._db.insert(self._table, *elementRows)
            if self._skyPixOverlap is not None:
                self._skyPixOverlap.insert(records, replace=replace)

    def sync(self, record: DimensionRecord, update: bool = False) -> Union[bool, Dict[str, Any]]:
        # Docstring inherited from DimensionRecordStorage.sync.
        compared = record.toDict()
        # Required fields identify the row; everything left in ``compared``
        # is checked against (or used to update) the existing row.
        keys = {name: compared.pop(name) for name in record.fields.required.names}
        if self.element.temporal is not None:
            TimespanReprClass = self._db.getTimespanRepresentation()
            timespan = compared.pop(TimespanDatabaseRepresentation.NAME)
            TimespanReprClass.update(timespan, result=compared)
        with self._db.transaction():
            _, inserted_or_updated = self._db.sync(
                self._table,
                keys=keys,
                compared=compared,
                update=update,
            )
            if inserted_or_updated and self._skyPixOverlap is not None:
                if inserted_or_updated is True:
                    # Inserted a new row, so we just need to insert new overlap
                    # rows.
                    self._skyPixOverlap.insert([record])
                elif "region" in inserted_or_updated:  # type: ignore
                    # Updated the region, so we need to delete old overlap rows
                    # and insert new ones.
                    # (mypy should be able to tell that inserted_or_updated
                    # must be a dict if we get to this clause, but it can't)
                    self._skyPixOverlap.insert([record], replace=True)
                # Otherwise we updated something other than a region, and the
                # existing overlap rows are left alone.
        return inserted_or_updated

    def digestTables(self) -> Iterable[sqlalchemy.schema.Table]:
        # Docstring inherited from DimensionRecordStorage.digestTables.
        result = [self._table]
        if self._skyPixOverlap is not None:
            result.extend(self._skyPixOverlap.digestTables())
        return result

    def connect(self, overlaps: DatabaseDimensionOverlapStorage) -> None:
        # Docstring inherited from DatabaseDimensionRecordStorage.
        self._otherOverlaps.append(overlaps)

271 

272 

273class _SkyPixOverlapStorage: 

274 """A helper object for `TableDimensionRecordStorage` that manages its 

275 materialized overlaps with skypix dimensions. 

276 

277 New instances should be constructed by calling `initialize`, not by calling 

278 the constructor directly. 

279 

280 Parameters 

281 ---------- 

282 db : `Database` 

283 Interface to the underlying database engine and namespace. 

284 element : `DatabaseDimensionElement` 

285 Dimension element whose overlaps are to be managed. 

286 summaryTable : `sqlalchemy.schema.Table` 

287 Table that records which combinations of skypix dimensions and 

288 governor dimension values have materialized overlap rows. 

289 overlapTable : `sqlalchemy.schema.Table` 

290 Table containing the actual materialized overlap rows. 

291 governor : `GovernorDimensionRecordStorage` 

292 Record storage backend for this element's governor dimension. 

293 

294 Notes 

295 ----- 

296 This class (and most importantly, the tables it relies on) can in principle 

297 manage overlaps between with any skypix dimension, but at present it is 

298 only being used to manage relationships with the special ``commonSkyPix`` 

299 dimension, because that's all the query system uses. Eventually, we expect 

300 to require users to explicitly materialize all relationships they will 

301 want to use in queries. 

302 

303 Other possible future improvements include: 

304 

305 - allowing finer-grained skypix dimensions to provide overlap rows for 

306 coarser ones, by dividing indices by powers of 4 (and possibly doing 

307 ``SELECT DISTINCT`` in the subquery to remove duplicates); 

308 

309 - allowing finer-grained database elements (e.g. patch) to provide overlap 

310 rows for coarser ones (e.g. tract), by ignoring irrelevant columns 

311 (e.g. the patch IDs) in the subquery (again, possible with 

312 ``SELECT DISTINCT``). 

313 

314 But there's no point to doing any of that until the query system can 

315 figure out how best to ask for overlap rows when an exact match isn't 

316 available. 

317 """ 

318 def __init__( 

319 self, 

320 db: Database, 

321 element: DatabaseDimensionElement, 

322 summaryTable: sqlalchemy.schema.Table, 

323 overlapTable: sqlalchemy.schema.Table, 

324 governor: GovernorDimensionRecordStorage, 

325 ): 

326 self._db = db 

327 self.element = element 

328 assert element.spatial is not None 

329 self._summaryTable = summaryTable 

330 self._overlapTable = overlapTable 

331 self._governor = governor 

332 

333 @classmethod 

334 def initialize( 

335 cls, 

336 db: Database, 

337 element: DatabaseDimensionElement, *, 

338 context: Optional[StaticTablesContext], 

339 governor: GovernorDimensionRecordStorage, 

340 ) -> _SkyPixOverlapStorage: 

341 """Construct a new instance, creating tables as needed. 

342 

343 Parameters 

344 ---------- 

345 db : `Database` 

346 Interface to the underlying database engine and namespace. 

347 element : `DatabaseDimensionElement` 

348 Dimension element whose overlaps are to be managed. 

349 context : `StaticTablesContext`, optional 

350 If provided, an object to use to create any new tables. If not 

351 provided, ``db.ensureTableExists`` should be used instead. 

352 governor : `GovernorDimensionRecordStorage` 

353 Record storage backend for this element's governor dimension. 

354 """ 

355 if context is not None: 355 ↛ 358line 355 didn't jump to line 358, because the condition on line 355 was never false

356 op = context.addTable 

357 else: 

358 op = db.ensureTableExists 

359 summaryTable = op( 

360 cls._SUMMARY_TABLE_NAME_SPEC.format(element=element), 

361 cls._makeSummaryTableSpec(element), 

362 ) 

363 overlapTable = op( 

364 cls._OVERLAP_TABLE_NAME_SPEC.format(element=element), 

365 cls._makeOverlapTableSpec(element), 

366 ) 

367 return _SkyPixOverlapStorage(db, element, summaryTable=summaryTable, overlapTable=overlapTable, 

368 governor=governor) 

369 

370 _SUMMARY_TABLE_NAME_SPEC = "{element.name}_skypix_overlap_summary" 

371 

372 @classmethod 

373 def _makeSummaryTableSpec(cls, element: DatabaseDimensionElement) -> ddl.TableSpec: 

374 """Create a specification for the table that records which combinations 

375 of skypix dimension and governor value have materialized overlaps. 

376 

377 Parameters 

378 ---------- 

379 element : `DatabaseDimensionElement` 

380 Dimension element whose overlaps are to be managed. 

381 

382 Returns 

383 ------- 

384 tableSpec : `ddl.TableSpec` 

385 Table specification. 

386 """ 

387 assert element.spatial is not None 

388 tableSpec = ddl.TableSpec( 

389 fields=[ 

390 ddl.FieldSpec( 

391 name="skypix_system", 

392 dtype=sqlalchemy.String, 

393 length=16, 

394 nullable=False, 

395 primaryKey=True, 

396 ), 

397 ddl.FieldSpec( 

398 name="skypix_level", 

399 dtype=sqlalchemy.SmallInteger, 

400 nullable=False, 

401 primaryKey=True, 

402 ), 

403 ] 

404 ) 

405 addDimensionForeignKey(tableSpec, element.spatial.governor, primaryKey=True) 

406 return tableSpec 

407 

408 _OVERLAP_TABLE_NAME_SPEC = "{element.name}_skypix_overlap" 

409 

410 @classmethod 

411 def _makeOverlapTableSpec(cls, element: DatabaseDimensionElement) -> ddl.TableSpec: 

412 """Create a specification for the table that holds materialized 

413 overlap rows. 

414 

415 Parameters 

416 ---------- 

417 element : `DatabaseDimensionElement` 

418 Dimension element whose overlaps are to be managed. 

419 

420 Returns 

421 ------- 

422 tableSpec : `ddl.TableSpec` 

423 Table specification. 

424 """ 

425 assert element.spatial is not None 

426 tableSpec = ddl.TableSpec( 

427 fields=[ 

428 ddl.FieldSpec( 

429 name="skypix_system", 

430 dtype=sqlalchemy.String, 

431 length=16, 

432 nullable=False, 

433 primaryKey=True, 

434 ), 

435 ddl.FieldSpec( 

436 name="skypix_level", 

437 dtype=sqlalchemy.SmallInteger, 

438 nullable=False, 

439 primaryKey=True, 

440 ), 

441 # (more columns added below) 

442 ], 

443 unique=set(), 

444 foreignKeys=[ 

445 # Foreign key to summary table. This makes sure we don't 

446 # materialize any overlaps without remembering that we've done 

447 # so in the summary table, though it can't prevent the converse 

448 # of adding a summary row without adding overlap row (either of 

449 # those is a logic bug, of course, but we want to be defensive 

450 # about those). Using ON DELETE CASCADE, it'd be very easy to 

451 # implement "disabling" an overlap materialization, because we 

452 # can just delete the summary row. 

453 # Note that the governor dimension column is added below, in 

454 # the call to addDimensionForeignKey. 

455 ddl.ForeignKeySpec( 

456 cls._SUMMARY_TABLE_NAME_SPEC.format(element=element), 

457 source=("skypix_system", "skypix_level", element.spatial.governor.name), 

458 target=("skypix_system", "skypix_level", element.spatial.governor.name), 

459 onDelete="CASCADE", 

460 ), 

461 ], 

462 ) 

463 # Add fields for the standard element this class manages overlaps for. 

464 # This is guaranteed to add a column for the governor dimension, 

465 # because that's a required dependency of element. 

466 for dimension in element.required: 

467 addDimensionForeignKey(tableSpec, dimension, primaryKey=True) 

468 # Add field for the actual skypix index. We do this later because I 

469 # think we care (at least a bit) about the order in which the primary 

470 # key is defined, in that we want a non-summary column like this one 

471 # to appear after the governor dimension column. 

472 tableSpec.fields.add( 

473 ddl.FieldSpec( 

474 name="skypix_index", 

475 dtype=sqlalchemy.BigInteger, 

476 nullable=False, 

477 primaryKey=True, 

478 ) 

479 ) 

480 return tableSpec 

481 

482 def enable( 

483 self, 

484 storage: TableDimensionRecordStorage, 

485 skypix: SkyPixDimension, 

486 governorValue: str, 

487 ) -> None: 

488 """Enable materialization of overlaps between a skypix dimension 

489 and the records of ``self.element`` with a particular governor value. 

490 

491 Parameters 

492 ---------- 

493 storage : `TableDimensionRecordStorage` 

494 Storage object for the records of ``self.element``. 

495 skypix : `SkyPixDimension` 

496 The skypix dimension (system and level) for which overlaps should 

497 be materialized. 

498 governorValue : `str` 

499 Value of this element's governor dimension for which overlaps 

500 should be materialized. For example, if ``self.element`` is 

501 ``visit``, this is an instrument name; if ``self.element`` is 

502 ``patch``, this is a skymap name. 

503 

504 Notes 

505 ----- 

506 If there are existing rows for the given ``governorValue``, overlap 

507 rows for them will be immediately computed and inserted. At present, 

508 that never happens, because we only enable overlaps with 

509 `DimensionUniverse.commonSkyPix`, and that happens immediately after 

510 each governor row is inserted (and there can't be any patch rows, 

511 for example, until after the corresponding skymap row is inserted). 

512 

513 After calling `enable` for a particular combination, any new records 

514 for ``self.element`` that are inserted will automatically be 

515 accompanied by overlap records (via calls to `insert` made 

516 by `TableDimensionRecordStorage` methods). 

517 """ 

518 # Because we're essentially materializing a view in Python, we 

519 # aggressively lock all tables we're reading and writing in order to be 

520 # sure nothing gets out of sync. This may not be the most efficient 

521 # approach possible, but we'll focus on correct before we focus on 

522 # fast, and enabling a new overlap combination should be a very rare 

523 # operation anyway, and never one we do in parallel. 

524 with self._db.transaction(lock=[self._governor.table, storage._table, 

525 self._summaryTable, self._overlapTable]): 

526 result, inserted = self._db.sync( 

527 self._summaryTable, 

528 keys={ 

529 "skypix_system": skypix.system.name, 

530 "skypix_level": skypix.level, 

531 self._governor.element.name: governorValue, 

532 }, 

533 ) 

534 if inserted: 534 ↛ 544line 534 didn't jump to line 544, because the condition on line 534 was never false

535 _LOG.debug( 

536 "Precomputing initial overlaps for %s vs %s for %s=%s", 

537 skypix.name, 

538 self.element.name, 

539 self._governor.element.name, 

540 governorValue 

541 ) 

542 self._fill(storage=storage, skypix=skypix, governorValue=governorValue) 

543 else: 

544 _LOG.debug( 

545 "Overlaps already precomputed for %s vs %s for %s=%s", 

546 skypix.name, 

547 self.element.name, 

548 self._governor.element.name, 

549 governorValue 

550 ) 

551 

552 def _fill( 

553 self, *, 

554 storage: TableDimensionRecordStorage, 

555 skypix: SkyPixDimension, 

556 governorValue: str, 

557 ) -> None: 

558 """Insert overlap records for a newly-enabled combination of skypix 

559 dimension and governor value. 

560 

561 This method should only be called by `enable`. 

562 

563 Parameters 

564 ---------- 

565 storage : `TableDimensionRecordStorage` 

566 Storage object for the records of ``self.element``. 

567 skypix : `SkyPixDimension` 

568 The skypix dimension (system and level) for which overlaps should 

569 be materialized. 

570 governorValue : `str` 

571 Value of this element's governor dimension for which overlaps 

572 should be materialized. For example, if ``self.element`` is 

573 ``visit``, this is an instrument name; if ``self.element`` is 

574 ``patch``, this is a skymap name. 

575 """ 

576 overlapRecords: List[dict] = [] 

577 # `DimensionRecordStorage.fetch` as defined by the ABC expects to be 

578 # given iterables of data IDs that correspond to that element's graph 

579 # (e.g. {instrument, visit, detector}), not just some subset of it 

580 # (e.g. {instrument}). But we know the implementation of `fetch` for 

581 # `TableDimensionRecordStorage will use this iterable to do exactly 

582 # what we want. 

583 governorDataId = DataCoordinate.standardize({self._governor.element.name: governorValue}, 

584 graph=self._governor.element.graph) 

585 for record in storage.fetch(DataCoordinateIterable.fromScalar(governorDataId)): 585 ↛ 586line 585 didn't jump to line 586, because the loop on line 585 never started

586 if record.region is None: 

587 continue 

588 baseOverlapRecord = record.dataId.byName() 

589 baseOverlapRecord["skypix_system"] = skypix.system.name 

590 baseOverlapRecord["skypix_level"] = skypix.level 

591 for begin, end in skypix.pixelization.envelope(record.region): 

592 overlapRecords.extend( 

593 dict(baseOverlapRecord, skypix_index=index) for index in range(begin, end) 

594 ) 

595 _LOG.debug( 

596 "Inserting %d initial overlap rows for %s vs %s for %s=%r", 

597 len(overlapRecords), 

598 skypix.name, 

599 self.element.name, 

600 self._governor.element.name, 

601 governorValue, 

602 ) 

603 self._db.insert(self._overlapTable, *overlapRecords) 

604 

605 def insert(self, records: Sequence[DimensionRecord], replace: bool = False) -> None: 

606 """Insert overlaps for a sequence of ``self.element`` records that 

607 have just been inserted. 

608 

609 This must be called by any method that inserts records for that 

610 element (i.e. `TableDimensionRecordStorage.insert` and 

611 `TableDimensionRecordStorage.sync`), within the same transaction. 

612 

613 Parameters 

614 ---------- 

615 records : `Sequence` [ `DimensionRecord` ] 

616 Records for ``self.element``. Records with `None` regions are 

617 ignored. 

618 replace : `bool`, optional 

619 If `True` (`False` is default) one or more of the given records may 

620 already exist and is being updated, so we need to delete any 

621 existing overlap records first. 

622 """ 

623 # Group records by family.governor value. 

624 grouped: Dict[str, List[DimensionRecord]] = defaultdict(list) 

625 for record in records: 

626 grouped[getattr(record, self._governor.element.name)].append(record) 

627 _LOG.debug( 

628 "Precomputing new skypix overlaps for %s where %s in %s.", 

629 self.element.name, self._governor.element.name, grouped.keys() 

630 ) 

631 # Make sure the set of combinations to materialize does not change 

632 # while we are materializing the ones we have, by locking the summary 

633 # table. Because we aren't planning to write to the summary table, 

634 # this could just be a SHARED lock instead of an EXCLUSIVE one, but 

635 # there's no API for that right now. 

636 with self._db.transaction(lock=[self._summaryTable]): 

637 # Query for the skypix dimensions to be associated with each 

638 # governor value. 

639 gvCol = self._summaryTable.columns[self._governor.element.name] 

640 sysCol = self._summaryTable.columns.skypix_system 

641 lvlCol = self._summaryTable.columns.skypix_level 

642 query = sqlalchemy.sql.select( 

643 gvCol, sysCol, lvlCol, 

644 ).select_from( 

645 self._summaryTable 

646 ).where( 

647 gvCol.in_(list(grouped.keys())) 

648 ) 

649 # Group results by governor value, then skypix system. 

650 skypix: Dict[str, NamedKeyDict[SkyPixSystem, List[int]]] = { 

651 gv: NamedKeyDict() for gv in grouped.keys() 

652 } 

653 for summaryRow in self._db.query(query).mappings(): 

654 system = self.element.universe.skypix[summaryRow[sysCol]] 

655 skypix[summaryRow[gvCol]].setdefault(system, []).append(summaryRow[lvlCol]) 

656 if replace: 

657 # Construct constraints for a DELETE query as a list of dicts. 

658 # We include the skypix_system and skypix_level column values 

659 # explicitly instead of just letting the query search for all 

660 # of those related to the given records, because they are the 

661 # first columns in the primary key, and hence searching with 

662 # them will be way faster (and we don't want to add a new index 

663 # just for this operation). 

664 to_delete: List[Dict[str, Any]] = [] 

665 for gv, skypix_systems in skypix.items(): 

666 for system, skypix_levels in skypix_systems.items(): 

667 to_delete.extend( 

668 {"skypix_system": system.name, "skypix_level": level, **record.dataId.byName()} 

669 for record, level in itertools.product(grouped[gv], skypix_levels) 

670 ) 

671 self._db.delete( 

672 self._overlapTable, 

673 ["skypix_system", "skypix_level"] + list(self.element.graph.required.names), 

674 *to_delete, 

675 ) 

676 overlapRecords: List[dict] = [] 

677 # Compute overlaps for one governor value at a time, but gather 

678 # them all up for one insert. 

679 for gv, group in grouped.items(): 

680 overlapRecords.extend(self._compute(group, skypix[gv], gv)) 

681 _LOG.debug( 

682 "Inserting %d new skypix overlap rows for %s where %s in %s.", 

683 len(overlapRecords), self.element.name, self._governor.element.name, grouped.keys() 

684 ) 

685 self._db.insert(self._overlapTable, *overlapRecords) 

686 

687 def _compute( 

688 self, 

689 records: Sequence[DimensionRecord], 

690 skypix: NamedKeyDict[SkyPixSystem, List[int]], 

691 governorValue: str, 

692 ) -> Iterator[dict]: 

693 """Compute all overlap rows for a particular governor dimension value 

694 and all of the skypix dimensions for which its overlaps are enabled. 

695 

696 This method should only be called by `insert`. 

697 

698 Parameters 

699 ---------- 

700 records : `Sequence` [ `DimensionRecord` ] 

701 Records for ``self.element``. Records with `None` regions are 

702 ignored. All must have the governor value given. 

703 skypix : `NamedKeyDict` [ `SkyPixSystem`, `list` [ `int` ] ] 

704 Mapping containing all skypix systems and levels for which overlaps 

705 should be computed, grouped by `SkyPixSystem`. 

706 governorValue : `str` 

707 Value of this element's governor dimension for which overlaps 

708 should be computed. For example, if ``self.element`` is ``visit``, 

709 this is an instrument name; if ``self.element`` is ``patch``, this 

710 is a skymap name. 

711 

712 Yields 

713 ------ 

714 row : `dict` 

715 Dictionary representing an overlap row. 

716 """ 

717 # Process input records one at time, computing all skypix indices for 

718 # each. 

719 for record in records: 

720 if record.region is None: 

721 continue 

722 assert getattr(record, self._governor.element.name) == governorValue 

723 for system, levels in skypix.items(): 

724 if not levels: 724 ↛ 725line 724 didn't jump to line 725, because the condition on line 724 was never true

725 continue 

726 baseOverlapRecord = record.dataId.byName() 

727 baseOverlapRecord["skypix_system"] = system.name 

728 levels.sort(reverse=True) 

729 # Start with the first level, which is the finest-grained one. 

730 # Compute skypix envelope indices directly for that. 

731 indices: Dict[int, Set[int]] = {levels[0]: set()} 

732 for begin, end in system[levels[0]].pixelization.envelope(record.region): 

733 indices[levels[0]].update(range(begin, end)) 

734 # Divide those indices by powers of 4 (and remove duplicates) 

735 # work our way up to the last (coarsest) level. 

736 for lastLevel, nextLevel in zip(levels[:-1], levels[1:]): 736 ↛ 737line 736 didn't jump to line 737, because the loop on line 736 never started

737 factor = 4**(lastLevel - nextLevel) 

738 indices[nextLevel] = {index//factor for index in indices[lastLevel]} 

739 for level in levels: 

740 yield from ( 

741 { 

742 "skypix_level": level, 

743 "skypix_index": index, 

744 **baseOverlapRecord, # type: ignore 

745 } for index in indices[level] 

746 ) 

747 

748 def select( 

749 self, 

750 skypix: SkyPixDimension, 

751 governorValues: Union[AbstractSet[str], EllipsisType], 

752 ) -> sqlalchemy.sql.FromClause: 

753 """Construct a subquery expression containing overlaps between the 

754 given skypix dimension and governor values. 

755 

756 Parameters 

757 ---------- 

758 skypix : `SkyPixDimension` 

759 The skypix dimension (system and level) for which overlaps should 

760 be materialized. 

761 governorValues : `str` 

762 Values of this element's governor dimension for which overlaps 

763 should be returned. For example, if ``self.element`` is ``visit``, 

764 this is a set of instrument names; if ``self.element`` is 

765 ``patch``, this is a set of skymap names. If ``...`` all values 

766 in the database are used (`GovernorDimensionRecordStorage.values`). 

767 

768 Returns 

769 ------- 

770 subquery : `sqlalchemy.sql.FromClause` 

771 A SELECT query with an alias, intended for use as a subquery, with 

772 columns equal to ``self.element.required.names`` + ``skypix.name``. 

773 """ 

774 if skypix != self.element.universe.commonSkyPix: 774 ↛ 779line 774 didn't jump to line 779

775 # We guarantee elsewhere that we always materialize all overlaps 

776 # vs. commonSkyPix, but for everything else, we need to check that 

777 # we have materialized this combination of governor values and 

778 # skypix. 

779 summaryWhere = [ 

780 self._summaryTable.columns.skypix_system == skypix.system.name, 

781 self._summaryTable.columns.skypix_level == skypix.level, 

782 ] 

783 gvCol = self._summaryTable.columns[self._governor.element.name] 

784 if governorValues is not Ellipsis: 

785 summaryWhere.append(gvCol.in_(list(governorValues))) 

786 summaryQuery = sqlalchemy.sql.select( 

787 gvCol 

788 ).select_from( 

789 self._summaryTable 

790 ).where( 

791 sqlalchemy.sql.and_(*summaryWhere) 

792 ) 

793 materializedGovernorValues = {row._mapping[gvCol] for row in self._db.query(summaryQuery)} 

794 if governorValues is Ellipsis: 

795 missingGovernorValues = self._governor.values - materializedGovernorValues 

796 else: 

797 missingGovernorValues = governorValues - materializedGovernorValues 

798 if missingGovernorValues: 

799 raise RuntimeError( 

800 f"Query requires an overlap join between {skypix.name} and {self.element.name} " 

801 f"(for {self._governor.element.name} in {missingGovernorValues}), but these " 

802 f"have not been materialized." 

803 ) 

804 columns = [self._overlapTable.columns.skypix_index.label(skypix.name)] 

805 columns.extend(self._overlapTable.columns[name] for name in self.element.graph.required.names) 

806 overlapWhere = [ 

807 self._overlapTable.columns.skypix_system == skypix.system.name, 

808 self._overlapTable.columns.skypix_level == skypix.level, 

809 ] 

810 if governorValues is not Ellipsis: 810 ↛ 811line 810 didn't jump to line 811, because the condition on line 810 was never true

811 overlapWhere.append( 

812 self._overlapTable.columns[self._governor.element.name].in_(list(governorValues)) 

813 ) 

814 overlapQuery = sqlalchemy.sql.select( 

815 *columns 

816 ).select_from( 

817 self._overlapTable 

818 ).where( 

819 sqlalchemy.sql.and_(*overlapWhere) 

820 ) 

821 return overlapQuery.alias(f"{self.element.name}_{skypix.name}_overlap") 

822 

823 def digestTables(self) -> Iterable[sqlalchemy.schema.Table]: 

824 """Return tables used for schema digest. 

825 

826 Returns 

827 ------- 

828 tables : `Iterable` [ `sqlalchemy.schema.Table` ] 

829 Possibly empty set of tables for schema digest calculations. 

830 """ 

831 return [self._summaryTable, self._overlapTable]