Coverage for python/lsst/daf/butler/registry/dimensions/table.py: 86%
236 statements
« prev ^ index » next coverage.py v7.5.0, created at 2024-04-24 23:49 -0700
« prev ^ index » next coverage.py v7.5.0, created at 2024-04-24 23:49 -0700
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = ["TableDimensionRecordStorage"]
25import itertools
26import logging
27import warnings
28from collections import defaultdict
29from typing import AbstractSet, Any, Dict, Iterable, Iterator, List, Mapping, Optional, Sequence, Set, Union
31import sqlalchemy
33from ...core import (
34 DatabaseDimensionElement,
35 DataCoordinate,
36 DataCoordinateIterable,
37 DimensionElement,
38 DimensionRecord,
39 GovernorDimension,
40 NamedKeyDict,
41 NamedKeyMapping,
42 NamedValueSet,
43 SimpleQuery,
44 SkyPixDimension,
45 SkyPixSystem,
46 SpatialRegionDatabaseRepresentation,
47 TimespanDatabaseRepresentation,
48 addDimensionForeignKey,
49 ddl,
50)
51from ..interfaces import (
52 Database,
53 DatabaseDimensionOverlapStorage,
54 DatabaseDimensionRecordStorage,
55 GovernorDimensionRecordStorage,
56 StaticTablesContext,
57)
58from ..queries import QueryBuilder
59from ..wildcards import Ellipsis, EllipsisType
61_LOG = logging.getLogger(__name__)
64MAX_FETCH_CHUNK = 1000
65"""Maximum number of data IDs for which we fetch records at a time.
67Barring something database-engine-specific, this sets the size of the actual
68SQL query, not just the number of result rows, because the only way to query
69for multiple data IDs in a single SELECT query via SQLAlchemy is to have an OR
70term in the WHERE clause for each one.
71"""
class TableDimensionRecordStorage(DatabaseDimensionRecordStorage):
    """A record storage implementation that uses a regular database table.

    Parameters
    ----------
    db : `Database`
        Interface to the database engine and namespace that will hold these
        dimension records.
    element : `DatabaseDimensionElement`
        The element whose records this storage will manage.
    table : `sqlalchemy.schema.Table`
        The logical table for the element.
    skyPixOverlap : `_SkyPixOverlapStorage`, optional
        Object that manages the tables that hold materialized spatial overlap
        joins to skypix dimensions. Should be `None` if (and only if)
        ``element.spatial is None``.
    """

    def __init__(
        self,
        db: Database,
        element: DatabaseDimensionElement,
        *,
        table: sqlalchemy.schema.Table,
        skyPixOverlap: Optional[_SkyPixOverlapStorage] = None,
    ):
        self._db = db
        self._table = table
        self._element = element
        # Map each dimension name of the element to the table column that
        # holds its value; `fetch` uses this to constrain queries by data ID.
        self._fetchColumns: Dict[str, sqlalchemy.sql.ColumnElement] = {
            dimension.name: self._table.columns[name]
            for dimension, name in zip(
                self._element.dimensions, self._element.RecordClass.fields.dimensions.names
            )
        }
        self._skyPixOverlap = skyPixOverlap
        self._otherOverlaps: List[DatabaseDimensionOverlapStorage] = []

    @classmethod
    def initialize(
        cls,
        db: Database,
        element: DatabaseDimensionElement,
        *,
        context: Optional[StaticTablesContext] = None,
        config: Mapping[str, Any],
        governors: NamedKeyMapping[GovernorDimension, GovernorDimensionRecordStorage],
    ) -> DatabaseDimensionRecordStorage:
        # Docstring inherited from DatabaseDimensionRecordStorage.
        spec = element.RecordClass.fields.makeTableSpec(
            RegionReprClass=db.getSpatialRegionRepresentation(),
            TimespanReprClass=db.getTimespanRepresentation(),
        )
        if context is not None:
            table = context.addTable(element.name, spec)
        else:
            table = db.ensureTableExists(element.name, spec)
        skyPixOverlap: Optional[_SkyPixOverlapStorage]
        if element.spatial is not None:
            governor = governors[element.spatial.governor]
            skyPixOverlap = _SkyPixOverlapStorage.initialize(
                db,
                element,
                context=context,
                governor=governor,
            )
            result = cls(db, element, table=table, skyPixOverlap=skyPixOverlap)

            # Whenever anyone inserts a new governor dimension value, we want
            # to enable overlaps for that value between this element and
            # commonSkyPix.
            def callback(record: DimensionRecord) -> None:
                skyPixOverlap.enable(  # type: ignore
                    result,
                    element.universe.commonSkyPix,
                    getattr(record, element.spatial.governor.primaryKey.name),  # type: ignore
                )

            governor.registerInsertionListener(callback)
            return result
        else:
            return cls(db, element, table=table)

    @property
    def element(self) -> DatabaseDimensionElement:
        # Docstring inherited from DimensionRecordStorage.element.
        return self._element

    def clearCaches(self) -> None:
        # Docstring inherited from DimensionRecordStorage.clearCaches.
        pass

    def join(
        self,
        builder: QueryBuilder,
        *,
        regions: Optional[NamedKeyDict[DimensionElement, SpatialRegionDatabaseRepresentation]] = None,
        timespans: Optional[NamedKeyDict[DimensionElement, TimespanDatabaseRepresentation]] = None,
    ) -> sqlalchemy.sql.FromClause:
        # Docstring inherited from DimensionRecordStorage.
        #
        # The return annotation reflects what the method has always returned
        # (the element's table); it was previously annotated as `None`.
        if regions is not None:
            # Join in the materialized overlaps with the common skypix
            # dimension and register this element's region column.
            dimensions = NamedValueSet(self.element.required)
            dimensions.add(self.element.universe.commonSkyPix)
            assert self._skyPixOverlap is not None
            builder.joinTable(
                self._skyPixOverlap.select(self.element.universe.commonSkyPix, Ellipsis),
                dimensions,
            )
            regionsInTable = self._db.getSpatialRegionRepresentation().fromSelectable(self._table)
            regions[self.element] = regionsInTable
        joinOn = builder.startJoin(
            self._table, self.element.dimensions, self.element.RecordClass.fields.dimensions.names
        )
        if timespans is not None:
            # Require this element's timespan to overlap every timespan
            # already in the query, then register it for later joins.
            timespanInTable = self._db.getTimespanRepresentation().fromSelectable(self._table)
            for timespanInQuery in timespans.values():
                joinOn.append(timespanInQuery.overlaps(timespanInTable))
            timespans[self.element] = timespanInTable
        builder.finishJoin(self._table, joinOn)
        return self._table

    def fetch(self, dataIds: DataCoordinateIterable) -> Iterable[DimensionRecord]:
        # Docstring inherited from DimensionRecordStorage.fetch.
        RecordClass = self.element.RecordClass
        query = SimpleQuery()
        query.columns.extend(self._table.columns[name] for name in RecordClass.fields.standard.names)
        if self.element.spatial is not None:
            query.columns.append(self._table.columns["region"])
        if self.element.temporal is not None:
            TimespanReprClass = self._db.getTimespanRepresentation()
            query.columns.extend(self._table.columns[name] for name in TimespanReprClass.getFieldNames())
        query.join(self._table)
        dataIds.constrain(query, lambda name: self._fetchColumns[name])
        with warnings.catch_warnings():
            # Some of our generated queries may contain cartesian joins, this
            # is not a serious issue as it is properly constrained, so we want
            # to suppress sqlalchemy warnings.
            warnings.filterwarnings(
                "ignore",
                message="SELECT statement has a cartesian product",
                category=sqlalchemy.exc.SAWarning,
            )
            with self._db.query(query.combine()) as sql_result:
                rows = sql_result.fetchall()
        # Yield only after both context managers have exited: this is a
        # generator, and suspending inside them would keep the warning filter
        # and the query context active while the caller consumes results.
        for row in rows:
            values = row._asdict()
            if self.element.temporal is not None:
                values[TimespanDatabaseRepresentation.NAME] = TimespanReprClass.extract(values)
            yield RecordClass(**values)

    def insert(self, *records: DimensionRecord, replace: bool = False, skip_existing: bool = False) -> None:
        # Docstring inherited from DimensionRecordStorage.insert.
        elementRows = [record.toDict() for record in records]
        if self.element.temporal is not None:
            # Replace the single timespan value with the database
            # representation's own column(s).
            TimespanReprClass = self._db.getTimespanRepresentation()
            for row in elementRows:
                timespan = row.pop(TimespanDatabaseRepresentation.NAME)
                TimespanReprClass.update(timespan, result=row)
        with self._db.transaction():
            if replace:
                self._db.replace(self._table, *elementRows)
            elif skip_existing:
                self._db.ensure(self._table, *elementRows, primary_key_only=True)
            else:
                self._db.insert(self._table, *elementRows)
            if self._skyPixOverlap is not None:
                # NOTE(review): ``skip_existing`` is not forwarded here, so
                # overlap rows are inserted unconditionally even for records
                # that already existed - confirm this cannot conflict.
                self._skyPixOverlap.insert(records, replace=replace)

    def sync(self, record: DimensionRecord, update: bool = False) -> Union[bool, Dict[str, Any]]:
        # Docstring inherited from DimensionRecordStorage.sync.
        compared = record.toDict()
        # Split the record into the required (key) fields and the remaining
        # fields that are compared against any existing row.
        keys = {}
        for name in record.fields.required.names:
            keys[name] = compared.pop(name)
        if self.element.temporal is not None:
            TimespanReprClass = self._db.getTimespanRepresentation()
            timespan = compared.pop(TimespanDatabaseRepresentation.NAME)
            TimespanReprClass.update(timespan, result=compared)
        with self._db.transaction():
            _, inserted_or_updated = self._db.sync(
                self._table,
                keys=keys,
                compared=compared,
                update=update,
            )
            if inserted_or_updated and self._skyPixOverlap is not None:
                if inserted_or_updated is True:
                    # Inserted a new row, so we just need to insert new overlap
                    # rows.
                    self._skyPixOverlap.insert([record])
                elif "region" in inserted_or_updated:
                    # Updated the region, so we need to delete old overlap rows
                    # and insert new ones.
                    # (mypy should be able to tell that inserted_or_updated
                    # must be a dict if we get to this clause, but it can't)
                    self._skyPixOverlap.insert([record], replace=True)
                # Otherwise we updated something other than a region, and the
                # overlap rows are still valid.
        return inserted_or_updated

    def digestTables(self) -> Iterable[sqlalchemy.schema.Table]:
        # Docstring inherited from DimensionRecordStorage.digestTables.
        result = [self._table]
        if self._skyPixOverlap is not None:
            result.extend(self._skyPixOverlap.digestTables())
        return result

    def connect(self, overlaps: DatabaseDimensionOverlapStorage) -> None:
        # Docstring inherited from DatabaseDimensionRecordStorage.
        self._otherOverlaps.append(overlaps)
class _SkyPixOverlapStorage:
    """A helper object for `TableDimensionRecordStorage` that manages its
    materialized overlaps with skypix dimensions.

    New instances should be constructed by calling `initialize`, not by calling
    the constructor directly.

    Parameters
    ----------
    db : `Database`
        Interface to the underlying database engine and namespace.
    element : `DatabaseDimensionElement`
        Dimension element whose overlaps are to be managed.
    summaryTable : `sqlalchemy.schema.Table`
        Table that records which combinations of skypix dimensions and
        governor dimension values have materialized overlap rows.
    overlapTable : `sqlalchemy.schema.Table`
        Table containing the actual materialized overlap rows.
    governor : `GovernorDimensionRecordStorage`
        Record storage backend for this element's governor dimension.

    Notes
    -----
    This class (and most importantly, the tables it relies on) can in principle
    manage overlaps with any skypix dimension, but at present it is
    only being used to manage relationships with the special ``commonSkyPix``
    dimension, because that's all the query system uses. Eventually, we expect
    to require users to explicitly materialize all relationships they will
    want to use in queries.

    Other possible future improvements include:

    - allowing finer-grained skypix dimensions to provide overlap rows for
      coarser ones, by dividing indices by powers of 4 (and possibly doing
      ``SELECT DISTINCT`` in the subquery to remove duplicates);

    - allowing finer-grained database elements (e.g. patch) to provide overlap
      rows for coarser ones (e.g. tract), by ignoring irrelevant columns
      (e.g. the patch IDs) in the subquery (again, possible with
      ``SELECT DISTINCT``).

    But there's no point to doing any of that until the query system can
    figure out how best to ask for overlap rows when an exact match isn't
    available.
    """

    def __init__(
        self,
        db: Database,
        element: DatabaseDimensionElement,
        summaryTable: sqlalchemy.schema.Table,
        overlapTable: sqlalchemy.schema.Table,
        governor: GovernorDimensionRecordStorage,
    ):
        self._db = db
        self.element = element
        assert element.spatial is not None
        self._summaryTable = summaryTable
        self._overlapTable = overlapTable
        self._governor = governor

    @classmethod
    def initialize(
        cls,
        db: Database,
        element: DatabaseDimensionElement,
        *,
        context: Optional[StaticTablesContext],
        governor: GovernorDimensionRecordStorage,
    ) -> _SkyPixOverlapStorage:
        """Construct a new instance, creating tables as needed.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        element : `DatabaseDimensionElement`
            Dimension element whose overlaps are to be managed.
        context : `StaticTablesContext`, optional
            If provided, an object to use to create any new tables. If not
            provided, ``db.ensureTableExists`` should be used instead.
        governor : `GovernorDimensionRecordStorage`
            Record storage backend for this element's governor dimension.

        Returns
        -------
        storage : `_SkyPixOverlapStorage`
            New instance backed by the summary and overlap tables.
        """
        if context is not None:
            op = context.addTable
        else:
            op = db.ensureTableExists
        summaryTable = op(
            cls._SUMMARY_TABLE_NAME_SPEC.format(element=element),
            cls._makeSummaryTableSpec(element),
        )
        overlapTable = op(
            cls._OVERLAP_TABLE_NAME_SPEC.format(element=element),
            cls._makeOverlapTableSpec(element),
        )
        return cls(db, element, summaryTable=summaryTable, overlapTable=overlapTable, governor=governor)

    _SUMMARY_TABLE_NAME_SPEC = "{element.name}_skypix_overlap_summary"

    @classmethod
    def _makeSummaryTableSpec(cls, element: DatabaseDimensionElement) -> ddl.TableSpec:
        """Create a specification for the table that records which combinations
        of skypix dimension and governor value have materialized overlaps.

        Parameters
        ----------
        element : `DatabaseDimensionElement`
            Dimension element whose overlaps are to be managed.

        Returns
        -------
        tableSpec : `ddl.TableSpec`
            Table specification.
        """
        assert element.spatial is not None
        tableSpec = ddl.TableSpec(
            fields=[
                ddl.FieldSpec(
                    name="skypix_system",
                    dtype=sqlalchemy.String,
                    length=16,
                    nullable=False,
                    primaryKey=True,
                ),
                ddl.FieldSpec(
                    name="skypix_level",
                    dtype=sqlalchemy.SmallInteger,
                    nullable=False,
                    primaryKey=True,
                ),
            ]
        )
        addDimensionForeignKey(tableSpec, element.spatial.governor, primaryKey=True)
        return tableSpec

    _OVERLAP_TABLE_NAME_SPEC = "{element.name}_skypix_overlap"

    @classmethod
    def _makeOverlapTableSpec(cls, element: DatabaseDimensionElement) -> ddl.TableSpec:
        """Create a specification for the table that holds materialized
        overlap rows.

        Parameters
        ----------
        element : `DatabaseDimensionElement`
            Dimension element whose overlaps are to be managed.

        Returns
        -------
        tableSpec : `ddl.TableSpec`
            Table specification.
        """
        assert element.spatial is not None
        tableSpec = ddl.TableSpec(
            fields=[
                ddl.FieldSpec(
                    name="skypix_system",
                    dtype=sqlalchemy.String,
                    length=16,
                    nullable=False,
                    primaryKey=True,
                ),
                ddl.FieldSpec(
                    name="skypix_level",
                    dtype=sqlalchemy.SmallInteger,
                    nullable=False,
                    primaryKey=True,
                ),
                # (more columns added below)
            ],
            unique=set(),
            indexes={
                # This index has the same fields as the PK, in a different
                # order, to facilitate queries that know skypix_index and want
                # to find the other element.
                (
                    "skypix_system",
                    "skypix_level",
                    "skypix_index",
                )
                + tuple(element.graph.required.names),
            },
            foreignKeys=[
                # Foreign key to summary table. This makes sure we don't
                # materialize any overlaps without remembering that we've done
                # so in the summary table, though it can't prevent the converse
                # of adding a summary row without adding overlap row (either of
                # those is a logic bug, of course, but we want to be defensive
                # about those). Using ON DELETE CASCADE, it'd be very easy to
                # implement "disabling" an overlap materialization, because we
                # can just delete the summary row.
                # Note that the governor dimension column is added below, in
                # the call to addDimensionForeignKey.
                ddl.ForeignKeySpec(
                    cls._SUMMARY_TABLE_NAME_SPEC.format(element=element),
                    source=("skypix_system", "skypix_level", element.spatial.governor.name),
                    target=("skypix_system", "skypix_level", element.spatial.governor.name),
                    onDelete="CASCADE",
                ),
            ],
        )
        # Add fields for the standard element this class manages overlaps for.
        # This is guaranteed to add a column for the governor dimension,
        # because that's a required dependency of element.
        for dimension in element.required:
            addDimensionForeignKey(tableSpec, dimension, primaryKey=True)
        # Add field for the actual skypix index. We do this later because I
        # think we care (at least a bit) about the order in which the primary
        # key is defined, in that we want a non-summary column like this one
        # to appear after the governor dimension column.
        tableSpec.fields.add(
            ddl.FieldSpec(
                name="skypix_index",
                dtype=sqlalchemy.BigInteger,
                nullable=False,
                primaryKey=True,
            )
        )
        return tableSpec

    def enable(
        self,
        storage: TableDimensionRecordStorage,
        skypix: SkyPixDimension,
        governorValue: str,
    ) -> None:
        """Enable materialization of overlaps between a skypix dimension
        and the records of ``self.element`` with a particular governor value.

        Parameters
        ----------
        storage : `TableDimensionRecordStorage`
            Storage object for the records of ``self.element``.
        skypix : `SkyPixDimension`
            The skypix dimension (system and level) for which overlaps should
            be materialized.
        governorValue : `str`
            Value of this element's governor dimension for which overlaps
            should be materialized. For example, if ``self.element`` is
            ``visit``, this is an instrument name; if ``self.element`` is
            ``patch``, this is a skymap name.

        Notes
        -----
        If there are existing rows for the given ``governorValue``, overlap
        rows for them will be immediately computed and inserted. At present,
        that never happens, because we only enable overlaps with
        `DimensionUniverse.commonSkyPix`, and that happens immediately after
        each governor row is inserted (and there can't be any patch rows,
        for example, until after the corresponding skymap row is inserted).

        After calling `enable` for a particular combination, any new records
        for ``self.element`` that are inserted will automatically be
        accompanied by overlap records (via calls to `insert` made
        by `TableDimensionRecordStorage` methods).
        """
        # Because we're essentially materializing a view in Python, we
        # aggressively lock all tables we're reading and writing in order to be
        # sure nothing gets out of sync. This may not be the most efficient
        # approach possible, but we'll focus on correct before we focus on
        # fast, and enabling a new overlap combination should be a very rare
        # operation anyway, and never one we do in parallel.
        with self._db.transaction(
            lock=[self._governor.table, storage._table, self._summaryTable, self._overlapTable]
        ):
            # Only the "was a row inserted" flag matters here; the synced row
            # itself is not needed.
            _, inserted = self._db.sync(
                self._summaryTable,
                keys={
                    "skypix_system": skypix.system.name,
                    "skypix_level": skypix.level,
                    self._governor.element.name: governorValue,
                },
            )
            if inserted:
                _LOG.debug(
                    "Precomputing initial overlaps for %s vs %s for %s=%s",
                    skypix.name,
                    self.element.name,
                    self._governor.element.name,
                    governorValue,
                )
                self._fill(storage=storage, skypix=skypix, governorValue=governorValue)
            else:
                _LOG.debug(
                    "Overlaps already precomputed for %s vs %s for %s=%s",
                    skypix.name,
                    self.element.name,
                    self._governor.element.name,
                    governorValue,
                )

    def _fill(
        self,
        *,
        storage: TableDimensionRecordStorage,
        skypix: SkyPixDimension,
        governorValue: str,
    ) -> None:
        """Insert overlap records for a newly-enabled combination of skypix
        dimension and governor value.

        This method should only be called by `enable`.

        Parameters
        ----------
        storage : `TableDimensionRecordStorage`
            Storage object for the records of ``self.element``.
        skypix : `SkyPixDimension`
            The skypix dimension (system and level) for which overlaps should
            be materialized.
        governorValue : `str`
            Value of this element's governor dimension for which overlaps
            should be materialized. For example, if ``self.element`` is
            ``visit``, this is an instrument name; if ``self.element`` is
            ``patch``, this is a skymap name.
        """
        overlapRecords: List[dict] = []
        # `DimensionRecordStorage.fetch` as defined by the ABC expects to be
        # given iterables of data IDs that correspond to that element's graph
        # (e.g. {instrument, visit, detector}), not just some subset of it
        # (e.g. {instrument}). But we know the implementation of `fetch` for
        # `TableDimensionRecordStorage` will use this iterable to do exactly
        # what we want.
        governorDataId = DataCoordinate.standardize(
            {self._governor.element.name: governorValue}, graph=self._governor.element.graph
        )
        for record in storage.fetch(DataCoordinateIterable.fromScalar(governorDataId)):
            if record.region is None:
                continue
            baseOverlapRecord = record.dataId.byName()
            baseOverlapRecord["skypix_system"] = skypix.system.name
            baseOverlapRecord["skypix_level"] = skypix.level
            # Each envelope entry is a half-open [begin, end) range of pixel
            # indices; emit one overlap row per index.
            for begin, end in skypix.pixelization.envelope(record.region):
                overlapRecords.extend(
                    dict(baseOverlapRecord, skypix_index=index) for index in range(begin, end)
                )
        _LOG.debug(
            "Inserting %d initial overlap rows for %s vs %s for %s=%r",
            len(overlapRecords),
            skypix.name,
            self.element.name,
            self._governor.element.name,
            governorValue,
        )
        self._db.insert(self._overlapTable, *overlapRecords)

    def insert(self, records: Sequence[DimensionRecord], replace: bool = False) -> None:
        """Insert overlaps for a sequence of ``self.element`` records that
        have just been inserted.

        This must be called by any method that inserts records for that
        element (i.e. `TableDimensionRecordStorage.insert` and
        `TableDimensionRecordStorage.sync`), within the same transaction.

        Parameters
        ----------
        records : `Sequence` [ `DimensionRecord` ]
            Records for ``self.element``. Records with `None` regions are
            ignored. If empty, this method does nothing.
        replace : `bool`, optional
            If `True` (`False` is default) one or more of the given records may
            already exist and is being updated, so we need to delete any
            existing overlap records first.
        """
        if not records:
            # Nothing to materialize; avoid locking the summary table and
            # issuing a query with an empty IN clause.
            return
        # Group records by family.governor value.
        grouped: Dict[str, List[DimensionRecord]] = defaultdict(list)
        for record in records:
            grouped[getattr(record, self._governor.element.name)].append(record)
        _LOG.debug(
            "Precomputing new skypix overlaps for %s where %s in %s.",
            self.element.name,
            self._governor.element.name,
            grouped.keys(),
        )
        # Make sure the set of combinations to materialize does not change
        # while we are materializing the ones we have, by locking the summary
        # table. Because we aren't planning to write to the summary table,
        # this could just be a SHARED lock instead of an EXCLUSIVE one, but
        # there's no API for that right now.
        with self._db.transaction(lock=[self._summaryTable]):
            # Query for the skypix dimensions to be associated with each
            # governor value.
            gvCol = self._summaryTable.columns[self._governor.element.name]
            sysCol = self._summaryTable.columns.skypix_system
            lvlCol = self._summaryTable.columns.skypix_level
            query = (
                sqlalchemy.sql.select(
                    gvCol,
                    sysCol,
                    lvlCol,
                )
                .select_from(self._summaryTable)
                .where(gvCol.in_(list(grouped.keys())))
            )
            # Group results by governor value, then skypix system.
            skypix: Dict[str, NamedKeyDict[SkyPixSystem, List[int]]] = {
                gv: NamedKeyDict() for gv in grouped.keys()
            }
            with self._db.query(query) as sql_result:
                for summaryRow in sql_result.mappings():
                    system = self.element.universe.skypix[summaryRow[sysCol]]
                    skypix[summaryRow[gvCol]].setdefault(system, []).append(summaryRow[lvlCol])
            if replace:
                # Construct constraints for a DELETE query as a list of dicts.
                # We include the skypix_system and skypix_level column values
                # explicitly instead of just letting the query search for all
                # of those related to the given records, because they are the
                # first columns in the primary key, and hence searching with
                # them will be way faster (and we don't want to add a new index
                # just for this operation).
                to_delete: List[Dict[str, Any]] = []
                for gv, skypix_systems in skypix.items():
                    for system, skypix_levels in skypix_systems.items():
                        to_delete.extend(
                            {"skypix_system": system.name, "skypix_level": level, **record.dataId.byName()}
                            for record, level in itertools.product(grouped[gv], skypix_levels)
                        )
                self._db.delete(
                    self._overlapTable,
                    ["skypix_system", "skypix_level"] + list(self.element.graph.required.names),
                    *to_delete,
                )
            overlapRecords: List[dict] = []
            # Compute overlaps for one governor value at a time, but gather
            # them all up for one insert.
            for gv, group in grouped.items():
                overlapRecords.extend(self._compute(group, skypix[gv], gv))
            _LOG.debug(
                "Inserting %d new skypix overlap rows for %s where %s in %s.",
                len(overlapRecords),
                self.element.name,
                self._governor.element.name,
                grouped.keys(),
            )
            self._db.insert(self._overlapTable, *overlapRecords)

    def _compute(
        self,
        records: Sequence[DimensionRecord],
        skypix: NamedKeyDict[SkyPixSystem, List[int]],
        governorValue: str,
    ) -> Iterator[dict]:
        """Compute all overlap rows for a particular governor dimension value
        and all of the skypix dimensions for which its overlaps are enabled.

        This method should only be called by `insert`.

        Parameters
        ----------
        records : `Sequence` [ `DimensionRecord` ]
            Records for ``self.element``. Records with `None` regions are
            ignored. All must have the governor value given.
        skypix : `NamedKeyDict` [ `SkyPixSystem`, `list` [ `int` ] ]
            Mapping containing all skypix systems and levels for which overlaps
            should be computed, grouped by `SkyPixSystem`. Not modified.
        governorValue : `str`
            Value of this element's governor dimension for which overlaps
            should be computed. For example, if ``self.element`` is ``visit``,
            this is an instrument name; if ``self.element`` is ``patch``, this
            is a skymap name.

        Yields
        ------
        row : `dict`
            Dictionary representing an overlap row.
        """
        # Sort each system's levels once (finest-grained first) instead of
        # re-sorting the caller's lists in place for every record; systems
        # with no levels contribute nothing and are dropped here.
        systemsAndLevels = [
            (system, sorted(levels, reverse=True)) for system, levels in skypix.items() if levels
        ]
        governorName = self._governor.element.name
        # Process input records one at time, computing all skypix indices for
        # each.
        for record in records:
            if record.region is None:
                continue
            assert getattr(record, governorName) == governorValue
            for system, levels in systemsAndLevels:
                baseOverlapRecord = record.dataId.byName()
                baseOverlapRecord["skypix_system"] = system.name
                # Start with the first level, which is the finest-grained one.
                # Compute skypix envelope indices directly for that.
                finest = levels[0]
                indices: Dict[int, Set[int]] = {finest: set()}
                for begin, end in system[finest].pixelization.envelope(record.region):
                    indices[finest].update(range(begin, end))
                # Divide those indices by powers of 4 (and remove duplicates)
                # to work our way up to the last (coarsest) level.
                for lastLevel, nextLevel in zip(levels[:-1], levels[1:]):
                    factor = 4 ** (lastLevel - nextLevel)
                    indices[nextLevel] = {index // factor for index in indices[lastLevel]}
                for level in levels:
                    yield from (
                        {
                            "skypix_level": level,
                            "skypix_index": index,
                            **baseOverlapRecord,  # type: ignore
                        }
                        for index in indices[level]
                    )

    def select(
        self,
        skypix: SkyPixDimension,
        governorValues: Union[AbstractSet[str], EllipsisType],
    ) -> sqlalchemy.sql.FromClause:
        """Construct a subquery expression containing overlaps between the
        given skypix dimension and governor values.

        Parameters
        ----------
        skypix : `SkyPixDimension`
            The skypix dimension (system and level) for which overlaps should
            be materialized.
        governorValues : `~collections.abc.Set` [ `str` ] or ``...``
            Values of this element's governor dimension for which overlaps
            should be returned. For example, if ``self.element`` is ``visit``,
            this is a set of instrument names; if ``self.element`` is
            ``patch``, this is a set of skymap names. If ``...`` all values
            in the database are used (`GovernorDimensionRecordStorage.values`).

        Returns
        -------
        subquery : `sqlalchemy.sql.FromClause`
            A SELECT query with an alias, intended for use as a subquery, with
            columns equal to ``self.element.required.names`` + ``skypix.name``.

        Raises
        ------
        RuntimeError
            Raised if ``skypix`` is not the common skypix dimension and
            overlaps have not been materialized for one or more of the
            requested governor values.
        """
        if skypix != self.element.universe.commonSkyPix:
            # We guarantee elsewhere that we always materialize all overlaps
            # vs. commonSkyPix, but for everything else, we need to check that
            # we have materialized this combination of governor values and
            # skypix.
            summaryWhere = [
                self._summaryTable.columns.skypix_system == skypix.system.name,
                self._summaryTable.columns.skypix_level == skypix.level,
            ]
            gvCol = self._summaryTable.columns[self._governor.element.name]
            if governorValues is not Ellipsis:
                summaryWhere.append(gvCol.in_(list(governorValues)))
            summaryQuery = (
                sqlalchemy.sql.select(gvCol)
                .select_from(self._summaryTable)
                .where(sqlalchemy.sql.and_(*summaryWhere))
            )
            # `Database.query` is used as a context manager everywhere else in
            # this module; consume the result inside the ``with`` block here
            # as well rather than iterating over the return value directly.
            with self._db.query(summaryQuery) as sql_result:
                materializedGovernorValues = {row._mapping[gvCol] for row in sql_result}
            if governorValues is Ellipsis:
                missingGovernorValues = self._governor.values - materializedGovernorValues
            else:
                missingGovernorValues = governorValues - materializedGovernorValues
            if missingGovernorValues:
                raise RuntimeError(
                    f"Query requires an overlap join between {skypix.name} and {self.element.name} "
                    f"(for {self._governor.element.name} in {missingGovernorValues}), but these "
                    f"have not been materialized."
                )
        columns = [self._overlapTable.columns.skypix_index.label(skypix.name)]
        columns.extend(self._overlapTable.columns[name] for name in self.element.graph.required.names)
        overlapWhere = [
            self._overlapTable.columns.skypix_system == skypix.system.name,
            self._overlapTable.columns.skypix_level == skypix.level,
        ]
        if governorValues is not Ellipsis:
            overlapWhere.append(
                self._overlapTable.columns[self._governor.element.name].in_(list(governorValues))
            )
        overlapQuery = (
            sqlalchemy.sql.select(*columns)
            .select_from(self._overlapTable)
            .where(sqlalchemy.sql.and_(*overlapWhere))
        )
        return overlapQuery.alias(f"{self.element.name}_{skypix.name}_overlap")

    def digestTables(self) -> Iterable[sqlalchemy.schema.Table]:
        """Return tables used for schema digest.

        Returns
        -------
        tables : `Iterable` [ `sqlalchemy.schema.Table` ]
            Possibly empty set of tables for schema digest calculations.
        """
        return [self._summaryTable, self._overlapTable]