Coverage for python/lsst/daf/butler/registry/dimensions/table.py: 86%
234 statements
« prev ^ index » next coverage.py v6.4.1, created at 2022-06-17 02:07 -0700
« prev ^ index » next coverage.py v6.4.1, created at 2022-06-17 02:07 -0700
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = ["TableDimensionRecordStorage"]
25import itertools
26import logging
27import warnings
28from collections import defaultdict
29from typing import AbstractSet, Any, Dict, Iterable, Iterator, List, Mapping, Optional, Sequence, Set, Union
31import sqlalchemy
33from ...core import (
34 DatabaseDimensionElement,
35 DataCoordinate,
36 DataCoordinateIterable,
37 DimensionElement,
38 DimensionRecord,
39 GovernorDimension,
40 NamedKeyDict,
41 NamedKeyMapping,
42 NamedValueSet,
43 SimpleQuery,
44 SkyPixDimension,
45 SkyPixSystem,
46 SpatialRegionDatabaseRepresentation,
47 TimespanDatabaseRepresentation,
48 addDimensionForeignKey,
49 ddl,
50)
51from ..interfaces import (
52 Database,
53 DatabaseDimensionOverlapStorage,
54 DatabaseDimensionRecordStorage,
55 GovernorDimensionRecordStorage,
56 StaticTablesContext,
57)
58from ..queries import QueryBuilder
59from ..wildcards import Ellipsis, EllipsisType
# Module-level logger, named after this module so that log records can be
# filtered per-module.
_LOG = logging.getLogger(__name__)


# NOTE(review): MAX_FETCH_CHUNK is not referenced anywhere else in this module
# as shown; confirm whether external code relies on it before changing or
# removing it.
MAX_FETCH_CHUNK = 1000
"""Maximum number of data IDs we fetch records at a time.

Barring something database-engine-specific, this sets the size of the actual
SQL query, not just the number of result rows, because the only way to query
for multiple data IDs in a single SELECT query via SQLAlchemy is to have an OR
term in the WHERE clause for each one.
"""
class TableDimensionRecordStorage(DatabaseDimensionRecordStorage):
    """A record storage implementation that uses a regular database table.

    Parameters
    ----------
    db : `Database`
        Interface to the database engine and namespace that will hold these
        dimension records.
    element : `DatabaseDimensionElement`
        The element whose records this storage will manage.
    table : `sqlalchemy.schema.Table`
        The logical table for the element.
    skyPixOverlap : `_SkyPixOverlapStorage`, optional
        Object that manages the tables that hold materialized spatial overlap
        joins to skypix dimensions.  Should be `None` if (and only if)
        ``element.spatial is None``.
    """

    def __init__(
        self,
        db: Database,
        element: DatabaseDimensionElement,
        *,
        table: sqlalchemy.schema.Table,
        skyPixOverlap: Optional[_SkyPixOverlapStorage] = None,
    ):
        self._db = db
        self._table = table
        self._element = element
        # Map each dimension name to the table column that holds its value;
        # `fetch` uses this to translate data ID constraints into WHERE terms.
        self._fetchColumns: Dict[str, sqlalchemy.sql.ColumnElement] = {
            dimension.name: self._table.columns[name]
            for dimension, name in zip(
                self._element.dimensions, self._element.RecordClass.fields.dimensions.names
            )
        }
        self._skyPixOverlap = skyPixOverlap
        # Overlap storage objects registered later via `connect`.
        self._otherOverlaps: List[DatabaseDimensionOverlapStorage] = []

    @classmethod
    def initialize(
        cls,
        db: Database,
        element: DatabaseDimensionElement,
        *,
        context: Optional[StaticTablesContext] = None,
        config: Mapping[str, Any],
        governors: NamedKeyMapping[GovernorDimension, GovernorDimensionRecordStorage],
    ) -> DatabaseDimensionRecordStorage:
        # Docstring inherited from DatabaseDimensionRecordStorage.
        spec = element.RecordClass.fields.makeTableSpec(
            RegionReprClass=db.getSpatialRegionRepresentation(),
            TimespanReprClass=db.getTimespanRepresentation(),
        )
        # Declare the table through the static-tables context when we have
        # one; otherwise create it (if needed) directly.
        if context is not None:
            table = context.addTable(element.name, spec)
        else:
            table = db.ensureTableExists(element.name, spec)
        skyPixOverlap: Optional[_SkyPixOverlapStorage]
        if element.spatial is not None:
            governor = governors[element.spatial.governor]
            skyPixOverlap = _SkyPixOverlapStorage.initialize(
                db,
                element,
                context=context,
                governor=governor,
            )
            result = cls(db, element, table=table, skyPixOverlap=skyPixOverlap)

            # Whenever anyone inserts a new governor dimension value, we want
            # to enable overlaps for that value between this element and
            # commonSkyPix.
            def callback(record: DimensionRecord) -> None:
                skyPixOverlap.enable(  # type: ignore
                    result,
                    element.universe.commonSkyPix,
                    getattr(record, element.spatial.governor.primaryKey.name),  # type: ignore
                )

            governor.registerInsertionListener(callback)
            return result
        else:
            return cls(db, element, table=table)

    @property
    def element(self) -> DatabaseDimensionElement:
        # Docstring inherited from DimensionRecordStorage.element.
        return self._element

    def clearCaches(self) -> None:
        # Docstring inherited from DimensionRecordStorage.clearCaches.
        # This implementation holds no caches, so there is nothing to clear.
        pass

    def join(
        self,
        builder: QueryBuilder,
        *,
        regions: Optional[NamedKeyDict[DimensionElement, SpatialRegionDatabaseRepresentation]] = None,
        timespans: Optional[NamedKeyDict[DimensionElement, TimespanDatabaseRepresentation]] = None,
    ) -> None:
        # Docstring inherited from DimensionRecordStorage.
        if regions is not None:
            # Spatial joins go through the materialized overlaps with the
            # common skypix dimension, so join that overlap subquery first.
            dimensions = NamedValueSet(self.element.required)
            dimensions.add(self.element.universe.commonSkyPix)
            assert self._skyPixOverlap is not None
            builder.joinTable(
                self._skyPixOverlap.select(self.element.universe.commonSkyPix, Ellipsis),
                dimensions,
            )
            regionsInTable = self._db.getSpatialRegionRepresentation().fromSelectable(self._table)
            regions[self.element] = regionsInTable
        joinOn = builder.startJoin(
            self._table, self.element.dimensions, self.element.RecordClass.fields.dimensions.names
        )
        if timespans is not None:
            # Require this table's timespan to overlap every timespan already
            # in the query, then register it for subsequent joins.
            timespanInTable = self._db.getTimespanRepresentation().fromSelectable(self._table)
            for timespanInQuery in timespans.values():
                joinOn.append(timespanInQuery.overlaps(timespanInTable))
            timespans[self.element] = timespanInTable
        builder.finishJoin(self._table, joinOn)
        # NOTE(review): the signature is annotated ``-> None`` but the table
        # is returned; kept as-is in case callers rely on it -- confirm
        # against the base-class contract.
        return self._table

    def fetch(self, dataIds: DataCoordinateIterable) -> Iterable[DimensionRecord]:
        # Docstring inherited from DimensionRecordStorage.fetch.
        RecordClass = self.element.RecordClass
        query = SimpleQuery()
        query.columns.extend(self._table.columns[name] for name in RecordClass.fields.standard.names)
        if self.element.spatial is not None:
            query.columns.append(self._table.columns["region"])
        if self.element.temporal is not None:
            TimespanReprClass = self._db.getTimespanRepresentation()
            query.columns.extend(self._table.columns[name] for name in TimespanReprClass.getFieldNames())
        query.join(self._table)
        dataIds.constrain(query, lambda name: self._fetchColumns[name])
        with warnings.catch_warnings():
            # Some of our generated queries may contain cartesian joins, this
            # is not a serious issue as it is properly constrained, so we want
            # to suppress sqlalchemy warnings.
            warnings.filterwarnings(
                "ignore",
                message="SELECT statement has a cartesian product",
                category=sqlalchemy.exc.SAWarning,
            )
            for row in self._db.query(query.combine()):
                values = row._asdict()
                if self.element.temporal is not None:
                    # Collapse the timespan's database columns back into a
                    # single value under the canonical timespan field name.
                    values[TimespanDatabaseRepresentation.NAME] = TimespanReprClass.extract(values)
                yield RecordClass(**values)

    def insert(self, *records: DimensionRecord, replace: bool = False, skip_existing: bool = False) -> None:
        # Docstring inherited from DimensionRecordStorage.insert.
        elementRows = [record.toDict() for record in records]
        if self.element.temporal is not None:
            # Expand each timespan into its database column representation.
            TimespanReprClass = self._db.getTimespanRepresentation()
            for row in elementRows:
                timespan = row.pop(TimespanDatabaseRepresentation.NAME)
                TimespanReprClass.update(timespan, result=row)
        with self._db.transaction():
            if replace:
                self._db.replace(self._table, *elementRows)
            elif skip_existing:
                self._db.ensure(self._table, *elementRows, primary_key_only=True)
            else:
                self._db.insert(self._table, *elementRows)
            if self._skyPixOverlap is not None:
                # With ``skip_existing`` some of the given records may already
                # be present (along with their overlap rows), so a plain
                # INSERT of overlap rows would violate the overlap table's
                # primary key.  Treat that case like ``replace``: any existing
                # overlap rows for these records are deleted and recomputed
                # from the records given here.
                self._skyPixOverlap.insert(records, replace=replace or skip_existing)

    def sync(self, record: DimensionRecord, update: bool = False) -> Union[bool, Dict[str, Any]]:
        # Docstring inherited from DimensionRecordStorage.sync.
        compared = record.toDict()
        # Split the record into primary-key values (``keys``) and everything
        # else (``compared``).
        keys = {}
        for name in record.fields.required.names:
            keys[name] = compared.pop(name)
        if self.element.temporal is not None:
            TimespanReprClass = self._db.getTimespanRepresentation()
            timespan = compared.pop(TimespanDatabaseRepresentation.NAME)
            TimespanReprClass.update(timespan, result=compared)
        with self._db.transaction():
            _, inserted_or_updated = self._db.sync(
                self._table,
                keys=keys,
                compared=compared,
                update=update,
            )
            if inserted_or_updated and self._skyPixOverlap is not None:
                if inserted_or_updated is True:
                    # Inserted a new row, so we just need to insert new overlap
                    # rows.
                    self._skyPixOverlap.insert([record])
                elif "region" in inserted_or_updated:
                    # Updated the region, so we need to delete old overlap rows
                    # and insert new ones.
                    # (mypy should be able to tell that inserted_or_updated
                    # must be a dict if we get to this clause, but it can't)
                    self._skyPixOverlap.insert([record], replace=True)
                # Otherwise we updated something other than a region, and the
                # overlap rows are still valid.
        return inserted_or_updated

    def digestTables(self) -> Iterable[sqlalchemy.schema.Table]:
        # Docstring inherited from DimensionRecordStorage.digestTables.
        result = [self._table]
        if self._skyPixOverlap is not None:
            result.extend(self._skyPixOverlap.digestTables())
        return result

    def connect(self, overlaps: DatabaseDimensionOverlapStorage) -> None:
        # Docstring inherited from DatabaseDimensionRecordStorage.
        self._otherOverlaps.append(overlaps)
class _SkyPixOverlapStorage:
    """A helper object for `TableDimensionRecordStorage` that manages its
    materialized overlaps with skypix dimensions.

    New instances should be constructed by calling `initialize`, not by calling
    the constructor directly.

    Parameters
    ----------
    db : `Database`
        Interface to the underlying database engine and namespace.
    element : `DatabaseDimensionElement`
        Dimension element whose overlaps are to be managed.
    summaryTable : `sqlalchemy.schema.Table`
        Table that records which combinations of skypix dimensions and
        governor dimension values have materialized overlap rows.
    overlapTable : `sqlalchemy.schema.Table`
        Table containing the actual materialized overlap rows.
    governor : `GovernorDimensionRecordStorage`
        Record storage backend for this element's governor dimension.

    Notes
    -----
    This class (and most importantly, the tables it relies on) can in principle
    manage overlaps with any skypix dimension, but at present it is only being
    used to manage relationships with the special ``commonSkyPix`` dimension,
    because that's all the query system uses.  Eventually, we expect to
    require users to explicitly materialize all relationships they will want
    to use in queries.

    Other possible future improvements include:

    - allowing finer-grained skypix dimensions to provide overlap rows for
      coarser ones, by dividing indices by powers of 4 (and possibly doing
      ``SELECT DISTINCT`` in the subquery to remove duplicates);

    - allowing finer-grained database elements (e.g. patch) to provide overlap
      rows for coarser ones (e.g. tract), by ignoring irrelevant columns
      (e.g. the patch IDs) in the subquery (again, possible with
      ``SELECT DISTINCT``).

    But there's no point to doing any of that until the query system can
    figure out how best to ask for overlap rows when an exact match isn't
    available.
    """

    def __init__(
        self,
        db: Database,
        element: DatabaseDimensionElement,
        summaryTable: sqlalchemy.schema.Table,
        overlapTable: sqlalchemy.schema.Table,
        governor: GovernorDimensionRecordStorage,
    ):
        self._db = db
        self.element = element
        # Overlaps only make sense for spatial elements.
        assert element.spatial is not None
        self._summaryTable = summaryTable
        self._overlapTable = overlapTable
        self._governor = governor

    @classmethod
    def initialize(
        cls,
        db: Database,
        element: DatabaseDimensionElement,
        *,
        context: Optional[StaticTablesContext],
        governor: GovernorDimensionRecordStorage,
    ) -> _SkyPixOverlapStorage:
        """Construct a new instance, creating tables as needed.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        element : `DatabaseDimensionElement`
            Dimension element whose overlaps are to be managed.
        context : `StaticTablesContext`, optional
            If provided, an object to use to create any new tables.  If not
            provided, ``db.ensureTableExists`` should be used instead.
        governor : `GovernorDimensionRecordStorage`
            Record storage backend for this element's governor dimension.

        Returns
        -------
        storage : `_SkyPixOverlapStorage`
            New instance of ``cls`` backed by the summary and overlap tables.
        """
        # Both table-creation callables take (name, spec), so pick one and
        # use it for both tables.
        if context is not None:
            op = context.addTable
        else:
            op = db.ensureTableExists
        summaryTable = op(
            cls._SUMMARY_TABLE_NAME_SPEC.format(element=element),
            cls._makeSummaryTableSpec(element),
        )
        overlapTable = op(
            cls._OVERLAP_TABLE_NAME_SPEC.format(element=element),
            cls._makeOverlapTableSpec(element),
        )
        # Construct via ``cls`` (not the hard-coded class name) so that
        # subclasses get instances of their own type.
        return cls(db, element, summaryTable=summaryTable, overlapTable=overlapTable, governor=governor)

    # Template for the summary table's name; formatted with ``element``.
    _SUMMARY_TABLE_NAME_SPEC = "{element.name}_skypix_overlap_summary"

    @classmethod
    def _makeSummaryTableSpec(cls, element: DatabaseDimensionElement) -> ddl.TableSpec:
        """Create a specification for the table that records which combinations
        of skypix dimension and governor value have materialized overlaps.

        Parameters
        ----------
        element : `DatabaseDimensionElement`
            Dimension element whose overlaps are to be managed.

        Returns
        -------
        tableSpec : `ddl.TableSpec`
            Table specification.
        """
        assert element.spatial is not None
        tableSpec = ddl.TableSpec(
            fields=[
                ddl.FieldSpec(
                    name="skypix_system",
                    dtype=sqlalchemy.String,
                    length=16,
                    nullable=False,
                    primaryKey=True,
                ),
                ddl.FieldSpec(
                    name="skypix_level",
                    dtype=sqlalchemy.SmallInteger,
                    nullable=False,
                    primaryKey=True,
                ),
            ]
        )
        # The governor dimension column completes the primary key.
        addDimensionForeignKey(tableSpec, element.spatial.governor, primaryKey=True)
        return tableSpec

    # Template for the overlap table's name; formatted with ``element``.
    _OVERLAP_TABLE_NAME_SPEC = "{element.name}_skypix_overlap"

    @classmethod
    def _makeOverlapTableSpec(cls, element: DatabaseDimensionElement) -> ddl.TableSpec:
        """Create a specification for the table that holds materialized
        overlap rows.

        Parameters
        ----------
        element : `DatabaseDimensionElement`
            Dimension element whose overlaps are to be managed.

        Returns
        -------
        tableSpec : `ddl.TableSpec`
            Table specification.
        """
        assert element.spatial is not None
        tableSpec = ddl.TableSpec(
            fields=[
                ddl.FieldSpec(
                    name="skypix_system",
                    dtype=sqlalchemy.String,
                    length=16,
                    nullable=False,
                    primaryKey=True,
                ),
                ddl.FieldSpec(
                    name="skypix_level",
                    dtype=sqlalchemy.SmallInteger,
                    nullable=False,
                    primaryKey=True,
                ),
                # (more columns added below)
            ],
            unique=set(),
            indexes={
                # This index has the same fields as the PK, in a different
                # order, to facilitate queries that know skypix_index and want
                # to find the other element.
                (
                    "skypix_system",
                    "skypix_level",
                    "skypix_index",
                )
                + tuple(element.graph.required.names),
            },
            foreignKeys=[
                # Foreign key to summary table.  This makes sure we don't
                # materialize any overlaps without remembering that we've done
                # so in the summary table, though it can't prevent the converse
                # of adding a summary row without adding overlap row (either of
                # those is a logic bug, of course, but we want to be defensive
                # about those).  Using ON DELETE CASCADE, it'd be very easy to
                # implement "disabling" an overlap materialization, because we
                # can just delete the summary row.
                # Note that the governor dimension column is added below, in
                # the call to addDimensionForeignKey.
                ddl.ForeignKeySpec(
                    cls._SUMMARY_TABLE_NAME_SPEC.format(element=element),
                    source=("skypix_system", "skypix_level", element.spatial.governor.name),
                    target=("skypix_system", "skypix_level", element.spatial.governor.name),
                    onDelete="CASCADE",
                ),
            ],
        )
        # Add fields for the standard element this class manages overlaps for.
        # This is guaranteed to add a column for the governor dimension,
        # because that's a required dependency of element.
        for dimension in element.required:
            addDimensionForeignKey(tableSpec, dimension, primaryKey=True)
        # Add field for the actual skypix index.  We do this later because I
        # think we care (at least a bit) about the order in which the primary
        # key is defined, in that we want a non-summary column like this one
        # to appear after the governor dimension column.
        tableSpec.fields.add(
            ddl.FieldSpec(
                name="skypix_index",
                dtype=sqlalchemy.BigInteger,
                nullable=False,
                primaryKey=True,
            )
        )
        return tableSpec

    def enable(
        self,
        storage: TableDimensionRecordStorage,
        skypix: SkyPixDimension,
        governorValue: str,
    ) -> None:
        """Enable materialization of overlaps between a skypix dimension
        and the records of ``self.element`` with a particular governor value.

        Parameters
        ----------
        storage : `TableDimensionRecordStorage`
            Storage object for the records of ``self.element``.
        skypix : `SkyPixDimension`
            The skypix dimension (system and level) for which overlaps should
            be materialized.
        governorValue : `str`
            Value of this element's governor dimension for which overlaps
            should be materialized.  For example, if ``self.element`` is
            ``visit``, this is an instrument name; if ``self.element`` is
            ``patch``, this is a skymap name.

        Notes
        -----
        If there are existing rows for the given ``governorValue``, overlap
        rows for them will be immediately computed and inserted.  At present,
        that never happens, because we only enable overlaps with
        `DimensionUniverse.commonSkyPix`, and that happens immediately after
        each governor row is inserted (and there can't be any patch rows,
        for example, until after the corresponding skymap row is inserted).

        After calling `enable` for a particular combination, any new records
        for ``self.element`` that are inserted will automatically be
        accompanied by overlap records (via calls to `insert` made
        by `TableDimensionRecordStorage` methods).
        """
        # Because we're essentially materializing a view in Python, we
        # aggressively lock all tables we're reading and writing in order to be
        # sure nothing gets out of sync.  This may not be the most efficient
        # approach possible, but we'll focus on correct before we focus on
        # fast, and enabling a new overlap combination should be a very rare
        # operation anyway, and never one we do in parallel.
        with self._db.transaction(
            lock=[self._governor.table, storage._table, self._summaryTable, self._overlapTable]
        ):
            # Only the "was a row inserted" flag matters here; the synced row
            # itself is not needed.
            _, inserted = self._db.sync(
                self._summaryTable,
                keys={
                    "skypix_system": skypix.system.name,
                    "skypix_level": skypix.level,
                    self._governor.element.name: governorValue,
                },
            )
            if inserted:
                _LOG.debug(
                    "Precomputing initial overlaps for %s vs %s for %s=%s",
                    skypix.name,
                    self.element.name,
                    self._governor.element.name,
                    governorValue,
                )
                self._fill(storage=storage, skypix=skypix, governorValue=governorValue)
            else:
                _LOG.debug(
                    "Overlaps already precomputed for %s vs %s for %s=%s",
                    skypix.name,
                    self.element.name,
                    self._governor.element.name,
                    governorValue,
                )

    def _fill(
        self,
        *,
        storage: TableDimensionRecordStorage,
        skypix: SkyPixDimension,
        governorValue: str,
    ) -> None:
        """Insert overlap records for a newly-enabled combination of skypix
        dimension and governor value.

        This method should only be called by `enable`.

        Parameters
        ----------
        storage : `TableDimensionRecordStorage`
            Storage object for the records of ``self.element``.
        skypix : `SkyPixDimension`
            The skypix dimension (system and level) for which overlaps should
            be materialized.
        governorValue : `str`
            Value of this element's governor dimension for which overlaps
            should be materialized.  For example, if ``self.element`` is
            ``visit``, this is an instrument name; if ``self.element`` is
            ``patch``, this is a skymap name.
        """
        overlapRecords: List[dict] = []
        # `DimensionRecordStorage.fetch` as defined by the ABC expects to be
        # given iterables of data IDs that correspond to that element's graph
        # (e.g. {instrument, visit, detector}), not just some subset of it
        # (e.g. {instrument}).  But we know the implementation of `fetch` for
        # `TableDimensionRecordStorage` will use this iterable to do exactly
        # what we want.
        governorDataId = DataCoordinate.standardize(
            {self._governor.element.name: governorValue}, graph=self._governor.element.graph
        )
        for record in storage.fetch(DataCoordinateIterable.fromScalar(governorDataId)):
            if record.region is None:
                continue
            baseOverlapRecord = record.dataId.byName()
            baseOverlapRecord["skypix_system"] = skypix.system.name
            baseOverlapRecord["skypix_level"] = skypix.level
            # ``envelope`` yields half-open [begin, end) index ranges; expand
            # each into one overlap row per index.
            for begin, end in skypix.pixelization.envelope(record.region):
                overlapRecords.extend(
                    dict(baseOverlapRecord, skypix_index=index) for index in range(begin, end)
                )
        _LOG.debug(
            "Inserting %d initial overlap rows for %s vs %s for %s=%r",
            len(overlapRecords),
            skypix.name,
            self.element.name,
            self._governor.element.name,
            governorValue,
        )
        self._db.insert(self._overlapTable, *overlapRecords)

    def insert(self, records: Sequence[DimensionRecord], replace: bool = False) -> None:
        """Insert overlaps for a sequence of ``self.element`` records that
        have just been inserted.

        This must be called by any method that inserts records for that
        element (i.e. `TableDimensionRecordStorage.insert` and
        `TableDimensionRecordStorage.sync`), within the same transaction.

        Parameters
        ----------
        records : `Sequence` [ `DimensionRecord` ]
            Records for ``self.element``.  Records with `None` regions are
            ignored.  If empty, this method does nothing.
        replace : `bool`, optional
            If `True` (`False` is default) one or more of the given records may
            already exist and is being updated, so we need to delete any
            existing overlap records first.
        """
        # Group records by family.governor value.
        grouped: Dict[str, List[DimensionRecord]] = defaultdict(list)
        for record in records:
            grouped[getattr(record, self._governor.element.name)].append(record)
        if not grouped:
            # Nothing to insert; don't lock the summary table or issue a query
            # with an empty IN clause for no reason.
            return
        _LOG.debug(
            "Precomputing new skypix overlaps for %s where %s in %s.",
            self.element.name,
            self._governor.element.name,
            grouped.keys(),
        )
        # Make sure the set of combinations to materialize does not change
        # while we are materializing the ones we have, by locking the summary
        # table.  Because we aren't planning to write to the summary table,
        # this could just be a SHARED lock instead of an EXCLUSIVE one, but
        # there's no API for that right now.
        with self._db.transaction(lock=[self._summaryTable]):
            # Query for the skypix dimensions to be associated with each
            # governor value.
            gvCol = self._summaryTable.columns[self._governor.element.name]
            sysCol = self._summaryTable.columns.skypix_system
            lvlCol = self._summaryTable.columns.skypix_level
            query = (
                sqlalchemy.sql.select(
                    gvCol,
                    sysCol,
                    lvlCol,
                )
                .select_from(self._summaryTable)
                .where(gvCol.in_(list(grouped.keys())))
            )
            # Group results by governor value, then skypix system.
            skypix: Dict[str, NamedKeyDict[SkyPixSystem, List[int]]] = {
                gv: NamedKeyDict() for gv in grouped.keys()
            }
            for summaryRow in self._db.query(query).mappings():
                system = self.element.universe.skypix[summaryRow[sysCol]]
                skypix[summaryRow[gvCol]].setdefault(system, []).append(summaryRow[lvlCol])
            if replace:
                # Construct constraints for a DELETE query as a list of dicts.
                # We include the skypix_system and skypix_level column values
                # explicitly instead of just letting the query search for all
                # of those related to the given records, because they are the
                # first columns in the primary key, and hence searching with
                # them will be way faster (and we don't want to add a new index
                # just for this operation).
                to_delete: List[Dict[str, Any]] = []
                for gv, skypix_systems in skypix.items():
                    for system, skypix_levels in skypix_systems.items():
                        to_delete.extend(
                            {"skypix_system": system.name, "skypix_level": level, **record.dataId.byName()}
                            for record, level in itertools.product(grouped[gv], skypix_levels)
                        )
                self._db.delete(
                    self._overlapTable,
                    ["skypix_system", "skypix_level"] + list(self.element.graph.required.names),
                    *to_delete,
                )
            overlapRecords: List[dict] = []
            # Compute overlaps for one governor value at a time, but gather
            # them all up for one insert.
            for gv, group in grouped.items():
                overlapRecords.extend(self._compute(group, skypix[gv], gv))
            _LOG.debug(
                "Inserting %d new skypix overlap rows for %s where %s in %s.",
                len(overlapRecords),
                self.element.name,
                self._governor.element.name,
                grouped.keys(),
            )
            self._db.insert(self._overlapTable, *overlapRecords)

    def _compute(
        self,
        records: Sequence[DimensionRecord],
        skypix: NamedKeyDict[SkyPixSystem, List[int]],
        governorValue: str,
    ) -> Iterator[dict]:
        """Compute all overlap rows for a particular governor dimension value
        and all of the skypix dimensions for which its overlaps are enabled.

        This method should only be called by `insert`.

        Parameters
        ----------
        records : `Sequence` [ `DimensionRecord` ]
            Records for ``self.element``.  Records with `None` regions are
            ignored.  All must have the governor value given.
        skypix : `NamedKeyDict` [ `SkyPixSystem`, `list` [ `int` ] ]
            Mapping containing all skypix systems and levels for which overlaps
            should be computed, grouped by `SkyPixSystem`.  Neither the
            mapping nor its lists of levels are modified.
        governorValue : `str`
            Value of this element's governor dimension for which overlaps
            should be computed.  For example, if ``self.element`` is ``visit``,
            this is an instrument name; if ``self.element`` is ``patch``, this
            is a skymap name.

        Yields
        ------
        row : `dict`
            Dictionary representing an overlap row.
        """
        # Sort each system's levels from finest to coarsest once, up front,
        # rather than once per record, and without reordering the caller's
        # lists in place.  Systems with no levels contribute nothing.
        levelsBySystem = [
            (system, sorted(levels, reverse=True)) for system, levels in skypix.items() if levels
        ]
        # Process input records one at time, computing all skypix indices for
        # each.
        for record in records:
            if record.region is None:
                continue
            assert getattr(record, self._governor.element.name) == governorValue
            for system, levels in levelsBySystem:
                baseOverlapRecord = record.dataId.byName()
                baseOverlapRecord["skypix_system"] = system.name
                # Start with the first level, which is the finest-grained one.
                # Compute skypix envelope indices directly for that.
                finest = levels[0]
                indices: Dict[int, Set[int]] = {finest: set()}
                for begin, end in system[finest].pixelization.envelope(record.region):
                    indices[finest].update(range(begin, end))
                # Divide those indices by powers of 4 (and remove duplicates)
                # to work our way up to the last (coarsest) level.
                for lastLevel, nextLevel in zip(levels[:-1], levels[1:]):
                    factor = 4 ** (lastLevel - nextLevel)
                    indices[nextLevel] = {index // factor for index in indices[lastLevel]}
                for level in levels:
                    yield from (
                        {
                            "skypix_level": level,
                            "skypix_index": index,
                            **baseOverlapRecord,  # type: ignore
                        }
                        for index in indices[level]
                    )

    def select(
        self,
        skypix: SkyPixDimension,
        governorValues: Union[AbstractSet[str], EllipsisType],
    ) -> sqlalchemy.sql.FromClause:
        """Construct a subquery expression containing overlaps between the
        given skypix dimension and governor values.

        Parameters
        ----------
        skypix : `SkyPixDimension`
            The skypix dimension (system and level) for which overlaps should
            be materialized.
        governorValues : `AbstractSet` [ `str` ] or ``...``
            Values of this element's governor dimension for which overlaps
            should be returned.  For example, if ``self.element`` is ``visit``,
            this is a set of instrument names; if ``self.element`` is
            ``patch``, this is a set of skymap names.  If ``...`` all values
            in the database are used (`GovernorDimensionRecordStorage.values`).

        Returns
        -------
        subquery : `sqlalchemy.sql.FromClause`
            A SELECT query with an alias, intended for use as a subquery, with
            columns equal to ``self.element.required.names`` + ``skypix.name``.

        Raises
        ------
        RuntimeError
            Raised if ``skypix`` is not the common skypix dimension and
            overlaps have not been materialized for one or more of the
            requested governor values.
        """
        if skypix != self.element.universe.commonSkyPix:
            # We guarantee elsewhere that we always materialize all overlaps
            # vs. commonSkyPix, but for everything else, we need to check that
            # we have materialized this combination of governor values and
            # skypix.
            summaryWhere = [
                self._summaryTable.columns.skypix_system == skypix.system.name,
                self._summaryTable.columns.skypix_level == skypix.level,
            ]
            gvCol = self._summaryTable.columns[self._governor.element.name]
            if governorValues is not Ellipsis:
                summaryWhere.append(gvCol.in_(list(governorValues)))
            summaryQuery = (
                sqlalchemy.sql.select(gvCol)
                .select_from(self._summaryTable)
                .where(sqlalchemy.sql.and_(*summaryWhere))
            )
            materializedGovernorValues = {row._mapping[gvCol] for row in self._db.query(summaryQuery)}
            if governorValues is Ellipsis:
                missingGovernorValues = self._governor.values - materializedGovernorValues
            else:
                missingGovernorValues = governorValues - materializedGovernorValues
            if missingGovernorValues:
                raise RuntimeError(
                    f"Query requires an overlap join between {skypix.name} and {self.element.name} "
                    f"(for {self._governor.element.name} in {missingGovernorValues}), but these "
                    f"have not been materialized."
                )
        # Expose the skypix index under the skypix dimension's own name so
        # that the subquery can be joined on it directly.
        columns = [self._overlapTable.columns.skypix_index.label(skypix.name)]
        columns.extend(self._overlapTable.columns[name] for name in self.element.graph.required.names)
        overlapWhere = [
            self._overlapTable.columns.skypix_system == skypix.system.name,
            self._overlapTable.columns.skypix_level == skypix.level,
        ]
        if governorValues is not Ellipsis:
            overlapWhere.append(
                self._overlapTable.columns[self._governor.element.name].in_(list(governorValues))
            )
        overlapQuery = (
            sqlalchemy.sql.select(*columns)
            .select_from(self._overlapTable)
            .where(sqlalchemy.sql.and_(*overlapWhere))
        )
        return overlapQuery.alias(f"{self.element.name}_{skypix.name}_overlap")

    def digestTables(self) -> Iterable[sqlalchemy.schema.Table]:
        """Return tables used for schema digest.

        Returns
        -------
        tables : `Iterable` [ `sqlalchemy.schema.Table` ]
            Possibly empty set of tables for schema digest calculations.
        """
        return [self._summaryTable, self._overlapTable]