Coverage for python/lsst/daf/butler/registry/dimensions/table.py: 85%
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = ["TableDimensionRecordStorage"]
25from collections import defaultdict
26import itertools
27import logging
28from typing import (
29 AbstractSet,
30 Any,
31 Dict,
32 Iterable,
33 Iterator,
34 List,
35 Mapping,
36 Optional,
37 Sequence,
38 Set,
39 Union,
40)
41import warnings
43import sqlalchemy
45from ...core import (
46 addDimensionForeignKey,
47 DatabaseDimensionElement,
48 DataCoordinate,
49 DataCoordinateIterable,
50 ddl,
51 DimensionElement,
52 DimensionRecord,
53 GovernorDimension,
54 NamedKeyDict,
55 NamedKeyMapping,
56 NamedValueSet,
57 SimpleQuery,
58 SkyPixDimension,
59 SkyPixSystem,
60 SpatialRegionDatabaseRepresentation,
61 TimespanDatabaseRepresentation,
62)
63from ..interfaces import (
64 Database,
65 DatabaseDimensionOverlapStorage,
66 DatabaseDimensionRecordStorage,
67 GovernorDimensionRecordStorage,
68 StaticTablesContext,
69)
70from ..queries import QueryBuilder
71from ..wildcards import Ellipsis, EllipsisType
_LOG = logging.getLogger(__name__)


MAX_FETCH_CHUNK = 1000
"""Maximum number of data IDs for which we fetch records at a time.

Barring something database-engine-specific, this sets the size of the actual
SQL query, not just the number of result rows, because the only way to query
for multiple data IDs in a single SELECT query via SQLAlchemy is to have an OR
term in the WHERE clause for each one.
"""
class TableDimensionRecordStorage(DatabaseDimensionRecordStorage):
    """A record storage implementation that uses a regular database table.

    Parameters
    ----------
    db : `Database`
        Interface to the database engine and namespace that will hold these
        dimension records.
    element : `DatabaseDimensionElement`
        The element whose records this storage will manage.
    table : `sqlalchemy.schema.Table`
        The logical table for the element.
    skyPixOverlap : `_SkyPixOverlapStorage`, optional
        Object that manages the tables that hold materialized spatial overlap
        joins to skypix dimensions.  Should be `None` if (and only if)
        ``element.spatial is None``.
    """
    def __init__(self, db: Database, element: DatabaseDimensionElement, *, table: sqlalchemy.schema.Table,
                 skyPixOverlap: Optional[_SkyPixOverlapStorage] = None):
        self._db = db
        self._table = table
        self._element = element
        # Map each dimension's name to the table column that holds its value;
        # `fetch` uses this to translate data ID keys into WHERE constraints.
        self._fetchColumns: Dict[str, sqlalchemy.sql.ColumnElement] = {
            dimension.name: self._table.columns[name]
            for dimension, name in zip(self._element.dimensions,
                                       self._element.RecordClass.fields.dimensions.names)
        }
        self._skyPixOverlap = skyPixOverlap
        self._otherOverlaps: List[DatabaseDimensionOverlapStorage] = []

    @classmethod
    def initialize(
        cls,
        db: Database,
        element: DatabaseDimensionElement, *,
        context: Optional[StaticTablesContext] = None,
        config: Mapping[str, Any],
        governors: NamedKeyMapping[GovernorDimension, GovernorDimensionRecordStorage],
    ) -> DatabaseDimensionRecordStorage:
        # Docstring inherited from DatabaseDimensionRecordStorage.
        spec = element.RecordClass.fields.makeTableSpec(
            RegionReprClass=db.getSpatialRegionRepresentation(),
            TimespanReprClass=db.getTimespanRepresentation(),
        )
        # Static tables are declared through the context when one is given;
        # otherwise the table is created on demand.
        if context is not None:
            table = context.addTable(element.name, spec)
        else:
            table = db.ensureTableExists(element.name, spec)
        skyPixOverlap: Optional[_SkyPixOverlapStorage]
        if element.spatial is not None:
            governor = governors[element.spatial.governor]
            skyPixOverlap = _SkyPixOverlapStorage.initialize(
                db,
                element,
                context=context,
                governor=governor,
            )
            result = cls(db, element, table=table, skyPixOverlap=skyPixOverlap)

            # Whenever anyone inserts a new governor dimension value, we want
            # to enable overlaps for that value between this element and
            # commonSkyPix.
            def callback(record: DimensionRecord) -> None:
                skyPixOverlap.enable(  # type: ignore
                    result,
                    element.universe.commonSkyPix,
                    getattr(record, element.spatial.governor.primaryKey.name),  # type: ignore
                )

            governor.registerInsertionListener(callback)
            return result
        else:
            return cls(db, element, table=table)

    @property
    def element(self) -> DatabaseDimensionElement:
        # Docstring inherited from DimensionRecordStorage.element.
        return self._element

    def clearCaches(self) -> None:
        # Docstring inherited from DimensionRecordStorage.clearCaches.
        pass

    def join(
        self,
        builder: QueryBuilder, *,
        regions: Optional[NamedKeyDict[DimensionElement, SpatialRegionDatabaseRepresentation]] = None,
        timespans: Optional[NamedKeyDict[DimensionElement, TimespanDatabaseRepresentation]] = None,
    ) -> sqlalchemy.sql.FromClause:
        # Docstring inherited from DimensionRecordStorage.
        if regions is not None:
            # Spatial joins go through the materialized overlaps with the
            # common skypix dimension rather than comparing regions directly.
            dimensions = NamedValueSet(self.element.required)
            dimensions.add(self.element.universe.commonSkyPix)
            assert self._skyPixOverlap is not None
            builder.joinTable(
                self._skyPixOverlap.select(self.element.universe.commonSkyPix, Ellipsis),
                dimensions,
            )
            regionsInTable = self._db.getSpatialRegionRepresentation().fromSelectable(self._table)
            regions[self.element] = regionsInTable
        joinOn = builder.startJoin(self._table, self.element.dimensions,
                                   self.element.RecordClass.fields.dimensions.names)
        if timespans is not None:
            # Require this element's timespan to overlap every timespan that
            # is already part of the query, then register our own.
            timespanInTable = self._db.getTimespanRepresentation().fromSelectable(self._table)
            for timespanInQuery in timespans.values():
                joinOn.append(timespanInQuery.overlaps(timespanInTable))
            timespans[self.element] = timespanInTable
        builder.finishJoin(self._table, joinOn)
        return self._table

    def fetch(self, dataIds: DataCoordinateIterable) -> Iterable[DimensionRecord]:
        # Docstring inherited from DimensionRecordStorage.fetch.
        RecordClass = self.element.RecordClass
        query = SimpleQuery()
        query.columns.extend(self._table.columns[name] for name in RecordClass.fields.standard.names)
        if self.element.spatial is not None:
            query.columns.append(self._table.columns["region"])
        if self.element.temporal is not None:
            TimespanReprClass = self._db.getTimespanRepresentation()
            query.columns.extend(self._table.columns[name] for name in TimespanReprClass.getFieldNames())
        query.join(self._table)
        dataIds.constrain(query, lambda name: self._fetchColumns[name])
        with warnings.catch_warnings():
            # Some of our generated queries may contain cartesian joins, this
            # is not a serious issue as it is properly constrained, so we want
            # to suppress sqlalchemy warnings.
            warnings.filterwarnings("ignore", message="SELECT statement has a cartesian product",
                                    category=sqlalchemy.exc.SAWarning)
            for row in self._db.query(query.combine()):
                values = row._asdict()
                if self.element.temporal is not None:
                    # Collapse the database representation of the timespan
                    # back into the single field the record class expects.
                    values[TimespanDatabaseRepresentation.NAME] = TimespanReprClass.extract(values)
                yield RecordClass(**values)

    def insert(self, *records: DimensionRecord, replace: bool = False) -> None:
        # Docstring inherited from DimensionRecordStorage.insert.
        elementRows = [record.toDict() for record in records]
        if self.element.temporal is not None:
            # Expand each record's timespan into its database representation.
            TimespanReprClass = self._db.getTimespanRepresentation()
            for row in elementRows:
                timespan = row.pop(TimespanDatabaseRepresentation.NAME)
                TimespanReprClass.update(timespan, result=row)
        # Element rows and their overlap rows are written in one transaction.
        with self._db.transaction():
            if replace:
                self._db.replace(self._table, *elementRows)
            else:
                self._db.insert(self._table, *elementRows)
            if self._skyPixOverlap is not None:
                self._skyPixOverlap.insert(records, replace=replace)

    def sync(self, record: DimensionRecord, update: bool = False) -> Union[bool, Dict[str, Any]]:
        # Docstring inherited from DimensionRecordStorage.sync.
        compared = record.toDict()
        # Required fields identify the row; everything else is compared.
        keys = {}
        for name in record.fields.required.names:
            keys[name] = compared.pop(name)
        if self.element.temporal is not None:
            TimespanReprClass = self._db.getTimespanRepresentation()
            timespan = compared.pop(TimespanDatabaseRepresentation.NAME)
            TimespanReprClass.update(timespan, result=compared)
        with self._db.transaction():
            _, inserted_or_updated = self._db.sync(
                self._table,
                keys=keys,
                compared=compared,
                update=update,
            )
            if inserted_or_updated and self._skyPixOverlap is not None:
                if inserted_or_updated is True:
                    # Inserted a new row, so we just need to insert new overlap
                    # rows.
                    self._skyPixOverlap.insert([record])
                elif "region" in inserted_or_updated:  # type: ignore
                    # Updated the region, so we need to delete old overlap rows
                    # and insert new ones.
                    # (mypy should be able to tell that inserted_or_updated
                    # must be a dict if we get to this clause, but it can't)
                    self._skyPixOverlap.insert([record], replace=True)
                # Otherwise we updated something other than a region, and the
                # overlap rows are left as they are.
        return inserted_or_updated

    def digestTables(self) -> Iterable[sqlalchemy.schema.Table]:
        # Docstring inherited from DimensionRecordStorage.digestTables.
        result = [self._table]
        if self._skyPixOverlap is not None:
            result.extend(self._skyPixOverlap.digestTables())
        return result

    def connect(self, overlaps: DatabaseDimensionOverlapStorage) -> None:
        # Docstring inherited from DatabaseDimensionRecordStorage.
        self._otherOverlaps.append(overlaps)
class _SkyPixOverlapStorage:
    """A helper object for `TableDimensionRecordStorage` that manages its
    materialized overlaps with skypix dimensions.

    New instances should be constructed by calling `initialize`, not by calling
    the constructor directly.

    Parameters
    ----------
    db : `Database`
        Interface to the underlying database engine and namespace.
    element : `DatabaseDimensionElement`
        Dimension element whose overlaps are to be managed.
    summaryTable : `sqlalchemy.schema.Table`
        Table that records which combinations of skypix dimensions and
        governor dimension values have materialized overlap rows.
    overlapTable : `sqlalchemy.schema.Table`
        Table containing the actual materialized overlap rows.
    governor : `GovernorDimensionRecordStorage`
        Record storage backend for this element's governor dimension.

    Notes
    -----
    This class (and most importantly, the tables it relies on) can in principle
    manage overlaps with any skypix dimension, but at present it is
    only being used to manage relationships with the special ``commonSkyPix``
    dimension, because that's all the query system uses.  Eventually, we expect
    to require users to explicitly materialize all relationships they will
    want to use in queries.

    Other possible future improvements include:

     - allowing finer-grained skypix dimensions to provide overlap rows for
       coarser ones, by dividing indices by powers of 4 (and possibly doing
       ``SELECT DISTINCT`` in the subquery to remove duplicates);

     - allowing finer-grained database elements (e.g. patch) to provide overlap
       rows for coarser ones (e.g. tract), by ignoring irrelevant columns
       (e.g. the patch IDs) in the subquery (again, possible with
       ``SELECT DISTINCT``).

    But there's no point to doing any of that until the query system can
    figure out how best to ask for overlap rows when an exact match isn't
    available.
    """
    def __init__(
        self,
        db: Database,
        element: DatabaseDimensionElement,
        summaryTable: sqlalchemy.schema.Table,
        overlapTable: sqlalchemy.schema.Table,
        governor: GovernorDimensionRecordStorage,
    ):
        self._db = db
        self.element = element
        assert element.spatial is not None
        self._summaryTable = summaryTable
        self._overlapTable = overlapTable
        self._governor = governor

    @classmethod
    def initialize(
        cls,
        db: Database,
        element: DatabaseDimensionElement, *,
        context: Optional[StaticTablesContext],
        governor: GovernorDimensionRecordStorage,
    ) -> _SkyPixOverlapStorage:
        """Construct a new instance, creating tables as needed.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        element : `DatabaseDimensionElement`
            Dimension element whose overlaps are to be managed.
        context : `StaticTablesContext`, optional
            If provided, an object to use to create any new tables.  If not
            provided, ``db.ensureTableExists`` should be used instead.
        governor : `GovernorDimensionRecordStorage`
            Record storage backend for this element's governor dimension.

        Returns
        -------
        storage : `_SkyPixOverlapStorage`
            New instance backed by the summary and overlap tables.
        """
        if context is not None:
            op = context.addTable
        else:
            op = db.ensureTableExists
        summaryTable = op(
            cls._SUMMARY_TABLE_NAME_SPEC.format(element=element),
            cls._makeSummaryTableSpec(element),
        )
        overlapTable = op(
            cls._OVERLAP_TABLE_NAME_SPEC.format(element=element),
            cls._makeOverlapTableSpec(element),
        )
        # Construct via ``cls`` so the classmethod honors the class it is
        # invoked on.
        return cls(db, element, summaryTable=summaryTable, overlapTable=overlapTable,
                   governor=governor)

    _SUMMARY_TABLE_NAME_SPEC = "{element.name}_skypix_overlap_summary"

    @classmethod
    def _makeSummaryTableSpec(cls, element: DatabaseDimensionElement) -> ddl.TableSpec:
        """Create a specification for the table that records which combinations
        of skypix dimension and governor value have materialized overlaps.

        Parameters
        ----------
        element : `DatabaseDimensionElement`
            Dimension element whose overlaps are to be managed.

        Returns
        -------
        tableSpec : `ddl.TableSpec`
            Table specification.
        """
        assert element.spatial is not None
        tableSpec = ddl.TableSpec(
            fields=[
                ddl.FieldSpec(
                    name="skypix_system",
                    dtype=sqlalchemy.String,
                    length=16,
                    nullable=False,
                    primaryKey=True,
                ),
                ddl.FieldSpec(
                    name="skypix_level",
                    dtype=sqlalchemy.SmallInteger,
                    nullable=False,
                    primaryKey=True,
                ),
            ]
        )
        addDimensionForeignKey(tableSpec, element.spatial.governor, primaryKey=True)
        return tableSpec

    _OVERLAP_TABLE_NAME_SPEC = "{element.name}_skypix_overlap"

    @classmethod
    def _makeOverlapTableSpec(cls, element: DatabaseDimensionElement) -> ddl.TableSpec:
        """Create a specification for the table that holds materialized
        overlap rows.

        Parameters
        ----------
        element : `DatabaseDimensionElement`
            Dimension element whose overlaps are to be managed.

        Returns
        -------
        tableSpec : `ddl.TableSpec`
            Table specification.
        """
        assert element.spatial is not None
        tableSpec = ddl.TableSpec(
            fields=[
                ddl.FieldSpec(
                    name="skypix_system",
                    dtype=sqlalchemy.String,
                    length=16,
                    nullable=False,
                    primaryKey=True,
                ),
                ddl.FieldSpec(
                    name="skypix_level",
                    dtype=sqlalchemy.SmallInteger,
                    nullable=False,
                    primaryKey=True,
                ),
                # (more columns added below)
            ],
            unique=set(),
            indexes={
                # This index has the same fields as the PK, in a different
                # order, to facilitate queries that know skypix_index and want
                # to find the other element.
                ("skypix_system", "skypix_level", "skypix_index",) + tuple(element.graph.required.names),
            },
            foreignKeys=[
                # Foreign key to summary table.  This makes sure we don't
                # materialize any overlaps without remembering that we've done
                # so in the summary table, though it can't prevent the converse
                # of adding a summary row without adding overlap row (either of
                # those is a logic bug, of course, but we want to be defensive
                # about those).  Using ON DELETE CASCADE, it'd be very easy to
                # implement "disabling" an overlap materialization, because we
                # can just delete the summary row.
                # Note that the governor dimension column is added below, in
                # the call to addDimensionForeignKey.
                ddl.ForeignKeySpec(
                    cls._SUMMARY_TABLE_NAME_SPEC.format(element=element),
                    source=("skypix_system", "skypix_level", element.spatial.governor.name),
                    target=("skypix_system", "skypix_level", element.spatial.governor.name),
                    onDelete="CASCADE",
                ),
            ],
        )
        # Add fields for the standard element this class manages overlaps for.
        # This is guaranteed to add a column for the governor dimension,
        # because that's a required dependency of element.
        for dimension in element.required:
            addDimensionForeignKey(tableSpec, dimension, primaryKey=True)
        # Add field for the actual skypix index.  We do this later because I
        # think we care (at least a bit) about the order in which the primary
        # key is defined, in that we want a non-summary column like this one
        # to appear after the governor dimension column.
        tableSpec.fields.add(
            ddl.FieldSpec(
                name="skypix_index",
                dtype=sqlalchemy.BigInteger,
                nullable=False,
                primaryKey=True,
            )
        )
        return tableSpec

    def enable(
        self,
        storage: TableDimensionRecordStorage,
        skypix: SkyPixDimension,
        governorValue: str,
    ) -> None:
        """Enable materialization of overlaps between a skypix dimension
        and the records of ``self.element`` with a particular governor value.

        Parameters
        ----------
        storage : `TableDimensionRecordStorage`
            Storage object for the records of ``self.element``.
        skypix : `SkyPixDimension`
            The skypix dimension (system and level) for which overlaps should
            be materialized.
        governorValue : `str`
            Value of this element's governor dimension for which overlaps
            should be materialized.  For example, if ``self.element`` is
            ``visit``, this is an instrument name; if ``self.element`` is
            ``patch``, this is a skymap name.

        Notes
        -----
        If there are existing rows for the given ``governorValue``, overlap
        rows for them will be immediately computed and inserted.  At present,
        that never happens, because we only enable overlaps with
        `DimensionUniverse.commonSkyPix`, and that happens immediately after
        each governor row is inserted (and there can't be any patch rows,
        for example, until after the corresponding skymap row is inserted).

        After calling `enable` for a particular combination, any new records
        for ``self.element`` that are inserted will automatically be
        accompanied by overlap records (via calls to `insert` made
        by `TableDimensionRecordStorage` methods).
        """
        # Because we're essentially materializing a view in Python, we
        # aggressively lock all tables we're reading and writing in order to be
        # sure nothing gets out of sync.  This may not be the most efficient
        # approach possible, but we'll focus on correct before we focus on
        # fast, and enabling a new overlap combination should be a very rare
        # operation anyway, and never one we do in parallel.
        with self._db.transaction(lock=[self._governor.table, storage._table,
                                        self._summaryTable, self._overlapTable]):
            # Only the "was a row inserted" flag matters here; the synced row
            # itself is not used.
            _, inserted = self._db.sync(
                self._summaryTable,
                keys={
                    "skypix_system": skypix.system.name,
                    "skypix_level": skypix.level,
                    self._governor.element.name: governorValue,
                },
            )
            if inserted:
                _LOG.debug(
                    "Precomputing initial overlaps for %s vs %s for %s=%s",
                    skypix.name,
                    self.element.name,
                    self._governor.element.name,
                    governorValue
                )
                self._fill(storage=storage, skypix=skypix, governorValue=governorValue)
            else:
                _LOG.debug(
                    "Overlaps already precomputed for %s vs %s for %s=%s",
                    skypix.name,
                    self.element.name,
                    self._governor.element.name,
                    governorValue
                )

    def _fill(
        self, *,
        storage: TableDimensionRecordStorage,
        skypix: SkyPixDimension,
        governorValue: str,
    ) -> None:
        """Insert overlap records for a newly-enabled combination of skypix
        dimension and governor value.

        This method should only be called by `enable`.

        Parameters
        ----------
        storage : `TableDimensionRecordStorage`
            Storage object for the records of ``self.element``.
        skypix : `SkyPixDimension`
            The skypix dimension (system and level) for which overlaps should
            be materialized.
        governorValue : `str`
            Value of this element's governor dimension for which overlaps
            should be materialized.  For example, if ``self.element`` is
            ``visit``, this is an instrument name; if ``self.element`` is
            ``patch``, this is a skymap name.
        """
        overlapRecords: List[dict] = []
        # `DimensionRecordStorage.fetch` as defined by the ABC expects to be
        # given iterables of data IDs that correspond to that element's graph
        # (e.g. {instrument, visit, detector}), not just some subset of it
        # (e.g. {instrument}).  But we know the implementation of `fetch` for
        # `TableDimensionRecordStorage` will use this iterable to do exactly
        # what we want.
        governorDataId = DataCoordinate.standardize({self._governor.element.name: governorValue},
                                                    graph=self._governor.element.graph)
        for record in storage.fetch(DataCoordinateIterable.fromScalar(governorDataId)):
            if record.region is None:
                continue
            baseOverlapRecord = record.dataId.byName()
            baseOverlapRecord["skypix_system"] = skypix.system.name
            baseOverlapRecord["skypix_level"] = skypix.level
            # The envelope is a sequence of half-open [begin, end) index
            # ranges; emit one row per index.
            for begin, end in skypix.pixelization.envelope(record.region):
                overlapRecords.extend(
                    dict(baseOverlapRecord, skypix_index=index) for index in range(begin, end)
                )
        _LOG.debug(
            "Inserting %d initial overlap rows for %s vs %s for %s=%r",
            len(overlapRecords),
            skypix.name,
            self.element.name,
            self._governor.element.name,
            governorValue,
        )
        self._db.insert(self._overlapTable, *overlapRecords)

    def insert(self, records: Sequence[DimensionRecord], replace: bool = False) -> None:
        """Insert overlaps for a sequence of ``self.element`` records that
        have just been inserted.

        This must be called by any method that inserts records for that
        element (i.e. `TableDimensionRecordStorage.insert` and
        `TableDimensionRecordStorage.sync`), within the same transaction.

        Parameters
        ----------
        records : `Sequence` [ `DimensionRecord` ]
            Records for ``self.element``.  Records with `None` regions are
            ignored.
        replace : `bool`, optional
            If `True` (`False` is default) one or more of the given records may
            already exist and is being updated, so we need to delete any
            existing overlap records first.
        """
        # Group records by family.governor value.
        grouped: Dict[str, List[DimensionRecord]] = defaultdict(list)
        for record in records:
            grouped[getattr(record, self._governor.element.name)].append(record)
        _LOG.debug(
            "Precomputing new skypix overlaps for %s where %s in %s.",
            self.element.name, self._governor.element.name, grouped.keys()
        )
        # Make sure the set of combinations to materialize does not change
        # while we are materializing the ones we have, by locking the summary
        # table.  Because we aren't planning to write to the summary table,
        # this could just be a SHARED lock instead of an EXCLUSIVE one, but
        # there's no API for that right now.
        with self._db.transaction(lock=[self._summaryTable]):
            # Query for the skypix dimensions to be associated with each
            # governor value.
            gvCol = self._summaryTable.columns[self._governor.element.name]
            sysCol = self._summaryTable.columns.skypix_system
            lvlCol = self._summaryTable.columns.skypix_level
            query = sqlalchemy.sql.select(
                gvCol, sysCol, lvlCol,
            ).select_from(
                self._summaryTable
            ).where(
                gvCol.in_(list(grouped.keys()))
            )
            # Group results by governor value, then skypix system.
            skypix: Dict[str, NamedKeyDict[SkyPixSystem, List[int]]] = {
                gv: NamedKeyDict() for gv in grouped.keys()
            }
            for summaryRow in self._db.query(query).mappings():
                system = self.element.universe.skypix[summaryRow[sysCol]]
                skypix[summaryRow[gvCol]].setdefault(system, []).append(summaryRow[lvlCol])
            if replace:
                # Construct constraints for a DELETE query as a list of dicts.
                # We include the skypix_system and skypix_level column values
                # explicitly instead of just letting the query search for all
                # of those related to the given records, because they are the
                # first columns in the primary key, and hence searching with
                # them will be way faster (and we don't want to add a new index
                # just for this operation).
                to_delete: List[Dict[str, Any]] = []
                for gv, skypix_systems in skypix.items():
                    for system, skypix_levels in skypix_systems.items():
                        to_delete.extend(
                            {"skypix_system": system.name, "skypix_level": level, **record.dataId.byName()}
                            for record, level in itertools.product(grouped[gv], skypix_levels)
                        )
                self._db.delete(
                    self._overlapTable,
                    ["skypix_system", "skypix_level"] + list(self.element.graph.required.names),
                    *to_delete,
                )
            overlapRecords: List[dict] = []
            # Compute overlaps for one governor value at a time, but gather
            # them all up for one insert.
            for gv, group in grouped.items():
                overlapRecords.extend(self._compute(group, skypix[gv], gv))
            _LOG.debug(
                "Inserting %d new skypix overlap rows for %s where %s in %s.",
                len(overlapRecords), self.element.name, self._governor.element.name, grouped.keys()
            )
            self._db.insert(self._overlapTable, *overlapRecords)

    def _compute(
        self,
        records: Sequence[DimensionRecord],
        skypix: NamedKeyDict[SkyPixSystem, List[int]],
        governorValue: str,
    ) -> Iterator[dict]:
        """Compute all overlap rows for a particular governor dimension value
        and all of the skypix dimensions for which its overlaps are enabled.

        This method should only be called by `insert`.

        Parameters
        ----------
        records : `Sequence` [ `DimensionRecord` ]
            Records for ``self.element``.  Records with `None` regions are
            ignored.  All must have the governor value given.
        skypix : `NamedKeyDict` [ `SkyPixSystem`, `list` [ `int` ] ]
            Mapping containing all skypix systems and levels for which overlaps
            should be computed, grouped by `SkyPixSystem`.  Neither the
            mapping nor its level lists are modified.
        governorValue : `str`
            Value of this element's governor dimension for which overlaps
            should be computed.  For example, if ``self.element`` is ``visit``,
            this is an instrument name; if ``self.element`` is ``patch``, this
            is a skymap name.

        Yields
        ------
        row : `dict`
            Dictionary representing an overlap row.
        """
        # Sort each system's levels once, finest-grained (largest) first,
        # instead of re-sorting for every record.  `sorted` works on a copy,
        # so the caller's lists keep their order.  Systems with no levels
        # contribute no rows and are dropped here.
        levelsBySystem = [
            (system, sorted(levels, reverse=True))
            for system, levels in skypix.items()
            if levels
        ]
        governorName = self._governor.element.name
        # Process input records one at time, computing all skypix indices for
        # each.
        for record in records:
            if record.region is None:
                continue
            assert getattr(record, governorName) == governorValue
            for system, levels in levelsBySystem:
                baseOverlapRecord = record.dataId.byName()
                baseOverlapRecord["skypix_system"] = system.name
                # Start with the first level, which is the finest-grained one.
                # Compute skypix envelope indices directly for that.
                finest = levels[0]
                indices: Dict[int, Set[int]] = {finest: set()}
                for begin, end in system[finest].pixelization.envelope(record.region):
                    indices[finest].update(range(begin, end))
                # Divide those indices by powers of 4 (and remove duplicates)
                # to work our way up to the last (coarsest) level.
                for lastLevel, nextLevel in zip(levels, levels[1:]):
                    factor = 4**(lastLevel - nextLevel)
                    indices[nextLevel] = {index//factor for index in indices[lastLevel]}
                for level in levels:
                    yield from (
                        {
                            "skypix_level": level,
                            "skypix_index": index,
                            **baseOverlapRecord,  # type: ignore
                        } for index in indices[level]
                    )

    def select(
        self,
        skypix: SkyPixDimension,
        governorValues: Union[AbstractSet[str], EllipsisType],
    ) -> sqlalchemy.sql.FromClause:
        """Construct a subquery expression containing overlaps between the
        given skypix dimension and governor values.

        Parameters
        ----------
        skypix : `SkyPixDimension`
            The skypix dimension (system and level) for which overlaps should
            be returned.
        governorValues : `AbstractSet` [ `str` ] or ``...``
            Values of this element's governor dimension for which overlaps
            should be returned.  For example, if ``self.element`` is ``visit``,
            this is a set of instrument names; if ``self.element`` is
            ``patch``, this is a set of skymap names.  If ``...`` all values
            in the database are used (`GovernorDimensionRecordStorage.values`).

        Returns
        -------
        subquery : `sqlalchemy.sql.FromClause`
            A SELECT query with an alias, intended for use as a subquery, with
            columns equal to ``self.element.required.names`` + ``skypix.name``.

        Raises
        ------
        RuntimeError
            Raised if ``skypix`` is not the common skypix dimension and
            overlaps with it have not been materialized for one or more of
            the requested governor values.
        """
        if skypix != self.element.universe.commonSkyPix:
            # We guarantee elsewhere that we always materialize all overlaps
            # vs. commonSkyPix, but for everything else, we need to check that
            # we have materialized this combination of governor values and
            # skypix.
            summaryWhere = [
                self._summaryTable.columns.skypix_system == skypix.system.name,
                self._summaryTable.columns.skypix_level == skypix.level,
            ]
            gvCol = self._summaryTable.columns[self._governor.element.name]
            if governorValues is not Ellipsis:
                summaryWhere.append(gvCol.in_(list(governorValues)))
            summaryQuery = sqlalchemy.sql.select(
                gvCol
            ).select_from(
                self._summaryTable
            ).where(
                sqlalchemy.sql.and_(*summaryWhere)
            )
            materializedGovernorValues = {row._mapping[gvCol] for row in self._db.query(summaryQuery)}
            if governorValues is Ellipsis:
                missingGovernorValues = self._governor.values - materializedGovernorValues
            else:
                missingGovernorValues = governorValues - materializedGovernorValues
            if missingGovernorValues:
                raise RuntimeError(
                    f"Query requires an overlap join between {skypix.name} and {self.element.name} "
                    f"(for {self._governor.element.name} in {missingGovernorValues}), but these "
                    f"have not been materialized."
                )
        # Expose the skypix index under the skypix dimension's own name so the
        # subquery can be joined on it like any other dimension column.
        columns = [self._overlapTable.columns.skypix_index.label(skypix.name)]
        columns.extend(self._overlapTable.columns[name] for name in self.element.graph.required.names)
        overlapWhere = [
            self._overlapTable.columns.skypix_system == skypix.system.name,
            self._overlapTable.columns.skypix_level == skypix.level,
        ]
        if governorValues is not Ellipsis:
            overlapWhere.append(
                self._overlapTable.columns[self._governor.element.name].in_(list(governorValues))
            )
        overlapQuery = sqlalchemy.sql.select(
            *columns
        ).select_from(
            self._overlapTable
        ).where(
            sqlalchemy.sql.and_(*overlapWhere)
        )
        return overlapQuery.alias(f"{self.element.name}_{skypix.name}_overlap")

    def digestTables(self) -> Iterable[sqlalchemy.schema.Table]:
        """Return tables used for schema digest.

        Returns
        -------
        tables : `Iterable` [ `sqlalchemy.schema.Table` ]
            Possibly empty set of tables for schema digest calculations.
        """
        return [self._summaryTable, self._overlapTable]