Coverage for python/lsst/daf/butler/registry/dimensions/table.py: 85%
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = ["TableDimensionRecordStorage"]
25import itertools
26import logging
27import warnings
28from collections import defaultdict
29from typing import AbstractSet, Any, Dict, Iterable, Iterator, List, Mapping, Optional, Sequence, Set, Union
31import sqlalchemy
33from ...core import (
34 DatabaseDimensionElement,
35 DataCoordinate,
36 DataCoordinateIterable,
37 DimensionElement,
38 DimensionRecord,
39 GovernorDimension,
40 NamedKeyDict,
41 NamedKeyMapping,
42 NamedValueSet,
43 SimpleQuery,
44 SkyPixDimension,
45 SkyPixSystem,
46 SpatialRegionDatabaseRepresentation,
47 TimespanDatabaseRepresentation,
48 addDimensionForeignKey,
49 ddl,
50)
51from ..interfaces import (
52 Database,
53 DatabaseDimensionOverlapStorage,
54 DatabaseDimensionRecordStorage,
55 GovernorDimensionRecordStorage,
56 StaticTablesContext,
57)
58from ..queries import QueryBuilder
59from ..wildcards import Ellipsis, EllipsisType
61_LOG = logging.getLogger(__name__)
MAX_FETCH_CHUNK = 1000
"""Upper bound on the number of data IDs whose records are fetched at once.

Unless a database engine offers something more specific, this limits the size
of the SQL statement itself and not only the number of result rows: with
SQLAlchemy, querying for several data IDs in a single SELECT requires one OR
term in the WHERE clause per data ID.
"""
class TableDimensionRecordStorage(DatabaseDimensionRecordStorage):
    """A record storage implementation that uses a regular database table.

    Parameters
    ----------
    db : `Database`
        Interface to the database engine and namespace that will hold these
        dimension records.
    element : `DatabaseDimensionElement`
        The element whose records this storage will manage.
    table : `sqlalchemy.schema.Table`
        The logical table for the element.
    skyPixOverlap : `_SkyPixOverlapStorage`, optional
        Object that manages the tables that hold materialized spatial overlap
        joins to skypix dimensions.  Should be `None` if (and only if)
        ``element.spatial is None``.
    """

    def __init__(
        self,
        db: Database,
        element: DatabaseDimensionElement,
        *,
        table: sqlalchemy.schema.Table,
        skyPixOverlap: Optional[_SkyPixOverlapStorage] = None,
    ):
        self._db = db
        self._table = table
        self._element = element
        # Map each dimension name of the element to the table column that
        # holds it; used by `fetch` to constrain queries on data IDs.
        self._fetchColumns: Dict[str, sqlalchemy.sql.ColumnElement] = {
            dimension.name: self._table.columns[name]
            for dimension, name in zip(
                self._element.dimensions, self._element.RecordClass.fields.dimensions.names
            )
        }
        self._skyPixOverlap = skyPixOverlap
        self._otherOverlaps: List[DatabaseDimensionOverlapStorage] = []

    @classmethod
    def initialize(
        cls,
        db: Database,
        element: DatabaseDimensionElement,
        *,
        context: Optional[StaticTablesContext] = None,
        config: Mapping[str, Any],
        governors: NamedKeyMapping[GovernorDimension, GovernorDimensionRecordStorage],
    ) -> DatabaseDimensionRecordStorage:
        # Docstring inherited from DatabaseDimensionRecordStorage.
        spec = element.RecordClass.fields.makeTableSpec(
            RegionReprClass=db.getSpatialRegionRepresentation(),
            TimespanReprClass=db.getTimespanRepresentation(),
        )
        # Declare the table through the static-tables context when one is
        # given; otherwise create it on demand.
        if context is not None:
            table = context.addTable(element.name, spec)
        else:
            table = db.ensureTableExists(element.name, spec)
        skyPixOverlap: Optional[_SkyPixOverlapStorage]
        if element.spatial is not None:
            governor = governors[element.spatial.governor]
            skyPixOverlap = _SkyPixOverlapStorage.initialize(
                db,
                element,
                context=context,
                governor=governor,
            )
            result = cls(db, element, table=table, skyPixOverlap=skyPixOverlap)

            # Whenever anyone inserts a new governor dimension value, we want
            # to enable overlaps for that value between this element and
            # commonSkyPix.
            def callback(record: DimensionRecord) -> None:
                skyPixOverlap.enable(  # type: ignore
                    result,
                    element.universe.commonSkyPix,
                    getattr(record, element.spatial.governor.primaryKey.name),  # type: ignore
                )

            governor.registerInsertionListener(callback)
            return result
        else:
            return cls(db, element, table=table)

    @property
    def element(self) -> DatabaseDimensionElement:
        # Docstring inherited from DimensionRecordStorage.element.
        return self._element

    def clearCaches(self) -> None:
        # Docstring inherited from DimensionRecordStorage.clearCaches.
        pass

    def join(
        self,
        builder: QueryBuilder,
        *,
        regions: Optional[NamedKeyDict[DimensionElement, SpatialRegionDatabaseRepresentation]] = None,
        timespans: Optional[NamedKeyDict[DimensionElement, TimespanDatabaseRepresentation]] = None,
    ) -> sqlalchemy.sql.FromClause:
        # Docstring inherited from DimensionRecordStorage.
        # NOTE(review): this method has always returned the joined table, so
        # the return annotation says so instead of `None`; confirm that the
        # base-class signature agrees.
        if regions is not None:
            # Spatial join requested: first join in the materialized overlaps
            # between this element and the common skypix dimension.
            dimensions = NamedValueSet(self.element.required)
            dimensions.add(self.element.universe.commonSkyPix)
            assert self._skyPixOverlap is not None
            builder.joinTable(
                self._skyPixOverlap.select(self.element.universe.commonSkyPix, Ellipsis),
                dimensions,
            )
            regionsInTable = self._db.getSpatialRegionRepresentation().fromSelectable(self._table)
            regions[self.element] = regionsInTable
        joinOn = builder.startJoin(
            self._table, self.element.dimensions, self.element.RecordClass.fields.dimensions.names
        )
        if timespans is not None:
            # Temporal join requested: require this table's timespan to
            # overlap every timespan already in the query, then register it.
            timespanInTable = self._db.getTimespanRepresentation().fromSelectable(self._table)
            for timespanInQuery in timespans.values():
                joinOn.append(timespanInQuery.overlaps(timespanInTable))
            timespans[self.element] = timespanInTable
        builder.finishJoin(self._table, joinOn)
        return self._table

    def fetch(self, dataIds: DataCoordinateIterable) -> Iterable[DimensionRecord]:
        # Docstring inherited from DimensionRecordStorage.fetch.
        RecordClass = self.element.RecordClass
        query = SimpleQuery()
        query.columns.extend(self._table.columns[name] for name in RecordClass.fields.standard.names)
        if self.element.spatial is not None:
            query.columns.append(self._table.columns["region"])
        if self.element.temporal is not None:
            TimespanReprClass = self._db.getTimespanRepresentation()
            query.columns.extend(self._table.columns[name] for name in TimespanReprClass.getFieldNames())
        query.join(self._table)
        dataIds.constrain(query, lambda name: self._fetchColumns[name])
        with warnings.catch_warnings():
            # Some of our generated queries may contain cartesian joins, this
            # is not a serious issue as it is properly constrained, so we want
            # to suppress sqlalchemy warnings.
            warnings.filterwarnings(
                "ignore",
                message="SELECT statement has a cartesian product",
                category=sqlalchemy.exc.SAWarning,
            )
            for row in self._db.query(query.combine()):
                values = row._asdict()
                if self.element.temporal is not None:
                    # Collapse the database timespan columns into the single
                    # timespan value handed to the record class.
                    values[TimespanDatabaseRepresentation.NAME] = TimespanReprClass.extract(values)
                yield RecordClass(**values)

    def insert(self, *records: DimensionRecord, replace: bool = False) -> None:
        # Docstring inherited from DimensionRecordStorage.insert.
        elementRows = [record.toDict() for record in records]
        if self.element.temporal is not None:
            # Replace the single timespan value of each row with the columns
            # of the database's timespan representation.
            TimespanReprClass = self._db.getTimespanRepresentation()
            for row in elementRows:
                timespan = row.pop(TimespanDatabaseRepresentation.NAME)
                TimespanReprClass.update(timespan, result=row)
        # Element rows and their overlap rows are written in one transaction.
        with self._db.transaction():
            if replace:
                self._db.replace(self._table, *elementRows)
            else:
                self._db.insert(self._table, *elementRows)
            if self._skyPixOverlap is not None:
                self._skyPixOverlap.insert(records, replace=replace)

    def sync(self, record: DimensionRecord, update: bool = False) -> Union[bool, Dict[str, Any]]:
        # Docstring inherited from DimensionRecordStorage.sync.
        compared = record.toDict()
        # Required fields identify the row; everything left in ``compared``
        # is checked against (or written to) the existing row.
        keys = {name: compared.pop(name) for name in record.fields.required.names}
        if self.element.temporal is not None:
            TimespanReprClass = self._db.getTimespanRepresentation()
            timespan = compared.pop(TimespanDatabaseRepresentation.NAME)
            TimespanReprClass.update(timespan, result=compared)
        with self._db.transaction():
            _, inserted_or_updated = self._db.sync(
                self._table,
                keys=keys,
                compared=compared,
                update=update,
            )
            if inserted_or_updated and self._skyPixOverlap is not None:
                if inserted_or_updated is True:
                    # Inserted a new row, so we just need to insert new overlap
                    # rows.
                    self._skyPixOverlap.insert([record])
                elif "region" in inserted_or_updated:  # type: ignore
                    # Updated the region, so we need to delete old overlap rows
                    # and insert new ones.
                    # (mypy should be able to tell that inserted_or_updated
                    # must be a dict if we get to this clause, but it can't)
                    self._skyPixOverlap.insert([record], replace=True)
                # Otherwise we updated something other than a region, and the
                # overlap rows are still valid.
        return inserted_or_updated

    def digestTables(self) -> Iterable[sqlalchemy.schema.Table]:
        # Docstring inherited from DimensionRecordStorage.digestTables.
        result = [self._table]
        if self._skyPixOverlap is not None:
            result.extend(self._skyPixOverlap.digestTables())
        return result

    def connect(self, overlaps: DatabaseDimensionOverlapStorage) -> None:
        # Docstring inherited from DatabaseDimensionRecordStorage.
        self._otherOverlaps.append(overlaps)
class _SkyPixOverlapStorage:
    """A helper object for `TableDimensionRecordStorage` that manages its
    materialized overlaps with skypix dimensions.

    New instances should be constructed by calling `initialize`, not by calling
    the constructor directly.

    Parameters
    ----------
    db : `Database`
        Interface to the underlying database engine and namespace.
    element : `DatabaseDimensionElement`
        Dimension element whose overlaps are to be managed.
    summaryTable : `sqlalchemy.schema.Table`
        Table that records which combinations of skypix dimensions and
        governor dimension values have materialized overlap rows.
    overlapTable : `sqlalchemy.schema.Table`
        Table containing the actual materialized overlap rows.
    governor : `GovernorDimensionRecordStorage`
        Record storage backend for this element's governor dimension.

    Notes
    -----
    This class (and most importantly, the tables it relies on) can in principle
    manage overlaps with any skypix dimension, but at present it is only being
    used to manage relationships with the special ``commonSkyPix`` dimension,
    because that's all the query system uses.  Eventually, we expect to
    require users to explicitly materialize all relationships they will want
    to use in queries.

    Other possible future improvements include:

     - allowing finer-grained skypix dimensions to provide overlap rows for
       coarser ones, by dividing indices by powers of 4 (and possibly doing
       ``SELECT DISTINCT`` in the subquery to remove duplicates);

     - allowing finer-grained database elements (e.g. patch) to provide overlap
       rows for coarser ones (e.g. tract), by ignoring irrelevant columns
       (e.g. the patch IDs) in the subquery (again, possible with
       ``SELECT DISTINCT``).

    But there's no point to doing any of that until the query system can
    figure out how best to ask for overlap rows when an exact match isn't
    available.
    """

    def __init__(
        self,
        db: Database,
        element: DatabaseDimensionElement,
        summaryTable: sqlalchemy.schema.Table,
        overlapTable: sqlalchemy.schema.Table,
        governor: GovernorDimensionRecordStorage,
    ):
        self._db = db
        self.element = element
        assert element.spatial is not None
        self._summaryTable = summaryTable
        self._overlapTable = overlapTable
        self._governor = governor

    @classmethod
    def initialize(
        cls,
        db: Database,
        element: DatabaseDimensionElement,
        *,
        context: Optional[StaticTablesContext],
        governor: GovernorDimensionRecordStorage,
    ) -> _SkyPixOverlapStorage:
        """Construct a new instance, creating tables as needed.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        element : `DatabaseDimensionElement`
            Dimension element whose overlaps are to be managed.
        context : `StaticTablesContext`, optional
            If provided, an object to use to create any new tables.  If not
            provided, ``db.ensureTableExists`` should be used instead.
        governor : `GovernorDimensionRecordStorage`
            Record storage backend for this element's governor dimension.

        Returns
        -------
        storage : `_SkyPixOverlapStorage`
            New instance of the class this method was called on.
        """
        if context is not None:
            op = context.addTable
        else:
            op = db.ensureTableExists
        summaryTable = op(
            cls._SUMMARY_TABLE_NAME_SPEC.format(element=element),
            cls._makeSummaryTableSpec(element),
        )
        overlapTable = op(
            cls._OVERLAP_TABLE_NAME_SPEC.format(element=element),
            cls._makeOverlapTableSpec(element),
        )
        # Use ``cls`` rather than the hard-coded class name so that the
        # alternate constructor also works for subclasses.
        return cls(db, element, summaryTable=summaryTable, overlapTable=overlapTable, governor=governor)

    _SUMMARY_TABLE_NAME_SPEC = "{element.name}_skypix_overlap_summary"

    @classmethod
    def _makeSummaryTableSpec(cls, element: DatabaseDimensionElement) -> ddl.TableSpec:
        """Create a specification for the table that records which combinations
        of skypix dimension and governor value have materialized overlaps.

        Parameters
        ----------
        element : `DatabaseDimensionElement`
            Dimension element whose overlaps are to be managed.

        Returns
        -------
        tableSpec : `ddl.TableSpec`
            Table specification.
        """
        assert element.spatial is not None
        tableSpec = ddl.TableSpec(
            fields=[
                ddl.FieldSpec(
                    name="skypix_system",
                    dtype=sqlalchemy.String,
                    length=16,
                    nullable=False,
                    primaryKey=True,
                ),
                ddl.FieldSpec(
                    name="skypix_level",
                    dtype=sqlalchemy.SmallInteger,
                    nullable=False,
                    primaryKey=True,
                ),
            ]
        )
        addDimensionForeignKey(tableSpec, element.spatial.governor, primaryKey=True)
        return tableSpec

    _OVERLAP_TABLE_NAME_SPEC = "{element.name}_skypix_overlap"

    @classmethod
    def _makeOverlapTableSpec(cls, element: DatabaseDimensionElement) -> ddl.TableSpec:
        """Create a specification for the table that holds materialized
        overlap rows.

        Parameters
        ----------
        element : `DatabaseDimensionElement`
            Dimension element whose overlaps are to be managed.

        Returns
        -------
        tableSpec : `ddl.TableSpec`
            Table specification.
        """
        assert element.spatial is not None
        tableSpec = ddl.TableSpec(
            fields=[
                ddl.FieldSpec(
                    name="skypix_system",
                    dtype=sqlalchemy.String,
                    length=16,
                    nullable=False,
                    primaryKey=True,
                ),
                ddl.FieldSpec(
                    name="skypix_level",
                    dtype=sqlalchemy.SmallInteger,
                    nullable=False,
                    primaryKey=True,
                ),
                # (more columns added below)
            ],
            unique=set(),
            indexes={
                # This index has the same fields as the PK, in a different
                # order, to facilitate queries that know skypix_index and want
                # to find the other element.
                (
                    "skypix_system",
                    "skypix_level",
                    "skypix_index",
                )
                + tuple(element.graph.required.names),
            },
            foreignKeys=[
                # Foreign key to summary table.  This makes sure we don't
                # materialize any overlaps without remembering that we've done
                # so in the summary table, though it can't prevent the converse
                # of adding a summary row without adding overlap row (either of
                # those is a logic bug, of course, but we want to be defensive
                # about those).  Using ON DELETE CASCADE, it'd be very easy to
                # implement "disabling" an overlap materialization, because we
                # can just delete the summary row.
                # Note that the governor dimension column is added below, in
                # the call to addDimensionForeignKey.
                ddl.ForeignKeySpec(
                    cls._SUMMARY_TABLE_NAME_SPEC.format(element=element),
                    source=("skypix_system", "skypix_level", element.spatial.governor.name),
                    target=("skypix_system", "skypix_level", element.spatial.governor.name),
                    onDelete="CASCADE",
                ),
            ],
        )
        # Add fields for the standard element this class manages overlaps for.
        # This is guaranteed to add a column for the governor dimension,
        # because that's a required dependency of element.
        for dimension in element.required:
            addDimensionForeignKey(tableSpec, dimension, primaryKey=True)
        # Add field for the actual skypix index.  We do this later because I
        # think we care (at least a bit) about the order in which the primary
        # key is defined, in that we want a non-summary column like this one
        # to appear after the governor dimension column.
        tableSpec.fields.add(
            ddl.FieldSpec(
                name="skypix_index",
                dtype=sqlalchemy.BigInteger,
                nullable=False,
                primaryKey=True,
            )
        )
        return tableSpec

    def enable(
        self,
        storage: TableDimensionRecordStorage,
        skypix: SkyPixDimension,
        governorValue: str,
    ) -> None:
        """Enable materialization of overlaps between a skypix dimension
        and the records of ``self.element`` with a particular governor value.

        Parameters
        ----------
        storage : `TableDimensionRecordStorage`
            Storage object for the records of ``self.element``.
        skypix : `SkyPixDimension`
            The skypix dimension (system and level) for which overlaps should
            be materialized.
        governorValue : `str`
            Value of this element's governor dimension for which overlaps
            should be materialized.  For example, if ``self.element`` is
            ``visit``, this is an instrument name; if ``self.element`` is
            ``patch``, this is a skymap name.

        Notes
        -----
        If there are existing rows for the given ``governorValue``, overlap
        rows for them will be immediately computed and inserted.  At present,
        that never happens, because we only enable overlaps with
        `DimensionUniverse.commonSkyPix`, and that happens immediately after
        each governor row is inserted (and there can't be any patch rows,
        for example, until after the corresponding skymap row is inserted).

        After calling `enable` for a particular combination, any new records
        for ``self.element`` that are inserted will automatically be
        accompanied by overlap records (via calls to `insert` made
        by `TableDimensionRecordStorage` methods).
        """
        # Because we're essentially materializing a view in Python, we
        # aggressively lock all tables we're reading and writing in order to be
        # sure nothing gets out of sync.  This may not be the most efficient
        # approach possible, but we'll focus on correct before we focus on
        # fast, and enabling a new overlap combination should be a very rare
        # operation anyway, and never one we do in parallel.
        with self._db.transaction(
            lock=[self._governor.table, storage._table, self._summaryTable, self._overlapTable]
        ):
            # Only the "was a row inserted" flag matters here; the synced row
            # itself is not used.
            _, inserted = self._db.sync(
                self._summaryTable,
                keys={
                    "skypix_system": skypix.system.name,
                    "skypix_level": skypix.level,
                    self._governor.element.name: governorValue,
                },
            )
            if inserted:
                _LOG.debug(
                    "Precomputing initial overlaps for %s vs %s for %s=%s",
                    skypix.name,
                    self.element.name,
                    self._governor.element.name,
                    governorValue,
                )
                self._fill(storage=storage, skypix=skypix, governorValue=governorValue)
            else:
                _LOG.debug(
                    "Overlaps already precomputed for %s vs %s for %s=%s",
                    skypix.name,
                    self.element.name,
                    self._governor.element.name,
                    governorValue,
                )

    def _fill(
        self,
        *,
        storage: TableDimensionRecordStorage,
        skypix: SkyPixDimension,
        governorValue: str,
    ) -> None:
        """Insert overlap records for a newly-enabled combination of skypix
        dimension and governor value.

        This method should only be called by `enable`.

        Parameters
        ----------
        storage : `TableDimensionRecordStorage`
            Storage object for the records of ``self.element``.
        skypix : `SkyPixDimension`
            The skypix dimension (system and level) for which overlaps should
            be materialized.
        governorValue : `str`
            Value of this element's governor dimension for which overlaps
            should be materialized.  For example, if ``self.element`` is
            ``visit``, this is an instrument name; if ``self.element`` is
            ``patch``, this is a skymap name.
        """
        overlapRecords: List[dict] = []
        # `DimensionRecordStorage.fetch` as defined by the ABC expects to be
        # given iterables of data IDs that correspond to that element's graph
        # (e.g. {instrument, visit, detector}), not just some subset of it
        # (e.g. {instrument}).  But we know the implementation of `fetch` for
        # `TableDimensionRecordStorage` will use this iterable to do exactly
        # what we want.
        governorDataId = DataCoordinate.standardize(
            {self._governor.element.name: governorValue}, graph=self._governor.element.graph
        )
        for record in storage.fetch(DataCoordinateIterable.fromScalar(governorDataId)):
            if record.region is None:
                continue
            baseOverlapRecord = record.dataId.byName()
            baseOverlapRecord["skypix_system"] = skypix.system.name
            baseOverlapRecord["skypix_level"] = skypix.level
            # The envelope is a sequence of half-open [begin, end) index
            # ranges; emit one overlap row per index.
            for begin, end in skypix.pixelization.envelope(record.region):
                overlapRecords.extend(
                    dict(baseOverlapRecord, skypix_index=index) for index in range(begin, end)
                )
        _LOG.debug(
            "Inserting %d initial overlap rows for %s vs %s for %s=%r",
            len(overlapRecords),
            skypix.name,
            self.element.name,
            self._governor.element.name,
            governorValue,
        )
        self._db.insert(self._overlapTable, *overlapRecords)

    def insert(self, records: Sequence[DimensionRecord], replace: bool = False) -> None:
        """Insert overlaps for a sequence of ``self.element`` records that
        have just been inserted.

        This must be called by any method that inserts records for that
        element (i.e. `TableDimensionRecordStorage.insert` and
        `TableDimensionRecordStorage.sync`), within the same transaction.

        Parameters
        ----------
        records : `Sequence` [ `DimensionRecord` ]
            Records for ``self.element``.  Records with `None` regions are
            ignored.  An empty sequence is a no-op.
        replace : `bool`, optional
            If `True` (`False` is default) one or more of the given records may
            already exist and is being updated, so we need to delete any
            existing overlap records first.
        """
        if not records:
            # Nothing to materialize; don't lock the summary table or run a
            # query with an empty IN clause for no effect.
            return
        # Group records by family.governor value.
        grouped: Dict[str, List[DimensionRecord]] = defaultdict(list)
        for record in records:
            grouped[getattr(record, self._governor.element.name)].append(record)
        _LOG.debug(
            "Precomputing new skypix overlaps for %s where %s in %s.",
            self.element.name,
            self._governor.element.name,
            grouped.keys(),
        )
        # Make sure the set of combinations to materialize does not change
        # while we are materializing the ones we have, by locking the summary
        # table.  Because we aren't planning to write to the summary table,
        # this could just be a SHARED lock instead of an EXCLUSIVE one, but
        # there's no API for that right now.
        with self._db.transaction(lock=[self._summaryTable]):
            # Query for the skypix dimensions to be associated with each
            # governor value.
            gvCol = self._summaryTable.columns[self._governor.element.name]
            sysCol = self._summaryTable.columns.skypix_system
            lvlCol = self._summaryTable.columns.skypix_level
            query = (
                sqlalchemy.sql.select(
                    gvCol,
                    sysCol,
                    lvlCol,
                )
                .select_from(self._summaryTable)
                .where(gvCol.in_(list(grouped.keys())))
            )
            # Group results by governor value, then skypix system.
            skypix: Dict[str, NamedKeyDict[SkyPixSystem, List[int]]] = {
                gv: NamedKeyDict() for gv in grouped.keys()
            }
            for summaryRow in self._db.query(query).mappings():
                system = self.element.universe.skypix[summaryRow[sysCol]]
                skypix[summaryRow[gvCol]].setdefault(system, []).append(summaryRow[lvlCol])
            if replace:
                # Construct constraints for a DELETE query as a list of dicts.
                # We include the skypix_system and skypix_level column values
                # explicitly instead of just letting the query search for all
                # of those related to the given records, because they are the
                # first columns in the primary key, and hence searching with
                # them will be way faster (and we don't want to add a new index
                # just for this operation).
                to_delete: List[Dict[str, Any]] = []
                for gv, skypix_systems in skypix.items():
                    for system, skypix_levels in skypix_systems.items():
                        to_delete.extend(
                            {"skypix_system": system.name, "skypix_level": level, **record.dataId.byName()}
                            for record, level in itertools.product(grouped[gv], skypix_levels)
                        )
                self._db.delete(
                    self._overlapTable,
                    ["skypix_system", "skypix_level"] + list(self.element.graph.required.names),
                    *to_delete,
                )
            overlapRecords: List[dict] = []
            # Compute overlaps for one governor value at a time, but gather
            # them all up for one insert.
            for gv, group in grouped.items():
                overlapRecords.extend(self._compute(group, skypix[gv], gv))
            _LOG.debug(
                "Inserting %d new skypix overlap rows for %s where %s in %s.",
                len(overlapRecords),
                self.element.name,
                self._governor.element.name,
                grouped.keys(),
            )
            self._db.insert(self._overlapTable, *overlapRecords)

    def _compute(
        self,
        records: Sequence[DimensionRecord],
        skypix: NamedKeyDict[SkyPixSystem, List[int]],
        governorValue: str,
    ) -> Iterator[dict]:
        """Compute all overlap rows for a particular governor dimension value
        and all of the skypix dimensions for which its overlaps are enabled.

        This method should only be called by `insert`.

        Parameters
        ----------
        records : `Sequence` [ `DimensionRecord` ]
            Records for ``self.element``.  Records with `None` regions are
            ignored.  All must have the governor value given.
        skypix : `NamedKeyDict` [ `SkyPixSystem`, `list` [ `int` ] ]
            Mapping containing all skypix systems and levels for which overlaps
            should be computed, grouped by `SkyPixSystem`.  The level lists
            are not modified.
        governorValue : `str`
            Value of this element's governor dimension for which overlaps
            should be computed.  For example, if ``self.element`` is ``visit``,
            this is an instrument name; if ``self.element`` is ``patch``, this
            is a skymap name.

        Yields
        ------
        row : `dict`
            Dictionary representing an overlap row.
        """
        # Order each system's levels finest-first once, up front, working on
        # copies so the caller's lists are left untouched.  Systems with no
        # enabled levels contribute no rows and are dropped here.
        levelsBySystem = [
            (system, sorted(levels, reverse=True)) for system, levels in skypix.items() if levels
        ]
        # Process input records one at time, computing all skypix indices for
        # each.
        for record in records:
            if record.region is None:
                continue
            assert getattr(record, self._governor.element.name) == governorValue
            for system, levels in levelsBySystem:
                baseOverlapRecord = record.dataId.byName()
                baseOverlapRecord["skypix_system"] = system.name
                # Start with the first level, which is the finest-grained one.
                # Compute skypix envelope indices directly for that.
                finest = levels[0]
                indices: Dict[int, Set[int]] = {finest: set()}
                for begin, end in system[finest].pixelization.envelope(record.region):
                    indices[finest].update(range(begin, end))
                # Divide those indices by powers of 4 (and remove duplicates)
                # to work our way up to the last (coarsest) level.
                for lastLevel, nextLevel in zip(levels[:-1], levels[1:]):
                    factor = 4 ** (lastLevel - nextLevel)
                    indices[nextLevel] = {index // factor for index in indices[lastLevel]}
                for level in levels:
                    yield from (
                        {
                            "skypix_level": level,
                            "skypix_index": index,
                            **baseOverlapRecord,  # type: ignore
                        }
                        for index in indices[level]
                    )

    def select(
        self,
        skypix: SkyPixDimension,
        governorValues: Union[AbstractSet[str], EllipsisType],
    ) -> sqlalchemy.sql.FromClause:
        """Construct a subquery expression containing overlaps between the
        given skypix dimension and governor values.

        Parameters
        ----------
        skypix : `SkyPixDimension`
            The skypix dimension (system and level) for which overlaps should
            be returned.
        governorValues : `AbstractSet` [ `str` ] or ``...``
            Values of this element's governor dimension for which overlaps
            should be returned.  For example, if ``self.element`` is ``visit``,
            this is a set of instrument names; if ``self.element`` is
            ``patch``, this is a set of skymap names.  If ``...`` all values
            in the database are used (`GovernorDimensionRecordStorage.values`).

        Returns
        -------
        subquery : `sqlalchemy.sql.FromClause`
            A SELECT query with an alias, intended for use as a subquery, with
            columns equal to ``self.element.required.names`` + ``skypix.name``.

        Raises
        ------
        RuntimeError
            Raised if ``skypix`` is not the common skypix dimension and
            overlaps with it have not been materialized for all of the
            requested governor values.
        """
        if skypix != self.element.universe.commonSkyPix:
            # We guarantee elsewhere that we always materialize all overlaps
            # vs. commonSkyPix, but for everything else, we need to check that
            # we have materialized this combination of governor values and
            # skypix.
            summaryWhere = [
                self._summaryTable.columns.skypix_system == skypix.system.name,
                self._summaryTable.columns.skypix_level == skypix.level,
            ]
            gvCol = self._summaryTable.columns[self._governor.element.name]
            if governorValues is not Ellipsis:
                summaryWhere.append(gvCol.in_(list(governorValues)))
            summaryQuery = (
                sqlalchemy.sql.select(gvCol)
                .select_from(self._summaryTable)
                .where(sqlalchemy.sql.and_(*summaryWhere))
            )
            materializedGovernorValues = {row._mapping[gvCol] for row in self._db.query(summaryQuery)}
            if governorValues is Ellipsis:
                missingGovernorValues = self._governor.values - materializedGovernorValues
            else:
                missingGovernorValues = governorValues - materializedGovernorValues
            if missingGovernorValues:
                raise RuntimeError(
                    f"Query requires an overlap join between {skypix.name} and {self.element.name} "
                    f"(for {self._governor.element.name} in {missingGovernorValues}), but these "
                    f"have not been materialized."
                )
        # Expose the skypix index under the skypix dimension's own name,
        # alongside the element's required dimension columns.
        columns = [self._overlapTable.columns.skypix_index.label(skypix.name)]
        columns.extend(self._overlapTable.columns[name] for name in self.element.graph.required.names)
        overlapWhere = [
            self._overlapTable.columns.skypix_system == skypix.system.name,
            self._overlapTable.columns.skypix_level == skypix.level,
        ]
        if governorValues is not Ellipsis:
            overlapWhere.append(
                self._overlapTable.columns[self._governor.element.name].in_(list(governorValues))
            )
        overlapQuery = (
            sqlalchemy.sql.select(*columns)
            .select_from(self._overlapTable)
            .where(sqlalchemy.sql.and_(*overlapWhere))
        )
        return overlapQuery.alias(f"{self.element.name}_{skypix.name}_overlap")

    def digestTables(self) -> Iterable[sqlalchemy.schema.Table]:
        """Return tables used for schema digest.

        Returns
        -------
        tables : `Iterable` [ `sqlalchemy.schema.Table` ]
            Possibly empty set of tables for schema digest calculations.
        """
        return [self._summaryTable, self._overlapTable]