Coverage for python/lsst/daf/butler/registry/dimensions/table.py: 85%
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = ["TableDimensionRecordStorage"]
25from collections import defaultdict
26import itertools
27import logging
28from typing import (
29 AbstractSet,
30 Any,
31 Dict,
32 Iterable,
33 Iterator,
34 List,
35 Mapping,
36 Optional,
37 Sequence,
38 Set,
39 Union,
40)
42import sqlalchemy
44from ...core import (
45 addDimensionForeignKey,
46 DatabaseDimensionElement,
47 DataCoordinate,
48 DataCoordinateIterable,
49 ddl,
50 DimensionElement,
51 DimensionRecord,
52 GovernorDimension,
53 NamedKeyDict,
54 NamedKeyMapping,
55 NamedValueSet,
56 SimpleQuery,
57 SkyPixDimension,
58 SkyPixSystem,
59 SpatialRegionDatabaseRepresentation,
60 TimespanDatabaseRepresentation,
61)
62from ..interfaces import (
63 Database,
64 DatabaseDimensionOverlapStorage,
65 DatabaseDimensionRecordStorage,
66 GovernorDimensionRecordStorage,
67 StaticTablesContext,
68)
69from ..queries import QueryBuilder
70from ..wildcards import Ellipsis, EllipsisType
# Module-level logger, named after this module.
_LOG = logging.getLogger(__name__)

# NOTE(review): nothing in the code visible in this module references
# MAX_FETCH_CHUNK; it is presumably consumed elsewhere - confirm before
# removing or changing it.
MAX_FETCH_CHUNK = 1000
"""Maximum number of data IDs for which we fetch records at a time.

Barring something database-engine-specific, this sets the size of the actual
SQL query, not just the number of result rows, because the only way to query
for multiple data IDs in a single SELECT query via SQLAlchemy is to have an OR
term in the WHERE clause for each one.
"""
class TableDimensionRecordStorage(DatabaseDimensionRecordStorage):
    """A record storage implementation that uses a regular database table.

    Parameters
    ----------
    db : `Database`
        Interface to the database engine and namespace that will hold these
        dimension records.
    element : `DatabaseDimensionElement`
        The element whose records this storage will manage.
    table : `sqlalchemy.schema.Table`
        The logical table for the element.
    skyPixOverlap : `_SkyPixOverlapStorage`, optional
        Object that manages the tables that hold materialized spatial overlap
        joins to skypix dimensions.  Should be `None` if (and only if)
        ``element.spatial is None``.
    """

    def __init__(self, db: Database, element: DatabaseDimensionElement, *, table: sqlalchemy.schema.Table,
                 skyPixOverlap: Optional[_SkyPixOverlapStorage] = None):
        self._db = db
        self._table = table
        self._element = element
        # Map each dimension name to the table column that holds it; the
        # column names come from the record class and are paired
        # positionally with the element's dimensions.
        self._fetchColumns: Dict[str, sqlalchemy.sql.ColumnElement] = {
            dimension.name: self._table.columns[name]
            for dimension, name in zip(self._element.dimensions,
                                       self._element.RecordClass.fields.dimensions.names)
        }
        self._skyPixOverlap = skyPixOverlap
        # Overlap storage objects registered later via `connect`.
        self._otherOverlaps: List[DatabaseDimensionOverlapStorage] = []

    @classmethod
    def initialize(
        cls,
        db: Database,
        element: DatabaseDimensionElement, *,
        context: Optional[StaticTablesContext] = None,
        config: Mapping[str, Any],
        governors: NamedKeyMapping[GovernorDimension, GovernorDimensionRecordStorage],
    ) -> DatabaseDimensionRecordStorage:
        # Docstring inherited from DatabaseDimensionRecordStorage.
        # (``config`` is accepted for interface compatibility; this
        # implementation does not read it.)
        spec = element.RecordClass.fields.makeTableSpec(
            RegionReprClass=db.getSpatialRegionRepresentation(),
            TimespanReprClass=db.getTimespanRepresentation(),
        )
        # Declare the table through the static-tables context when one is
        # given; otherwise create it on demand.
        if context is not None:
            table = context.addTable(element.name, spec)
        else:
            table = db.ensureTableExists(element.name, spec)
        skyPixOverlap: Optional[_SkyPixOverlapStorage]
        if element.spatial is not None:
            governor = governors[element.spatial.governor]
            skyPixOverlap = _SkyPixOverlapStorage.initialize(
                db,
                element,
                context=context,
                governor=governor,
            )
            result = cls(db, element, table=table, skyPixOverlap=skyPixOverlap)

            # Whenever anyone inserts a new governor dimension value, we want
            # to enable overlaps for that value between this element and
            # commonSkyPix.
            def callback(record: DimensionRecord) -> None:
                skyPixOverlap.enable(  # type: ignore
                    result,
                    element.universe.commonSkyPix,
                    getattr(record, element.spatial.governor.primaryKey.name),  # type: ignore
                )

            governor.registerInsertionListener(callback)
            return result
        else:
            return cls(db, element, table=table)

    @property
    def element(self) -> DatabaseDimensionElement:
        # Docstring inherited from DimensionRecordStorage.element.
        return self._element

    def clearCaches(self) -> None:
        # Docstring inherited from DimensionRecordStorage.clearCaches.
        # This implementation keeps no caches, so there is nothing to clear.
        pass

    def join(
        self,
        builder: QueryBuilder, *,
        regions: Optional[NamedKeyDict[DimensionElement, SpatialRegionDatabaseRepresentation]] = None,
        timespans: Optional[NamedKeyDict[DimensionElement, TimespanDatabaseRepresentation]] = None,
    ) -> None:
        # Docstring inherited from DimensionRecordStorage.
        if regions is not None:
            # Spatial join: bring in the materialized overlaps with the
            # common skypix dimension, then publish this table's region
            # column in the caller's ``regions`` mapping.
            dimensions = NamedValueSet(self.element.required)
            dimensions.add(self.element.universe.commonSkyPix)
            assert self._skyPixOverlap is not None
            builder.joinTable(
                self._skyPixOverlap.select(self.element.universe.commonSkyPix, Ellipsis),
                dimensions,
            )
            regionsInTable = self._db.getSpatialRegionRepresentation().fromSelectable(self._table)
            regions[self.element] = regionsInTable
        joinOn = builder.startJoin(self._table, self.element.dimensions,
                                   self.element.RecordClass.fields.dimensions.names)
        if timespans is not None:
            # Temporal join: require an overlap with every timespan already
            # present in the query, then publish this table's own timespan.
            timespanInTable = self._db.getTimespanRepresentation().fromSelectable(self._table)
            for timespanInQuery in timespans.values():
                joinOn.append(timespanInQuery.overlaps(timespanInTable))
            timespans[self.element] = timespanInTable
        builder.finishJoin(self._table, joinOn)
        # NOTE(review): the signature is annotated ``-> None`` but the table
        # is returned here.  The return is kept in case callers rely on it;
        # confirm against the base-class declaration and reconcile the two.
        return self._table

    def fetch(self, dataIds: DataCoordinateIterable) -> Iterable[DimensionRecord]:
        # Docstring inherited from DimensionRecordStorage.fetch.
        RecordClass = self.element.RecordClass
        query = SimpleQuery()
        query.columns.extend(self._table.columns[name] for name in RecordClass.fields.standard.names)
        if self.element.spatial is not None:
            query.columns.append(self._table.columns["region"])
        if self.element.temporal is not None:
            TimespanReprClass = self._db.getTimespanRepresentation()
            query.columns.extend(self._table.columns[name] for name in TimespanReprClass.getFieldNames())
        query.join(self._table)
        # Let the data ID iterable add its own WHERE constraints, looking up
        # our columns by dimension name.
        dataIds.constrain(query, lambda name: self._fetchColumns[name])
        for row in self._db.query(query.combine()):
            values = row._asdict()
            if self.element.temporal is not None:
                # Collapse the database representation of the timespan into
                # the single value stored under the timespan field name.
                values[TimespanDatabaseRepresentation.NAME] = TimespanReprClass.extract(values)
            yield RecordClass(**values)

    def insert(self, *records: DimensionRecord, replace: bool = False) -> None:
        # Docstring inherited from DimensionRecordStorage.insert.
        elementRows = [record.toDict() for record in records]
        if self.element.temporal is not None:
            # Replace the timespan entry in each row with the values the
            # database representation writes into the row.
            TimespanReprClass = self._db.getTimespanRepresentation()
            for row in elementRows:
                timespan = row.pop(TimespanDatabaseRepresentation.NAME)
                TimespanReprClass.update(timespan, result=row)
        # Element rows and their overlap rows are written in one transaction
        # so they cannot get out of sync.
        with self._db.transaction():
            if replace:
                self._db.replace(self._table, *elementRows)
            else:
                self._db.insert(self._table, *elementRows)
            if self._skyPixOverlap is not None:
                self._skyPixOverlap.insert(records, replace=replace)

    def sync(self, record: DimensionRecord, update: bool = False) -> Union[bool, Dict[str, Any]]:
        # Docstring inherited from DimensionRecordStorage.sync.
        compared = record.toDict()
        # Split the record into its required (key) fields and the remaining
        # fields to be compared.
        keys = {name: compared.pop(name) for name in record.fields.required.names}
        if self.element.temporal is not None:
            TimespanReprClass = self._db.getTimespanRepresentation()
            timespan = compared.pop(TimespanDatabaseRepresentation.NAME)
            TimespanReprClass.update(timespan, result=compared)
        with self._db.transaction():
            _, inserted_or_updated = self._db.sync(
                self._table,
                keys=keys,
                compared=compared,
                update=update,
            )
            if inserted_or_updated and self._skyPixOverlap is not None:
                if inserted_or_updated is True:
                    # Inserted a new row, so we just need to insert new
                    # overlap rows.
                    self._skyPixOverlap.insert([record])
                elif "region" in inserted_or_updated:  # type: ignore
                    # Updated the region, so we need to delete old overlap
                    # rows and insert new ones.
                    # (mypy should be able to tell that inserted_or_updated
                    # must be a dict if we get to this clause, but it can't)
                    self._skyPixOverlap.insert([record], replace=True)
                # Otherwise we updated something other than a region, and
                # the overlap rows are still valid.
        return inserted_or_updated

    def digestTables(self) -> Iterable[sqlalchemy.schema.Table]:
        # Docstring inherited from DimensionRecordStorage.digestTables.
        result = [self._table]
        if self._skyPixOverlap is not None:
            result.extend(self._skyPixOverlap.digestTables())
        return result

    def connect(self, overlaps: DatabaseDimensionOverlapStorage) -> None:
        # Docstring inherited from DatabaseDimensionRecordStorage.
        self._otherOverlaps.append(overlaps)
class _SkyPixOverlapStorage:
    """A helper object for `TableDimensionRecordStorage` that manages its
    materialized overlaps with skypix dimensions.

    New instances should be constructed by calling `initialize`, not by calling
    the constructor directly.

    Parameters
    ----------
    db : `Database`
        Interface to the underlying database engine and namespace.
    element : `DatabaseDimensionElement`
        Dimension element whose overlaps are to be managed.
    summaryTable : `sqlalchemy.schema.Table`
        Table that records which combinations of skypix dimensions and
        governor dimension values have materialized overlap rows.
    overlapTable : `sqlalchemy.schema.Table`
        Table containing the actual materialized overlap rows.
    governor : `GovernorDimensionRecordStorage`
        Record storage backend for this element's governor dimension.

    Notes
    -----
    This class (and most importantly, the tables it relies on) can in principle
    manage overlaps with any skypix dimension, but at present it is only being
    used to manage relationships with the special ``commonSkyPix`` dimension,
    because that's all the query system uses.  Eventually, we expect to
    require users to explicitly materialize all relationships they will want
    to use in queries.

    Other possible future improvements include:

    - allowing finer-grained skypix dimensions to provide overlap rows for
      coarser ones, by dividing indices by powers of 4 (and possibly doing
      ``SELECT DISTINCT`` in the subquery to remove duplicates);

    - allowing finer-grained database elements (e.g. patch) to provide overlap
      rows for coarser ones (e.g. tract), by ignoring irrelevant columns
      (e.g. the patch IDs) in the subquery (again, possible with
      ``SELECT DISTINCT``).

    But there's no point to doing any of that until the query system can
    figure out how best to ask for overlap rows when an exact match isn't
    available.
    """

    def __init__(
        self,
        db: Database,
        element: DatabaseDimensionElement,
        summaryTable: sqlalchemy.schema.Table,
        overlapTable: sqlalchemy.schema.Table,
        governor: GovernorDimensionRecordStorage,
    ):
        self._db = db
        self.element = element
        # Overlaps only make sense for spatial elements.
        assert element.spatial is not None
        self._summaryTable = summaryTable
        self._overlapTable = overlapTable
        self._governor = governor

    @classmethod
    def initialize(
        cls,
        db: Database,
        element: DatabaseDimensionElement, *,
        context: Optional[StaticTablesContext],
        governor: GovernorDimensionRecordStorage,
    ) -> _SkyPixOverlapStorage:
        """Construct a new instance, creating tables as needed.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        element : `DatabaseDimensionElement`
            Dimension element whose overlaps are to be managed.
        context : `StaticTablesContext`, optional
            If provided, an object to use to create any new tables.  If not
            provided, ``db.ensureTableExists`` should be used instead.
        governor : `GovernorDimensionRecordStorage`
            Record storage backend for this element's governor dimension.

        Returns
        -------
        storage : `_SkyPixOverlapStorage`
            New instance of ``cls`` backed by the summary and overlap tables.
        """
        if context is not None:
            op = context.addTable
        else:
            op = db.ensureTableExists
        summaryTable = op(
            cls._SUMMARY_TABLE_NAME_SPEC.format(element=element),
            cls._makeSummaryTableSpec(element),
        )
        overlapTable = op(
            cls._OVERLAP_TABLE_NAME_SPEC.format(element=element),
            cls._makeOverlapTableSpec(element),
        )
        # Construct via ``cls`` (not the hard-coded class name) so that a
        # subclass calling `initialize` gets an instance of itself.
        return cls(db, element, summaryTable=summaryTable, overlapTable=overlapTable,
                   governor=governor)

    _SUMMARY_TABLE_NAME_SPEC = "{element.name}_skypix_overlap_summary"

    @classmethod
    def _makeSummaryTableSpec(cls, element: DatabaseDimensionElement) -> ddl.TableSpec:
        """Create a specification for the table that records which combinations
        of skypix dimension and governor value have materialized overlaps.

        Parameters
        ----------
        element : `DatabaseDimensionElement`
            Dimension element whose overlaps are to be managed.

        Returns
        -------
        tableSpec : `ddl.TableSpec`
            Table specification.
        """
        assert element.spatial is not None
        tableSpec = ddl.TableSpec(
            fields=[
                ddl.FieldSpec(
                    name="skypix_system",
                    dtype=sqlalchemy.String,
                    length=16,
                    nullable=False,
                    primaryKey=True,
                ),
                ddl.FieldSpec(
                    name="skypix_level",
                    dtype=sqlalchemy.SmallInteger,
                    nullable=False,
                    primaryKey=True,
                ),
            ]
        )
        # The governor dimension column completes the primary key.
        addDimensionForeignKey(tableSpec, element.spatial.governor, primaryKey=True)
        return tableSpec

    _OVERLAP_TABLE_NAME_SPEC = "{element.name}_skypix_overlap"

    @classmethod
    def _makeOverlapTableSpec(cls, element: DatabaseDimensionElement) -> ddl.TableSpec:
        """Create a specification for the table that holds materialized
        overlap rows.

        Parameters
        ----------
        element : `DatabaseDimensionElement`
            Dimension element whose overlaps are to be managed.

        Returns
        -------
        tableSpec : `ddl.TableSpec`
            Table specification.
        """
        assert element.spatial is not None
        tableSpec = ddl.TableSpec(
            fields=[
                ddl.FieldSpec(
                    name="skypix_system",
                    dtype=sqlalchemy.String,
                    length=16,
                    nullable=False,
                    primaryKey=True,
                ),
                ddl.FieldSpec(
                    name="skypix_level",
                    dtype=sqlalchemy.SmallInteger,
                    nullable=False,
                    primaryKey=True,
                ),
                # (more columns added below)
            ],
            unique=set(),
            indexes={
                # This index has the same fields as the PK, in a different
                # order, to facilitate queries that know skypix_index and want
                # to find the other element.
                ("skypix_system", "skypix_level", "skypix_index",) + tuple(element.graph.required.names),
            },
            foreignKeys=[
                # Foreign key to summary table.  This makes sure we don't
                # materialize any overlaps without remembering that we've done
                # so in the summary table, though it can't prevent the converse
                # of adding a summary row without adding overlap row (either of
                # those is a logic bug, of course, but we want to be defensive
                # about those).  Using ON DELETE CASCADE, it'd be very easy to
                # implement "disabling" an overlap materialization, because we
                # can just delete the summary row.
                # Note that the governor dimension column is added below, in
                # the call to addDimensionForeignKey.
                ddl.ForeignKeySpec(
                    cls._SUMMARY_TABLE_NAME_SPEC.format(element=element),
                    source=("skypix_system", "skypix_level", element.spatial.governor.name),
                    target=("skypix_system", "skypix_level", element.spatial.governor.name),
                    onDelete="CASCADE",
                ),
            ],
        )
        # Add fields for the standard element this class manages overlaps for.
        # This is guaranteed to add a column for the governor dimension,
        # because that's a required dependency of element.
        for dimension in element.required:
            addDimensionForeignKey(tableSpec, dimension, primaryKey=True)
        # Add field for the actual skypix index.  We do this later because I
        # think we care (at least a bit) about the order in which the primary
        # key is defined, in that we want a non-summary column like this one
        # to appear after the governor dimension column.
        tableSpec.fields.add(
            ddl.FieldSpec(
                name="skypix_index",
                dtype=sqlalchemy.BigInteger,
                nullable=False,
                primaryKey=True,
            )
        )
        return tableSpec

    def enable(
        self,
        storage: TableDimensionRecordStorage,
        skypix: SkyPixDimension,
        governorValue: str,
    ) -> None:
        """Enable materialization of overlaps between a skypix dimension
        and the records of ``self.element`` with a particular governor value.

        Parameters
        ----------
        storage : `TableDimensionRecordStorage`
            Storage object for the records of ``self.element``.
        skypix : `SkyPixDimension`
            The skypix dimension (system and level) for which overlaps should
            be materialized.
        governorValue : `str`
            Value of this element's governor dimension for which overlaps
            should be materialized.  For example, if ``self.element`` is
            ``visit``, this is an instrument name; if ``self.element`` is
            ``patch``, this is a skymap name.

        Notes
        -----
        If there are existing rows for the given ``governorValue``, overlap
        rows for them will be immediately computed and inserted.  At present,
        that never happens, because we only enable overlaps with
        `DimensionUniverse.commonSkyPix`, and that happens immediately after
        each governor row is inserted (and there can't be any patch rows,
        for example, until after the corresponding skymap row is inserted).

        After calling `enable` for a particular combination, any new records
        for ``self.element`` that are inserted will automatically be
        accompanied by overlap records (via calls to `insert` made
        by `TableDimensionRecordStorage` methods).
        """
        # Because we're essentially materializing a view in Python, we
        # aggressively lock all tables we're reading and writing in order to be
        # sure nothing gets out of sync.  This may not be the most efficient
        # approach possible, but we'll focus on correct before we focus on
        # fast, and enabling a new overlap combination should be a very rare
        # operation anyway, and never one we do in parallel.
        with self._db.transaction(lock=[self._governor.table, storage._table,
                                        self._summaryTable, self._overlapTable]):
            # Only the "was a row inserted" flag matters here; the synced row
            # itself is not needed.
            _, inserted = self._db.sync(
                self._summaryTable,
                keys={
                    "skypix_system": skypix.system.name,
                    "skypix_level": skypix.level,
                    self._governor.element.name: governorValue,
                },
            )
            if inserted:
                _LOG.debug(
                    "Precomputing initial overlaps for %s vs %s for %s=%s",
                    skypix.name,
                    self.element.name,
                    self._governor.element.name,
                    governorValue
                )
                self._fill(storage=storage, skypix=skypix, governorValue=governorValue)
            else:
                _LOG.debug(
                    "Overlaps already precomputed for %s vs %s for %s=%s",
                    skypix.name,
                    self.element.name,
                    self._governor.element.name,
                    governorValue
                )

    def _fill(
        self, *,
        storage: TableDimensionRecordStorage,
        skypix: SkyPixDimension,
        governorValue: str,
    ) -> None:
        """Insert overlap records for a newly-enabled combination of skypix
        dimension and governor value.

        This method should only be called by `enable`.

        Parameters
        ----------
        storage : `TableDimensionRecordStorage`
            Storage object for the records of ``self.element``.
        skypix : `SkyPixDimension`
            The skypix dimension (system and level) for which overlaps should
            be materialized.
        governorValue : `str`
            Value of this element's governor dimension for which overlaps
            should be materialized.  For example, if ``self.element`` is
            ``visit``, this is an instrument name; if ``self.element`` is
            ``patch``, this is a skymap name.
        """
        overlapRecords: List[dict] = []
        # `DimensionRecordStorage.fetch` as defined by the ABC expects to be
        # given iterables of data IDs that correspond to that element's graph
        # (e.g. {instrument, visit, detector}), not just some subset of it
        # (e.g. {instrument}).  But we know the implementation of `fetch` for
        # `TableDimensionRecordStorage` will use this iterable to do exactly
        # what we want.
        governorDataId = DataCoordinate.standardize({self._governor.element.name: governorValue},
                                                    graph=self._governor.element.graph)
        for record in storage.fetch(DataCoordinateIterable.fromScalar(governorDataId)):
            if record.region is None:
                continue
            baseOverlapRecord = record.dataId.byName()
            baseOverlapRecord["skypix_system"] = skypix.system.name
            baseOverlapRecord["skypix_level"] = skypix.level
            # The envelope is a sequence of half-open [begin, end) index
            # ranges; emit one overlap row per index.
            for begin, end in skypix.pixelization.envelope(record.region):
                overlapRecords.extend(
                    dict(baseOverlapRecord, skypix_index=index) for index in range(begin, end)
                )
        _LOG.debug(
            "Inserting %d initial overlap rows for %s vs %s for %s=%r",
            len(overlapRecords),
            skypix.name,
            self.element.name,
            self._governor.element.name,
            governorValue,
        )
        self._db.insert(self._overlapTable, *overlapRecords)

    def insert(self, records: Sequence[DimensionRecord], replace: bool = False) -> None:
        """Insert overlaps for a sequence of ``self.element`` records that
        have just been inserted.

        This must be called by any method that inserts records for that
        element (i.e. `TableDimensionRecordStorage.insert` and
        `TableDimensionRecordStorage.sync`), within the same transaction.

        Parameters
        ----------
        records : `Sequence` [ `DimensionRecord` ]
            Records for ``self.element``.  Records with `None` regions are
            ignored.  If empty, this method does nothing.
        replace : `bool`, optional
            If `True` (`False` is default) one or more of the given records may
            already exist and is being updated, so we need to delete any
            existing overlap records first.
        """
        if not records:
            # Nothing to materialize; avoid locking the summary table and
            # issuing a query with an empty IN clause.
            return
        # Group records by family.governor value.
        grouped: Dict[str, List[DimensionRecord]] = defaultdict(list)
        for record in records:
            grouped[getattr(record, self._governor.element.name)].append(record)
        _LOG.debug(
            "Precomputing new skypix overlaps for %s where %s in %s.",
            self.element.name, self._governor.element.name, grouped.keys()
        )
        # Make sure the set of combinations to materialize does not change
        # while we are materializing the ones we have, by locking the summary
        # table.  Because we aren't planning to write to the summary table,
        # this could just be a SHARED lock instead of an EXCLUSIVE one, but
        # there's no API for that right now.
        with self._db.transaction(lock=[self._summaryTable]):
            # Query for the skypix dimensions to be associated with each
            # governor value.
            gvCol = self._summaryTable.columns[self._governor.element.name]
            sysCol = self._summaryTable.columns.skypix_system
            lvlCol = self._summaryTable.columns.skypix_level
            query = sqlalchemy.sql.select(
                gvCol, sysCol, lvlCol,
            ).select_from(
                self._summaryTable
            ).where(
                gvCol.in_(list(grouped.keys()))
            )
            # Group results by governor value, then skypix system.
            skypix: Dict[str, NamedKeyDict[SkyPixSystem, List[int]]] = {
                gv: NamedKeyDict() for gv in grouped.keys()
            }
            for summaryRow in self._db.query(query).mappings():
                system = self.element.universe.skypix[summaryRow[sysCol]]
                skypix[summaryRow[gvCol]].setdefault(system, []).append(summaryRow[lvlCol])
            if replace:
                # Construct constraints for a DELETE query as a list of dicts.
                # We include the skypix_system and skypix_level column values
                # explicitly instead of just letting the query search for all
                # of those related to the given records, because they are the
                # first columns in the primary key, and hence searching with
                # them will be way faster (and we don't want to add a new index
                # just for this operation).
                to_delete: List[Dict[str, Any]] = []
                for gv, skypix_systems in skypix.items():
                    for system, skypix_levels in skypix_systems.items():
                        to_delete.extend(
                            {"skypix_system": system.name, "skypix_level": level, **record.dataId.byName()}
                            for record, level in itertools.product(grouped[gv], skypix_levels)
                        )
                self._db.delete(
                    self._overlapTable,
                    ["skypix_system", "skypix_level"] + list(self.element.graph.required.names),
                    *to_delete,
                )
            overlapRecords: List[dict] = []
            # Compute overlaps for one governor value at a time, but gather
            # them all up for one insert.
            for gv, group in grouped.items():
                overlapRecords.extend(self._compute(group, skypix[gv], gv))
            _LOG.debug(
                "Inserting %d new skypix overlap rows for %s where %s in %s.",
                len(overlapRecords), self.element.name, self._governor.element.name, grouped.keys()
            )
            self._db.insert(self._overlapTable, *overlapRecords)

    def _compute(
        self,
        records: Sequence[DimensionRecord],
        skypix: NamedKeyDict[SkyPixSystem, List[int]],
        governorValue: str,
    ) -> Iterator[dict]:
        """Compute all overlap rows for a particular governor dimension value
        and all of the skypix dimensions for which its overlaps are enabled.

        This method should only be called by `insert`.

        Parameters
        ----------
        records : `Sequence` [ `DimensionRecord` ]
            Records for ``self.element``.  Records with `None` regions are
            ignored.  All must have the governor value given.
        skypix : `NamedKeyDict` [ `SkyPixSystem`, `list` [ `int` ] ]
            Mapping containing all skypix systems and levels for which overlaps
            should be computed, grouped by `SkyPixSystem`.  The level lists
            are not modified.
        governorValue : `str`
            Value of this element's governor dimension for which overlaps
            should be computed.  For example, if ``self.element`` is ``visit``,
            this is an instrument name; if ``self.element`` is ``patch``, this
            is a skymap name.

        Yields
        ------
        row : `dict`
            Dictionary representing an overlap row.
        """
        # Sort each system's levels finest-first (largest level number first)
        # exactly once, into new lists.  This avoids re-sorting for every
        # record and leaves the caller's lists untouched.  Systems with no
        # enabled levels are dropped here.
        levelsBySystem = [
            (system, sorted(levels, reverse=True))
            for system, levels in skypix.items()
            if levels
        ]
        governorName = self._governor.element.name
        # Process input records one at time, computing all skypix indices for
        # each.
        for record in records:
            if record.region is None:
                continue
            assert getattr(record, governorName) == governorValue
            for system, levels in levelsBySystem:
                baseOverlapRecord = record.dataId.byName()
                baseOverlapRecord["skypix_system"] = system.name
                # Start with the first level, which is the finest-grained one.
                # Compute skypix envelope indices directly for that.
                finest = levels[0]
                indices: Dict[int, Set[int]] = {finest: set()}
                for begin, end in system[finest].pixelization.envelope(record.region):
                    indices[finest].update(range(begin, end))
                # Divide those indices by powers of 4 (and remove duplicates)
                # to work our way up to the last (coarsest) level.
                for lastLevel, nextLevel in zip(levels[:-1], levels[1:]):
                    factor = 4**(lastLevel - nextLevel)
                    indices[nextLevel] = {index//factor for index in indices[lastLevel]}
                for level in levels:
                    yield from (
                        {
                            "skypix_level": level,
                            "skypix_index": index,
                            **baseOverlapRecord,  # type: ignore
                        } for index in indices[level]
                    )

    def select(
        self,
        skypix: SkyPixDimension,
        governorValues: Union[AbstractSet[str], EllipsisType],
    ) -> sqlalchemy.sql.FromClause:
        """Construct a subquery expression containing overlaps between the
        given skypix dimension and governor values.

        Parameters
        ----------
        skypix : `SkyPixDimension`
            The skypix dimension (system and level) for which overlaps should
            be materialized.
        governorValues : `AbstractSet` [ `str` ] or ``...``
            Values of this element's governor dimension for which overlaps
            should be returned.  For example, if ``self.element`` is ``visit``,
            this is a set of instrument names; if ``self.element`` is
            ``patch``, this is a set of skymap names.  If ``...`` all values
            in the database are used (`GovernorDimensionRecordStorage.values`).

        Returns
        -------
        subquery : `sqlalchemy.sql.FromClause`
            A SELECT query with an alias, intended for use as a subquery, with
            columns equal to ``self.element.required.names`` + ``skypix.name``.

        Raises
        ------
        RuntimeError
            Raised if ``skypix`` is not the common skypix dimension and
            overlaps with it have not been materialized for one or more of
            the requested governor values.
        """
        if skypix != self.element.universe.commonSkyPix:
            # We guarantee elsewhere that we always materialize all overlaps
            # vs. commonSkyPix, but for everything else, we need to check that
            # we have materialized this combination of governor values and
            # skypix.
            summaryWhere = [
                self._summaryTable.columns.skypix_system == skypix.system.name,
                self._summaryTable.columns.skypix_level == skypix.level,
            ]
            gvCol = self._summaryTable.columns[self._governor.element.name]
            if governorValues is not Ellipsis:
                summaryWhere.append(gvCol.in_(list(governorValues)))
            summaryQuery = sqlalchemy.sql.select(
                gvCol
            ).select_from(
                self._summaryTable
            ).where(
                sqlalchemy.sql.and_(*summaryWhere)
            )
            materializedGovernorValues = {row._mapping[gvCol] for row in self._db.query(summaryQuery)}
            if governorValues is Ellipsis:
                missingGovernorValues = self._governor.values - materializedGovernorValues
            else:
                missingGovernorValues = governorValues - materializedGovernorValues
            if missingGovernorValues:
                raise RuntimeError(
                    f"Query requires an overlap join between {skypix.name} and {self.element.name} "
                    f"(for {self._governor.element.name} in {missingGovernorValues}), but these "
                    f"have not been materialized."
                )
        # Expose the skypix index under the skypix dimension's own name,
        # alongside the element's required dimension columns.
        columns = [self._overlapTable.columns.skypix_index.label(skypix.name)]
        columns.extend(self._overlapTable.columns[name] for name in self.element.graph.required.names)
        overlapWhere = [
            self._overlapTable.columns.skypix_system == skypix.system.name,
            self._overlapTable.columns.skypix_level == skypix.level,
        ]
        if governorValues is not Ellipsis:
            overlapWhere.append(
                self._overlapTable.columns[self._governor.element.name].in_(list(governorValues))
            )
        overlapQuery = sqlalchemy.sql.select(
            *columns
        ).select_from(
            self._overlapTable
        ).where(
            sqlalchemy.sql.and_(*overlapWhere)
        )
        return overlapQuery.alias(f"{self.element.name}_{skypix.name}_overlap")

    def digestTables(self) -> Iterable[sqlalchemy.schema.Table]:
        """Return tables used for schema digest.

        Returns
        -------
        tables : `Iterable` [ `sqlalchemy.schema.Table` ]
            Possibly empty set of tables for schema digest calculations.
        """
        return [self._summaryTable, self._overlapTable]