Coverage for python/lsst/daf/butler/registry/dimensions/table.py : 85%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = ["TableDimensionRecordStorage"]
25from collections import defaultdict
26import itertools
27import logging
28from typing import (
29 AbstractSet,
30 Any,
31 Dict,
32 Iterable,
33 Iterator,
34 List,
35 Mapping,
36 Optional,
37 Sequence,
38 Set,
39 Union,
40)
42import sqlalchemy
44from ...core import (
45 addDimensionForeignKey,
46 DatabaseDimensionElement,
47 DataCoordinate,
48 DataCoordinateIterable,
49 ddl,
50 DimensionElement,
51 DimensionRecord,
52 GovernorDimension,
53 NamedKeyDict,
54 NamedKeyMapping,
55 NamedValueSet,
56 SimpleQuery,
57 SkyPixDimension,
58 SkyPixSystem,
59 SpatialRegionDatabaseRepresentation,
60 TimespanDatabaseRepresentation,
61)
62from ..interfaces import (
63 Database,
64 DatabaseDimensionOverlapStorage,
65 DatabaseDimensionRecordStorage,
66 GovernorDimensionRecordStorage,
67 StaticTablesContext,
68)
69from ..queries import QueryBuilder
70from ..wildcards import Ellipsis, EllipsisType
_LOG = logging.getLogger(__name__)

MAX_FETCH_CHUNK = 1000
"""Maximum number of data IDs for which records are fetched in one query.

Barring something database-engine-specific, this sets the size of the actual
SQL query, not just the number of result rows, because the only way to query
for multiple data IDs in a single SELECT query via SQLAlchemy is to have an OR
term in the WHERE clause for each one.
"""
class TableDimensionRecordStorage(DatabaseDimensionRecordStorage):
    """Dimension record storage backed by a regular database table.

    Parameters
    ----------
    db : `Database`
        Interface to the database engine and namespace that will hold these
        dimension records.
    element : `DatabaseDimensionElement`
        The element whose records this storage will manage.
    table : `sqlalchemy.schema.Table`
        The logical table for the element.
    skyPixOverlap : `_SkyPixOverlapStorage`, optional
        Helper that manages the tables holding materialized spatial overlap
        joins to skypix dimensions.  Should be `None` if (and only if)
        ``element.spatial is None``.
    """

    def __init__(self, db: Database, element: DatabaseDimensionElement, *, table: sqlalchemy.schema.Table,
                 skyPixOverlap: Optional[_SkyPixOverlapStorage] = None):
        self._db = db
        self._table = table
        self._element = element
        # Map each dimension's name to the table column holding its value by
        # pairing the element's dimensions positionally with the record
        # class's dimension field names.
        self._fetchColumns: Dict[str, sqlalchemy.sql.ColumnElement] = {}
        fieldPairs = zip(self._element.dimensions, self._element.RecordClass.fields.dimensions.names)
        for dimension, fieldName in fieldPairs:
            self._fetchColumns[dimension.name] = self._table.columns[fieldName]
        self._skyPixOverlap = skyPixOverlap
        self._otherOverlaps: List[DatabaseDimensionOverlapStorage] = []

    @classmethod
    def initialize(
        cls,
        db: Database,
        element: DatabaseDimensionElement, *,
        context: Optional[StaticTablesContext] = None,
        config: Mapping[str, Any],
        governors: NamedKeyMapping[GovernorDimension, GovernorDimensionRecordStorage],
    ) -> DatabaseDimensionRecordStorage:
        # Docstring inherited from DatabaseDimensionRecordStorage.
        spec = element.RecordClass.fields.makeTableSpec(
            RegionReprClass=db.getSpatialRegionRepresentation(),
            TimespanReprClass=db.getTimespanRepresentation(),
        )
        # Declare the table through the static-tables context when one is
        # given; otherwise create it on demand.
        if context is None:
            table = db.ensureTableExists(element.name, spec)
        else:
            table = context.addTable(element.name, spec)
        # Non-spatial elements need no overlap helper and no listener.
        if element.spatial is None:
            return cls(db, element, table=table)
        governor = governors[element.spatial.governor]
        skyPixOverlap = _SkyPixOverlapStorage.initialize(
            db,
            element,
            context=context,
            governor=governor,
        )
        result = cls(db, element, table=table, skyPixOverlap=skyPixOverlap)

        # Whenever anyone inserts a new governor dimension value, we want
        # to enable overlaps for that value between this element and
        # commonSkyPix.
        def callback(record: DimensionRecord) -> None:
            skyPixOverlap.enable(  # type: ignore
                result,
                element.universe.commonSkyPix,
                getattr(record, element.spatial.governor.primaryKey.name),  # type: ignore
            )

        governor.registerInsertionListener(callback)
        return result

    @property
    def element(self) -> DatabaseDimensionElement:
        # Docstring inherited from DimensionRecordStorage.element.
        return self._element

    def clearCaches(self) -> None:
        # Docstring inherited from DimensionRecordStorage.clearCaches.
        # This class keeps no caches, so there is nothing to clear.
        pass

    def join(
        self,
        builder: QueryBuilder, *,
        regions: Optional[NamedKeyDict[DimensionElement, SpatialRegionDatabaseRepresentation]] = None,
        timespans: Optional[NamedKeyDict[DimensionElement, TimespanDatabaseRepresentation]] = None,
    ) -> None:
        # Docstring inherited from DimensionRecordStorage.
        if regions is not None:
            # Spatial joins go through the materialized overlaps with the
            # common skypix dimension rather than comparing regions directly.
            commonSkyPix = self.element.universe.commonSkyPix
            dimensions = NamedValueSet(self.element.required)
            dimensions.add(commonSkyPix)
            assert self._skyPixOverlap is not None
            overlapSubquery = self._skyPixOverlap.select(commonSkyPix, Ellipsis)
            builder.joinTable(overlapSubquery, dimensions)
            regions[self.element] = self._db.getSpatialRegionRepresentation().fromSelectable(self._table)
        joinOn = builder.startJoin(self._table, self.element.dimensions,
                                   self.element.RecordClass.fields.dimensions.names)
        if timespans is not None:
            # Require this table's timespan to overlap every timespan that is
            # already part of the query, then register ours for later joins.
            timespanInTable = self._db.getTimespanRepresentation().fromSelectable(self._table)
            joinOn.extend(
                timespanInQuery.overlaps(timespanInTable) for timespanInQuery in timespans.values()
            )
            timespans[self.element] = timespanInTable
        builder.finishJoin(self._table, joinOn)
        # NOTE(review): the annotation says ``None`` but the table is
        # returned; confirm against the base-class signature.
        return self._table

    def fetch(self, dataIds: DataCoordinateIterable) -> Iterable[DimensionRecord]:
        # Docstring inherited from DimensionRecordStorage.fetch.
        RecordClass = self.element.RecordClass
        isTemporal = self.element.temporal is not None
        query = SimpleQuery()
        query.columns.extend(self._table.columns[name] for name in RecordClass.fields.standard.names)
        if self.element.spatial is not None:
            query.columns.append(self._table.columns["region"])
        if isTemporal:
            TimespanReprClass = self._db.getTimespanRepresentation()
            query.columns.extend(self._table.columns[name] for name in TimespanReprClass.getFieldNames())
        query.join(self._table)
        dataIds.constrain(query, lambda name: self._fetchColumns[name])
        for row in self._db.query(query.combine()):
            values = row._asdict()
            if isTemporal:
                # Collapse the database-level timespan columns into a single
                # value stored under the standard timespan field name.
                values[TimespanDatabaseRepresentation.NAME] = TimespanReprClass.extract(values)
            yield RecordClass(**values)

    def insert(self, *records: DimensionRecord, replace: bool = False) -> None:
        # Docstring inherited from DimensionRecordStorage.insert.
        elementRows = [record.toDict() for record in records]
        if self.element.temporal is not None:
            # Expand each record's timespan into its database representation
            # in place.
            TimespanReprClass = self._db.getTimespanRepresentation()
            for elementRow in elementRows:
                TimespanReprClass.update(
                    elementRow.pop(TimespanDatabaseRepresentation.NAME),
                    result=elementRow,
                )
        # Element rows and their overlap rows are written in one transaction.
        with self._db.transaction():
            writeRows = self._db.replace if replace else self._db.insert
            writeRows(self._table, *elementRows)
            if self._skyPixOverlap is not None:
                self._skyPixOverlap.insert(records, replace=replace)

    def sync(self, record: DimensionRecord, update: bool = False) -> Union[bool, Dict[str, Any]]:
        # Docstring inherited from DimensionRecordStorage.sync.
        compared = record.toDict()
        # Required fields identify the row; everything left is compared.
        keys = {name: compared.pop(name) for name in record.fields.required.names}
        if self.element.temporal is not None:
            TimespanReprClass = self._db.getTimespanRepresentation()
            TimespanReprClass.update(compared.pop(TimespanDatabaseRepresentation.NAME), result=compared)
        with self._db.transaction():
            _, inserted_or_updated = self._db.sync(
                self._table,
                keys=keys,
                compared=compared,
                update=update,
            )
            if self._skyPixOverlap is not None and inserted_or_updated:
                if inserted_or_updated is True:
                    # A brand new row only needs new overlap rows.
                    self._skyPixOverlap.insert([record])
                elif "region" in inserted_or_updated:  # type: ignore
                    # The region changed, so old overlap rows must be deleted
                    # and new ones inserted.
                    # (mypy should be able to tell that inserted_or_updated
                    # must be a dict if we get to this clause, but it can't)
                    self._skyPixOverlap.insert([record], replace=True)
                # Otherwise something other than the region was updated and
                # the overlap rows are still valid.
        return inserted_or_updated

    def digestTables(self) -> Iterable[sqlalchemy.schema.Table]:
        # Docstring inherited from DimensionRecordStorage.digestTables.
        tables = [self._table]
        if self._skyPixOverlap is not None:
            tables.extend(self._skyPixOverlap.digestTables())
        return tables

    def connect(self, overlaps: DatabaseDimensionOverlapStorage) -> None:
        # Docstring inherited from DatabaseDimensionRecordStorage.
        self._otherOverlaps.append(overlaps)
class _SkyPixOverlapStorage:
    """A helper object for `TableDimensionRecordStorage` that manages its
    materialized overlaps with skypix dimensions.

    New instances should be constructed by calling `initialize`, not by calling
    the constructor directly.

    Parameters
    ----------
    db : `Database`
        Interface to the underlying database engine and namespace.
    element : `DatabaseDimensionElement`
        Dimension element whose overlaps are to be managed.
    summaryTable : `sqlalchemy.schema.Table`
        Table that records which combinations of skypix dimensions and
        governor dimension values have materialized overlap rows.
    overlapTable : `sqlalchemy.schema.Table`
        Table containing the actual materialized overlap rows.
    governor : `GovernorDimensionRecordStorage`
        Record storage backend for this element's governor dimension.

    Notes
    -----
    This class (and most importantly, the tables it relies on) can in principle
    manage overlaps with any skypix dimension, but at present it is only being
    used to manage relationships with the special ``commonSkyPix`` dimension,
    because that's all the query system uses.  Eventually, we expect to
    require users to explicitly materialize all relationships they will want
    to use in queries.

    Other possible future improvements include:

    - allowing finer-grained skypix dimensions to provide overlap rows for
      coarser ones, by dividing indices by powers of 4 (and possibly doing
      ``SELECT DISTINCT`` in the subquery to remove duplicates);

    - allowing finer-grained database elements (e.g. patch) to provide overlap
      rows for coarser ones (e.g. tract), by ignoring irrelevant columns
      (e.g. the patch IDs) in the subquery (again, possible with
      ``SELECT DISTINCT``).

    But there's no point to doing any of that until the query system can
    figure out how best to ask for overlap rows when an exact match isn't
    available.
    """

    def __init__(
        self,
        db: Database,
        element: DatabaseDimensionElement,
        summaryTable: sqlalchemy.schema.Table,
        overlapTable: sqlalchemy.schema.Table,
        governor: GovernorDimensionRecordStorage,
    ):
        self._db = db
        self.element = element
        assert element.spatial is not None
        self._summaryTable = summaryTable
        self._overlapTable = overlapTable
        self._governor = governor

    @classmethod
    def initialize(
        cls,
        db: Database,
        element: DatabaseDimensionElement, *,
        context: Optional[StaticTablesContext],
        governor: GovernorDimensionRecordStorage,
    ) -> _SkyPixOverlapStorage:
        """Construct a new instance, creating tables as needed.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        element : `DatabaseDimensionElement`
            Dimension element whose overlaps are to be managed.
        context : `StaticTablesContext`, optional
            If provided, an object to use to create any new tables.  If not
            provided, ``db.ensureTableExists`` should be used instead.
        governor : `GovernorDimensionRecordStorage`
            Record storage backend for this element's governor dimension.

        Returns
        -------
        storage : `_SkyPixOverlapStorage`
            New instance of ``cls`` bound to the summary and overlap tables.
        """
        if context is not None:
            op = context.addTable
        else:
            op = db.ensureTableExists
        summaryTable = op(
            cls._SUMMARY_TABLE_NAME_SPEC.format(element=element),
            cls._makeSummaryTableSpec(element),
        )
        overlapTable = op(
            cls._OVERLAP_TABLE_NAME_SPEC.format(element=element),
            cls._makeOverlapTableSpec(element),
        )
        # Construct via ``cls`` (not the hard-coded class name) so that a
        # subclass calling this alternate constructor gets its own type.
        return cls(db, element, summaryTable=summaryTable, overlapTable=overlapTable,
                   governor=governor)

    _SUMMARY_TABLE_NAME_SPEC = "{element.name}_skypix_overlap_summary"

    @classmethod
    def _makeSummaryTableSpec(cls, element: DatabaseDimensionElement) -> ddl.TableSpec:
        """Create a specification for the table that records which combinations
        of skypix dimension and governor value have materialized overlaps.

        Parameters
        ----------
        element : `DatabaseDimensionElement`
            Dimension element whose overlaps are to be managed.

        Returns
        -------
        tableSpec : `ddl.TableSpec`
            Table specification.
        """
        assert element.spatial is not None
        tableSpec = ddl.TableSpec(
            fields=[
                ddl.FieldSpec(
                    name="skypix_system",
                    dtype=sqlalchemy.String,
                    length=16,
                    nullable=False,
                    primaryKey=True,
                ),
                ddl.FieldSpec(
                    name="skypix_level",
                    dtype=sqlalchemy.SmallInteger,
                    nullable=False,
                    primaryKey=True,
                ),
            ]
        )
        addDimensionForeignKey(tableSpec, element.spatial.governor, primaryKey=True)
        return tableSpec

    _OVERLAP_TABLE_NAME_SPEC = "{element.name}_skypix_overlap"

    @classmethod
    def _makeOverlapTableSpec(cls, element: DatabaseDimensionElement) -> ddl.TableSpec:
        """Create a specification for the table that holds materialized
        overlap rows.

        Parameters
        ----------
        element : `DatabaseDimensionElement`
            Dimension element whose overlaps are to be managed.

        Returns
        -------
        tableSpec : `ddl.TableSpec`
            Table specification.
        """
        assert element.spatial is not None
        tableSpec = ddl.TableSpec(
            fields=[
                ddl.FieldSpec(
                    name="skypix_system",
                    dtype=sqlalchemy.String,
                    length=16,
                    nullable=False,
                    primaryKey=True,
                ),
                ddl.FieldSpec(
                    name="skypix_level",
                    dtype=sqlalchemy.SmallInteger,
                    nullable=False,
                    primaryKey=True,
                ),
                # (more columns added below)
            ],
            unique=set(),
            foreignKeys=[
                # Foreign key to summary table.  This makes sure we don't
                # materialize any overlaps without remembering that we've done
                # so in the summary table, though it can't prevent the converse
                # of adding a summary row without adding overlap row (either of
                # those is a logic bug, of course, but we want to be defensive
                # about those).  Using ON DELETE CASCADE, it'd be very easy to
                # implement "disabling" an overlap materialization, because we
                # can just delete the summary row.
                # Note that the governor dimension column is added below, in
                # the call to addDimensionForeignKey.
                ddl.ForeignKeySpec(
                    cls._SUMMARY_TABLE_NAME_SPEC.format(element=element),
                    source=("skypix_system", "skypix_level", element.spatial.governor.name),
                    target=("skypix_system", "skypix_level", element.spatial.governor.name),
                    onDelete="CASCADE",
                ),
            ],
        )
        # Add fields for the standard element this class manages overlaps for.
        # This is guaranteed to add a column for the governor dimension,
        # because that's a required dependency of element.
        for dimension in element.required:
            addDimensionForeignKey(tableSpec, dimension, primaryKey=True)
        # Add field for the actual skypix index.  We do this later because I
        # think we care (at least a bit) about the order in which the primary
        # key is defined, in that we want a non-summary column like this one
        # to appear after the governor dimension column.
        tableSpec.fields.add(
            ddl.FieldSpec(
                name="skypix_index",
                dtype=sqlalchemy.BigInteger,
                nullable=False,
                primaryKey=True,
            )
        )
        return tableSpec

    def enable(
        self,
        storage: TableDimensionRecordStorage,
        skypix: SkyPixDimension,
        governorValue: str,
    ) -> None:
        """Enable materialization of overlaps between a skypix dimension
        and the records of ``self.element`` with a particular governor value.

        Parameters
        ----------
        storage : `TableDimensionRecordStorage`
            Storage object for the records of ``self.element``.
        skypix : `SkyPixDimension`
            The skypix dimension (system and level) for which overlaps should
            be materialized.
        governorValue : `str`
            Value of this element's governor dimension for which overlaps
            should be materialized.  For example, if ``self.element`` is
            ``visit``, this is an instrument name; if ``self.element`` is
            ``patch``, this is a skymap name.

        Notes
        -----
        If there are existing rows for the given ``governorValue``, overlap
        rows for them will be immediately computed and inserted.  At present,
        that never happens, because we only enable overlaps with
        `DimensionUniverse.commonSkyPix`, and that happens immediately after
        each governor row is inserted (and there can't be any patch rows,
        for example, until after the corresponding skymap row is inserted).

        After calling `enable` for a particular combination, any new records
        for ``self.element`` that are inserted will automatically be
        accompanied by overlap records (via calls to `insert` made
        by `TableDimensionRecordStorage` methods).
        """
        # Because we're essentially materializing a view in Python, we
        # aggressively lock all tables we're reading and writing in order to be
        # sure nothing gets out of sync.  This may not be the most efficient
        # approach possible, but we'll focus on correct before we focus on
        # fast, and enabling a new overlap combination should be a very rare
        # operation anyway, and never one we do in parallel.
        with self._db.transaction(lock=[self._governor.table, storage._table,
                                        self._summaryTable, self._overlapTable]):
            # Only the "was a row inserted" flag matters here; the synced row
            # itself is not used.
            _, inserted = self._db.sync(
                self._summaryTable,
                keys={
                    "skypix_system": skypix.system.name,
                    "skypix_level": skypix.level,
                    self._governor.element.name: governorValue,
                },
            )
            if inserted:
                _LOG.debug(
                    "Precomputing initial overlaps for %s vs %s for %s=%s",
                    skypix.name,
                    self.element.name,
                    self._governor.element.name,
                    governorValue
                )
                self._fill(storage=storage, skypix=skypix, governorValue=governorValue)
            else:
                _LOG.debug(
                    "Overlaps already precomputed for %s vs %s for %s=%s",
                    skypix.name,
                    self.element.name,
                    self._governor.element.name,
                    governorValue
                )

    def _fill(
        self, *,
        storage: TableDimensionRecordStorage,
        skypix: SkyPixDimension,
        governorValue: str,
    ) -> None:
        """Insert overlap records for a newly-enabled combination of skypix
        dimension and governor value.

        This method should only be called by `enable`.

        Parameters
        ----------
        storage : `TableDimensionRecordStorage`
            Storage object for the records of ``self.element``.
        skypix : `SkyPixDimension`
            The skypix dimension (system and level) for which overlaps should
            be materialized.
        governorValue : `str`
            Value of this element's governor dimension for which overlaps
            should be materialized.  For example, if ``self.element`` is
            ``visit``, this is an instrument name; if ``self.element`` is
            ``patch``, this is a skymap name.
        """
        overlapRecords: List[dict] = []
        # `DimensionRecordStorage.fetch` as defined by the ABC expects to be
        # given iterables of data IDs that correspond to that element's graph
        # (e.g. {instrument, visit, detector}), not just some subset of it
        # (e.g. {instrument}).  But we know the implementation of `fetch` for
        # `TableDimensionRecordStorage` will use this iterable to do exactly
        # what we want.
        governorDataId = DataCoordinate.standardize({self._governor.element.name: governorValue},
                                                    graph=self._governor.element.graph)
        for record in storage.fetch(DataCoordinateIterable.fromScalar(governorDataId)):
            if record.region is None:
                continue
            baseOverlapRecord = record.dataId.byName()
            baseOverlapRecord["skypix_system"] = skypix.system.name
            baseOverlapRecord["skypix_level"] = skypix.level
            # The envelope is a sequence of half-open [begin, end) index
            # ranges; emit one overlap row per index.
            for begin, end in skypix.pixelization.envelope(record.region):
                overlapRecords.extend(
                    dict(baseOverlapRecord, skypix_index=index) for index in range(begin, end)
                )
        _LOG.debug(
            "Inserting %d initial overlap rows for %s vs %s for %s=%r",
            len(overlapRecords),
            skypix.name,
            self.element.name,
            self._governor.element.name,
            governorValue,
        )
        self._db.insert(self._overlapTable, *overlapRecords)

    def insert(self, records: Sequence[DimensionRecord], replace: bool = False) -> None:
        """Insert overlaps for a sequence of ``self.element`` records that
        have just been inserted.

        This must be called by any method that inserts records for that
        element (i.e. `TableDimensionRecordStorage.insert` and
        `TableDimensionRecordStorage.sync`), within the same transaction.

        Parameters
        ----------
        records : `Sequence` [ `DimensionRecord` ]
            Records for ``self.element``.  Records with `None` regions are
            ignored.
        replace : `bool`, optional
            If `True` (`False` is default) one or more of the given records may
            already exist and is being updated, so we need to delete any
            existing overlap records first.
        """
        # Group records by family.governor value.
        grouped: Dict[str, List[DimensionRecord]] = defaultdict(list)
        for record in records:
            grouped[getattr(record, self._governor.element.name)].append(record)
        _LOG.debug(
            "Precomputing new skypix overlaps for %s where %s in %s.",
            self.element.name, self._governor.element.name, grouped.keys()
        )
        # Make sure the set of combinations to materialize does not change
        # while we are materializing the ones we have, by locking the summary
        # table.  Because we aren't planning to write to the summary table,
        # this could just be a SHARED lock instead of an EXCLUSIVE one, but
        # there's no API for that right now.
        with self._db.transaction(lock=[self._summaryTable]):
            # Query for the skypix dimensions to be associated with each
            # governor value.
            gvCol = self._summaryTable.columns[self._governor.element.name]
            sysCol = self._summaryTable.columns.skypix_system
            lvlCol = self._summaryTable.columns.skypix_level
            query = sqlalchemy.sql.select(
                gvCol, sysCol, lvlCol,
            ).select_from(
                self._summaryTable
            ).where(
                gvCol.in_(list(grouped.keys()))
            )
            # Group results by governor value, then skypix system.
            skypix: Dict[str, NamedKeyDict[SkyPixSystem, List[int]]] = {
                gv: NamedKeyDict() for gv in grouped.keys()
            }
            for summaryRow in self._db.query(query).mappings():
                system = self.element.universe.skypix[summaryRow[sysCol]]
                skypix[summaryRow[gvCol]].setdefault(system, []).append(summaryRow[lvlCol])
            if replace:
                # Construct constraints for a DELETE query as a list of dicts.
                # We include the skypix_system and skypix_level column values
                # explicitly instead of just letting the query search for all
                # of those related to the given records, because they are the
                # first columns in the primary key, and hence searching with
                # them will be way faster (and we don't want to add a new index
                # just for this operation).
                to_delete: List[Dict[str, Any]] = []
                for gv, skypix_systems in skypix.items():
                    for system, skypix_levels in skypix_systems.items():
                        to_delete.extend(
                            {"skypix_system": system.name, "skypix_level": level, **record.dataId.byName()}
                            for record, level in itertools.product(grouped[gv], skypix_levels)
                        )
                self._db.delete(
                    self._overlapTable,
                    ["skypix_system", "skypix_level"] + list(self.element.graph.required.names),
                    *to_delete,
                )
            overlapRecords: List[dict] = []
            # Compute overlaps for one governor value at a time, but gather
            # them all up for one insert.
            for gv, group in grouped.items():
                overlapRecords.extend(self._compute(group, skypix[gv], gv))
            _LOG.debug(
                "Inserting %d new skypix overlap rows for %s where %s in %s.",
                len(overlapRecords), self.element.name, self._governor.element.name, grouped.keys()
            )
            self._db.insert(self._overlapTable, *overlapRecords)

    def _compute(
        self,
        records: Sequence[DimensionRecord],
        skypix: NamedKeyDict[SkyPixSystem, List[int]],
        governorValue: str,
    ) -> Iterator[dict]:
        """Compute all overlap rows for a particular governor dimension value
        and all of the skypix dimensions for which its overlaps are enabled.

        This method should only be called by `insert`.

        Parameters
        ----------
        records : `Sequence` [ `DimensionRecord` ]
            Records for ``self.element``.  Records with `None` regions are
            ignored.  All must have the governor value given.
        skypix : `NamedKeyDict` [ `SkyPixSystem`, `list` [ `int` ] ]
            Mapping containing all skypix systems and levels for which overlaps
            should be computed, grouped by `SkyPixSystem`.  Neither the
            mapping nor its level lists are modified.
        governorValue : `str`
            Value of this element's governor dimension for which overlaps
            should be computed.  For example, if ``self.element`` is ``visit``,
            this is an instrument name; if ``self.element`` is ``patch``, this
            is a skymap name.

        Yields
        ------
        row : `dict`
            Dictionary representing an overlap row.
        """
        governorName = self._governor.element.name
        # Sort a private copy of each system's levels (finest-grained first)
        # once up front, instead of sorting the caller's lists in place for
        # every record.  Systems with no enabled levels are dropped.
        levelsBySystem = [
            (system, sorted(levels, reverse=True))
            for system, levels in skypix.items()
            if levels
        ]
        # Process input records one at time, computing all skypix indices for
        # each.
        for record in records:
            if record.region is None:
                continue
            assert getattr(record, governorName) == governorValue
            for system, levels in levelsBySystem:
                baseOverlapRecord = record.dataId.byName()
                baseOverlapRecord["skypix_system"] = system.name
                # Start with the first level, which is the finest-grained one.
                # Compute skypix envelope indices directly for that.
                finestLevel = levels[0]
                indices: Dict[int, Set[int]] = {finestLevel: set()}
                for begin, end in system[finestLevel].pixelization.envelope(record.region):
                    indices[finestLevel].update(range(begin, end))
                # Divide those indices by powers of 4 (and remove duplicates)
                # to work our way up to the last (coarsest) level.
                for lastLevel, nextLevel in zip(levels[:-1], levels[1:]):
                    factor = 4**(lastLevel - nextLevel)
                    indices[nextLevel] = {index//factor for index in indices[lastLevel]}
                for level in levels:
                    yield from (
                        {
                            "skypix_level": level,
                            "skypix_index": index,
                            **baseOverlapRecord,  # type: ignore
                        } for index in indices[level]
                    )

    def select(
        self,
        skypix: SkyPixDimension,
        governorValues: Union[AbstractSet[str], EllipsisType],
    ) -> sqlalchemy.sql.FromClause:
        """Construct a subquery expression containing overlaps between the
        given skypix dimension and governor values.

        Parameters
        ----------
        skypix : `SkyPixDimension`
            The skypix dimension (system and level) for which overlaps should
            be returned.
        governorValues : `AbstractSet` [ `str` ] or ``...``
            Values of this element's governor dimension for which overlaps
            should be returned.  For example, if ``self.element`` is ``visit``,
            this is a set of instrument names; if ``self.element`` is
            ``patch``, this is a set of skymap names.  If ``...`` all values
            in the database are used (`GovernorDimensionRecordStorage.values`).

        Returns
        -------
        subquery : `sqlalchemy.sql.FromClause`
            A SELECT query with an alias, intended for use as a subquery, with
            columns equal to ``self.element.required.names`` + ``skypix.name``.

        Raises
        ------
        RuntimeError
            Raised if ``skypix`` is not the common skypix dimension and
            overlaps with it have not been materialized for all of the
            requested governor values.
        """
        if skypix != self.element.universe.commonSkyPix:
            # We guarantee elsewhere that we always materialize all overlaps
            # vs. commonSkyPix, but for everything else, we need to check that
            # we have materialized this combination of governor values and
            # skypix.
            summaryWhere = [
                self._summaryTable.columns.skypix_system == skypix.system.name,
                self._summaryTable.columns.skypix_level == skypix.level,
            ]
            gvCol = self._summaryTable.columns[self._governor.element.name]
            if governorValues is not Ellipsis:
                summaryWhere.append(gvCol.in_(list(governorValues)))
            summaryQuery = sqlalchemy.sql.select(
                gvCol
            ).select_from(
                self._summaryTable
            ).where(
                sqlalchemy.sql.and_(*summaryWhere)
            )
            materializedGovernorValues = {row._mapping[gvCol] for row in self._db.query(summaryQuery)}
            if governorValues is Ellipsis:
                missingGovernorValues = self._governor.values - materializedGovernorValues
            else:
                missingGovernorValues = governorValues - materializedGovernorValues
            if missingGovernorValues:
                raise RuntimeError(
                    f"Query requires an overlap join between {skypix.name} and {self.element.name} "
                    f"(for {self._governor.element.name} in {missingGovernorValues}), but these "
                    f"have not been materialized."
                )
        # The skypix index column is relabeled with the skypix dimension's
        # name so it can be joined on like any other dimension column.
        columns = [self._overlapTable.columns.skypix_index.label(skypix.name)]
        columns.extend(self._overlapTable.columns[name] for name in self.element.graph.required.names)
        overlapWhere = [
            self._overlapTable.columns.skypix_system == skypix.system.name,
            self._overlapTable.columns.skypix_level == skypix.level,
        ]
        if governorValues is not Ellipsis:
            overlapWhere.append(
                self._overlapTable.columns[self._governor.element.name].in_(list(governorValues))
            )
        overlapQuery = sqlalchemy.sql.select(
            *columns
        ).select_from(
            self._overlapTable
        ).where(
            sqlalchemy.sql.and_(*overlapWhere)
        )
        return overlapQuery.alias(f"{self.element.name}_{skypix.name}_overlap")

    def digestTables(self) -> Iterable[sqlalchemy.schema.Table]:
        """Return tables used for schema digest.

        Returns
        -------
        tables : `Iterable` [ `sqlalchemy.schema.Table` ]
            Possibly empty set of tables for schema digest calculations.
        """
        return [self._summaryTable, self._overlapTable]