Coverage for python/lsst/daf/butler/registry/dimensions/table.py: 86%
236 statements
« prev ^ index » next coverage.py v6.4.2, created at 2022-07-15 02:34 -0700
« prev ^ index » next coverage.py v6.4.2, created at 2022-07-15 02:34 -0700
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = ["TableDimensionRecordStorage"]
25import itertools
26import logging
27import warnings
28from collections import defaultdict
29from typing import AbstractSet, Any, Dict, Iterable, Iterator, List, Mapping, Optional, Sequence, Set, Union
31import sqlalchemy
33from ...core import (
34 DatabaseDimensionElement,
35 DataCoordinate,
36 DataCoordinateIterable,
37 DimensionElement,
38 DimensionRecord,
39 GovernorDimension,
40 NamedKeyDict,
41 NamedKeyMapping,
42 NamedValueSet,
43 SimpleQuery,
44 SkyPixDimension,
45 SkyPixSystem,
46 SpatialRegionDatabaseRepresentation,
47 TimespanDatabaseRepresentation,
48 addDimensionForeignKey,
49 ddl,
50)
51from ..interfaces import (
52 Database,
53 DatabaseDimensionOverlapStorage,
54 DatabaseDimensionRecordStorage,
55 GovernorDimensionRecordStorage,
56 StaticTablesContext,
57)
58from ..queries import QueryBuilder
59from ..wildcards import Ellipsis, EllipsisType
# Module-level logger, named after this module so that log records can be
# filtered per package/module.
_LOG = logging.getLogger(__name__)


# NOTE(review): this constant is not referenced by any code visible in this
# file -- confirm that it is still used elsewhere before relying on it.
MAX_FETCH_CHUNK = 1000
"""Maximum number of data IDs we fetch records at a time.

Barring something database-engine-specific, this sets the size of the actual
SQL query, not just the number of result rows, because the only way to query
for multiple data IDs in a single SELECT query via SQLAlchemy is to have an OR
term in the WHERE clause for each one.
"""
class TableDimensionRecordStorage(DatabaseDimensionRecordStorage):
    """A record storage implementation that uses a regular database table.

    Parameters
    ----------
    db : `Database`
        Interface to the database engine and namespace that will hold these
        dimension records.
    element : `DatabaseDimensionElement`
        The element whose records this storage will manage.
    table : `sqlalchemy.schema.Table`
        The logical table for the element.
    skyPixOverlap : `_SkyPixOverlapStorage`, optional
        Object that manages the tables that hold materialized spatial overlap
        joins to skypix dimensions.  Should be `None` if (and only if)
        ``element.spatial is None``.
    """

    def __init__(
        self,
        db: Database,
        element: DatabaseDimensionElement,
        *,
        table: sqlalchemy.schema.Table,
        skyPixOverlap: Optional[_SkyPixOverlapStorage] = None,
    ):
        self._db = db
        self._table = table
        self._element = element
        # Map each dimension name to the table column that holds its value;
        # used by `fetch` to translate data ID constraints into column
        # expressions.
        self._fetchColumns: Dict[str, sqlalchemy.sql.ColumnElement] = {
            dimension.name: self._table.columns[name]
            for dimension, name in zip(
                self._element.dimensions, self._element.RecordClass.fields.dimensions.names
            )
        }
        self._skyPixOverlap = skyPixOverlap
        self._otherOverlaps: List[DatabaseDimensionOverlapStorage] = []

    @classmethod
    def initialize(
        cls,
        db: Database,
        element: DatabaseDimensionElement,
        *,
        context: Optional[StaticTablesContext] = None,
        config: Mapping[str, Any],
        governors: NamedKeyMapping[GovernorDimension, GovernorDimensionRecordStorage],
    ) -> DatabaseDimensionRecordStorage:
        # Docstring inherited from DatabaseDimensionRecordStorage.
        spec = element.RecordClass.fields.makeTableSpec(
            RegionReprClass=db.getSpatialRegionRepresentation(),
            TimespanReprClass=db.getTimespanRepresentation(),
        )
        if context is not None:
            table = context.addTable(element.name, spec)
        else:
            table = db.ensureTableExists(element.name, spec)
        if element.spatial is None:
            # Non-spatial elements have no skypix overlaps to manage.
            return cls(db, element, table=table)
        governor = governors[element.spatial.governor]
        skyPixOverlap: Optional[_SkyPixOverlapStorage] = _SkyPixOverlapStorage.initialize(
            db,
            element,
            context=context,
            governor=governor,
        )
        result = cls(db, element, table=table, skyPixOverlap=skyPixOverlap)

        # Whenever anyone inserts a new governor dimension value, we want
        # to enable overlaps for that value between this element and
        # commonSkyPix.
        def callback(record: DimensionRecord) -> None:
            skyPixOverlap.enable(  # type: ignore
                result,
                element.universe.commonSkyPix,
                getattr(record, element.spatial.governor.primaryKey.name),  # type: ignore
            )

        governor.registerInsertionListener(callback)
        return result

    @property
    def element(self) -> DatabaseDimensionElement:
        # Docstring inherited from DimensionRecordStorage.element.
        return self._element

    def clearCaches(self) -> None:
        # Docstring inherited from DimensionRecordStorage.clearCaches.
        pass

    def join(
        self,
        builder: QueryBuilder,
        *,
        regions: Optional[NamedKeyDict[DimensionElement, SpatialRegionDatabaseRepresentation]] = None,
        timespans: Optional[NamedKeyDict[DimensionElement, TimespanDatabaseRepresentation]] = None,
    ) -> sqlalchemy.sql.FromClause:
        # Docstring inherited from DimensionRecordStorage.
        #
        # Returns the element's table after it has been joined into
        # ``builder``; the return annotation reflects that (it previously
        # claimed `None` even though the table was returned).
        if regions is not None:
            dimensions = NamedValueSet(self.element.required)
            dimensions.add(self.element.universe.commonSkyPix)
            assert self._skyPixOverlap is not None
            builder.joinTable(
                self._skyPixOverlap.select(self.element.universe.commonSkyPix, Ellipsis),
                dimensions,
            )
            regionsInTable = self._db.getSpatialRegionRepresentation().fromSelectable(self._table)
            regions[self.element] = regionsInTable
        joinOn = builder.startJoin(
            self._table, self.element.dimensions, self.element.RecordClass.fields.dimensions.names
        )
        if timespans is not None:
            timespanInTable = self._db.getTimespanRepresentation().fromSelectable(self._table)
            # Require an overlap with every timespan already in the query
            # before registering this element's own timespan.
            for timespanInQuery in timespans.values():
                joinOn.append(timespanInQuery.overlaps(timespanInTable))
            timespans[self.element] = timespanInTable
        builder.finishJoin(self._table, joinOn)
        return self._table

    def fetch(self, dataIds: DataCoordinateIterable) -> Iterable[DimensionRecord]:
        # Docstring inherited from DimensionRecordStorage.fetch.
        RecordClass = self.element.RecordClass
        query = SimpleQuery()
        query.columns.extend(self._table.columns[name] for name in RecordClass.fields.standard.names)
        if self.element.spatial is not None:
            query.columns.append(self._table.columns["region"])
        if self.element.temporal is not None:
            TimespanReprClass = self._db.getTimespanRepresentation()
            query.columns.extend(self._table.columns[name] for name in TimespanReprClass.getFieldNames())
        query.join(self._table)
        dataIds.constrain(query, lambda name: self._fetchColumns[name])
        with warnings.catch_warnings():
            # Some of our generated queries may contain cartesian joins, this
            # is not a serious issue as it is properly constrained, so we want
            # to suppress sqlalchemy warnings.
            warnings.filterwarnings(
                "ignore",
                message="SELECT statement has a cartesian product",
                category=sqlalchemy.exc.SAWarning,
            )
            for row in self._db.query(query.combine()):
                values = row._asdict()
                if self.element.temporal is not None:
                    # Collapse the database-specific timespan columns into a
                    # single entry under the representation's standard name.
                    values[TimespanDatabaseRepresentation.NAME] = TimespanReprClass.extract(values)
                yield RecordClass(**values)

    def insert(self, *records: DimensionRecord, replace: bool = False, skip_existing: bool = False) -> None:
        # Docstring inherited from DimensionRecordStorage.insert.
        elementRows = [record.toDict() for record in records]
        if self.element.temporal is not None:
            # Expand each logical timespan into its database representation.
            TimespanReprClass = self._db.getTimespanRepresentation()
            for row in elementRows:
                timespan = row.pop(TimespanDatabaseRepresentation.NAME)
                TimespanReprClass.update(timespan, result=row)
        # Element rows and their overlap rows go in within one transaction.
        with self._db.transaction():
            if replace:
                self._db.replace(self._table, *elementRows)
            elif skip_existing:
                self._db.ensure(self._table, *elementRows, primary_key_only=True)
            else:
                self._db.insert(self._table, *elementRows)
            if self._skyPixOverlap is not None:
                self._skyPixOverlap.insert(records, replace=replace, skip_existing=skip_existing)

    def sync(self, record: DimensionRecord, update: bool = False) -> Union[bool, Dict[str, Any]]:
        # Docstring inherited from DimensionRecordStorage.sync.
        compared = record.toDict()
        # Required fields identify the row; everything else is compared
        # against (and possibly used to update) the existing row.
        keys = {}
        for name in record.fields.required.names:
            keys[name] = compared.pop(name)
        if self.element.temporal is not None:
            TimespanReprClass = self._db.getTimespanRepresentation()
            timespan = compared.pop(TimespanDatabaseRepresentation.NAME)
            TimespanReprClass.update(timespan, result=compared)
        with self._db.transaction():
            _, inserted_or_updated = self._db.sync(
                self._table,
                keys=keys,
                compared=compared,
                update=update,
            )
            if inserted_or_updated and self._skyPixOverlap is not None:
                if inserted_or_updated is True:
                    # Inserted a new row, so we just need to insert new overlap
                    # rows.
                    self._skyPixOverlap.insert([record])
                elif "region" in inserted_or_updated:
                    # Updated the region, so we need to delete old overlap rows
                    # and insert new ones.
                    # (mypy should be able to tell that inserted_or_updated
                    # must be a dict if we get to this clause, but it can't)
                    self._skyPixOverlap.insert([record], replace=True)
                # Otherwise we updated something other than a region, and the
                # existing overlap rows remain valid.
        return inserted_or_updated

    def digestTables(self) -> Iterable[sqlalchemy.schema.Table]:
        # Docstring inherited from DimensionRecordStorage.digestTables.
        result = [self._table]
        if self._skyPixOverlap is not None:
            result.extend(self._skyPixOverlap.digestTables())
        return result

    def connect(self, overlaps: DatabaseDimensionOverlapStorage) -> None:
        # Docstring inherited from DatabaseDimensionRecordStorage.
        self._otherOverlaps.append(overlaps)
class _SkyPixOverlapStorage:
    """A helper object for `TableDimensionRecordStorage` that manages its
    materialized overlaps with skypix dimensions.

    New instances should be constructed by calling `initialize`, not by calling
    the constructor directly.

    Parameters
    ----------
    db : `Database`
        Interface to the underlying database engine and namespace.
    element : `DatabaseDimensionElement`
        Dimension element whose overlaps are to be managed.
    summaryTable : `sqlalchemy.schema.Table`
        Table that records which combinations of skypix dimensions and
        governor dimension values have materialized overlap rows.
    overlapTable : `sqlalchemy.schema.Table`
        Table containing the actual materialized overlap rows.
    governor : `GovernorDimensionRecordStorage`
        Record storage backend for this element's governor dimension.

    Notes
    -----
    This class (and most importantly, the tables it relies on) can in principle
    manage overlaps with any skypix dimension, but at present it is
    only being used to manage relationships with the special ``commonSkyPix``
    dimension, because that's all the query system uses.  Eventually, we expect
    to require users to explicitly materialize all relationships they will
    want to use in queries.

    Other possible future improvements include:

    - allowing finer-grained skypix dimensions to provide overlap rows for
      coarser ones, by dividing indices by powers of 4 (and possibly doing
      ``SELECT DISTINCT`` in the subquery to remove duplicates);

    - allowing finer-grained database elements (e.g. patch) to provide overlap
      rows for coarser ones (e.g. tract), by ignoring irrelevant columns
      (e.g. the patch IDs) in the subquery (again, possible with
      ``SELECT DISTINCT``).

    But there's no point to doing any of that until the query system can
    figure out how best to ask for overlap rows when an exact match isn't
    available.
    """

    def __init__(
        self,
        db: Database,
        element: DatabaseDimensionElement,
        summaryTable: sqlalchemy.schema.Table,
        overlapTable: sqlalchemy.schema.Table,
        governor: GovernorDimensionRecordStorage,
    ):
        self._db = db
        self.element = element
        assert element.spatial is not None
        self._summaryTable = summaryTable
        self._overlapTable = overlapTable
        self._governor = governor

    @classmethod
    def initialize(
        cls,
        db: Database,
        element: DatabaseDimensionElement,
        *,
        context: Optional[StaticTablesContext],
        governor: GovernorDimensionRecordStorage,
    ) -> _SkyPixOverlapStorage:
        """Construct a new instance, creating tables as needed.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        element : `DatabaseDimensionElement`
            Dimension element whose overlaps are to be managed.
        context : `StaticTablesContext`, optional
            If provided, an object to use to create any new tables.  If not
            provided, ``db.ensureTableExists`` should be used instead.
        governor : `GovernorDimensionRecordStorage`
            Record storage backend for this element's governor dimension.

        Returns
        -------
        storage : `_SkyPixOverlapStorage`
            New instance of ``cls`` backed by the summary and overlap tables.
        """
        if context is not None:
            op = context.addTable
        else:
            op = db.ensureTableExists
        summaryTable = op(
            cls._SUMMARY_TABLE_NAME_SPEC.format(element=element),
            cls._makeSummaryTableSpec(element),
        )
        overlapTable = op(
            cls._OVERLAP_TABLE_NAME_SPEC.format(element=element),
            cls._makeOverlapTableSpec(element),
        )
        # Use ``cls`` rather than the hard-coded class name so that this
        # alternate constructor also works for subclasses.
        return cls(db, element, summaryTable=summaryTable, overlapTable=overlapTable, governor=governor)

    _SUMMARY_TABLE_NAME_SPEC = "{element.name}_skypix_overlap_summary"

    @classmethod
    def _makeSummaryTableSpec(cls, element: DatabaseDimensionElement) -> ddl.TableSpec:
        """Create a specification for the table that records which combinations
        of skypix dimension and governor value have materialized overlaps.

        Parameters
        ----------
        element : `DatabaseDimensionElement`
            Dimension element whose overlaps are to be managed.

        Returns
        -------
        tableSpec : `ddl.TableSpec`
            Table specification.
        """
        assert element.spatial is not None
        tableSpec = ddl.TableSpec(
            fields=[
                ddl.FieldSpec(
                    name="skypix_system",
                    dtype=sqlalchemy.String,
                    length=16,
                    nullable=False,
                    primaryKey=True,
                ),
                ddl.FieldSpec(
                    name="skypix_level",
                    dtype=sqlalchemy.SmallInteger,
                    nullable=False,
                    primaryKey=True,
                ),
            ]
        )
        addDimensionForeignKey(tableSpec, element.spatial.governor, primaryKey=True)
        return tableSpec

    _OVERLAP_TABLE_NAME_SPEC = "{element.name}_skypix_overlap"

    @classmethod
    def _makeOverlapTableSpec(cls, element: DatabaseDimensionElement) -> ddl.TableSpec:
        """Create a specification for the table that holds materialized
        overlap rows.

        Parameters
        ----------
        element : `DatabaseDimensionElement`
            Dimension element whose overlaps are to be managed.

        Returns
        -------
        tableSpec : `ddl.TableSpec`
            Table specification.
        """
        assert element.spatial is not None
        tableSpec = ddl.TableSpec(
            fields=[
                ddl.FieldSpec(
                    name="skypix_system",
                    dtype=sqlalchemy.String,
                    length=16,
                    nullable=False,
                    primaryKey=True,
                ),
                ddl.FieldSpec(
                    name="skypix_level",
                    dtype=sqlalchemy.SmallInteger,
                    nullable=False,
                    primaryKey=True,
                ),
                # (more columns added below)
            ],
            unique=set(),
            indexes={
                # This index has the same fields as the PK, in a different
                # order, to facilitate queries that know skypix_index and want
                # to find the other element.
                (
                    "skypix_system",
                    "skypix_level",
                    "skypix_index",
                )
                + tuple(element.graph.required.names),
            },
            foreignKeys=[
                # Foreign key to summary table.  This makes sure we don't
                # materialize any overlaps without remembering that we've done
                # so in the summary table, though it can't prevent the converse
                # of adding a summary row without adding overlap row (either of
                # those is a logic bug, of course, but we want to be defensive
                # about those).  Using ON DELETE CASCADE, it'd be very easy to
                # implement "disabling" an overlap materialization, because we
                # can just delete the summary row.
                # Note that the governor dimension column is added below, in
                # the call to addDimensionForeignKey.
                ddl.ForeignKeySpec(
                    cls._SUMMARY_TABLE_NAME_SPEC.format(element=element),
                    source=("skypix_system", "skypix_level", element.spatial.governor.name),
                    target=("skypix_system", "skypix_level", element.spatial.governor.name),
                    onDelete="CASCADE",
                ),
            ],
        )
        # Add fields for the standard element this class manages overlaps for.
        # This is guaranteed to add a column for the governor dimension,
        # because that's a required dependency of element.
        for dimension in element.required:
            addDimensionForeignKey(tableSpec, dimension, primaryKey=True)
        # Add field for the actual skypix index.  We do this later because I
        # think we care (at least a bit) about the order in which the primary
        # key is defined, in that we want a non-summary column like this one
        # to appear after the governor dimension column.
        tableSpec.fields.add(
            ddl.FieldSpec(
                name="skypix_index",
                dtype=sqlalchemy.BigInteger,
                nullable=False,
                primaryKey=True,
            )
        )
        return tableSpec

    def enable(
        self,
        storage: TableDimensionRecordStorage,
        skypix: SkyPixDimension,
        governorValue: str,
    ) -> None:
        """Enable materialization of overlaps between a skypix dimension
        and the records of ``self.element`` with a particular governor value.

        Parameters
        ----------
        storage : `TableDimensionRecordStorage`
            Storage object for the records of ``self.element``.
        skypix : `SkyPixDimension`
            The skypix dimension (system and level) for which overlaps should
            be materialized.
        governorValue : `str`
            Value of this element's governor dimension for which overlaps
            should be materialized.  For example, if ``self.element`` is
            ``visit``, this is an instrument name; if ``self.element`` is
            ``patch``, this is a skymap name.

        Notes
        -----
        If there are existing rows for the given ``governorValue``, overlap
        rows for them will be immediately computed and inserted.  At present,
        that never happens, because we only enable overlaps with
        `DimensionUniverse.commonSkyPix`, and that happens immediately after
        each governor row is inserted (and there can't be any patch rows,
        for example, until after the corresponding skymap row is inserted).

        After calling `enable` for a particular combination, any new records
        for ``self.element`` that are inserted will automatically be
        accompanied by overlap records (via calls to `insert` made
        by `TableDimensionRecordStorage` methods).
        """
        # Because we're essentially materializing a view in Python, we
        # aggressively lock all tables we're reading and writing in order to be
        # sure nothing gets out of sync.  This may not be the most efficient
        # approach possible, but we'll focus on correct before we focus on
        # fast, and enabling a new overlap combination should be a very rare
        # operation anyway, and never one we do in parallel.
        with self._db.transaction(
            lock=[self._governor.table, storage._table, self._summaryTable, self._overlapTable]
        ):
            # Only the "was a row inserted" flag matters here; the synced row
            # itself is not needed.
            _, inserted = self._db.sync(
                self._summaryTable,
                keys={
                    "skypix_system": skypix.system.name,
                    "skypix_level": skypix.level,
                    self._governor.element.name: governorValue,
                },
            )
            if inserted:
                _LOG.debug(
                    "Precomputing initial overlaps for %s vs %s for %s=%s",
                    skypix.name,
                    self.element.name,
                    self._governor.element.name,
                    governorValue,
                )
                self._fill(storage=storage, skypix=skypix, governorValue=governorValue)
            else:
                _LOG.debug(
                    "Overlaps already precomputed for %s vs %s for %s=%s",
                    skypix.name,
                    self.element.name,
                    self._governor.element.name,
                    governorValue,
                )

    def _fill(
        self,
        *,
        storage: TableDimensionRecordStorage,
        skypix: SkyPixDimension,
        governorValue: str,
    ) -> None:
        """Insert overlap records for a newly-enabled combination of skypix
        dimension and governor value.

        This method should only be called by `enable`.

        Parameters
        ----------
        storage : `TableDimensionRecordStorage`
            Storage object for the records of ``self.element``.
        skypix : `SkyPixDimension`
            The skypix dimension (system and level) for which overlaps should
            be materialized.
        governorValue : `str`
            Value of this element's governor dimension for which overlaps
            should be materialized.  For example, if ``self.element`` is
            ``visit``, this is an instrument name; if ``self.element`` is
            ``patch``, this is a skymap name.
        """
        overlapRecords: List[dict] = []
        # `DimensionRecordStorage.fetch` as defined by the ABC expects to be
        # given iterables of data IDs that correspond to that element's graph
        # (e.g. {instrument, visit, detector}), not just some subset of it
        # (e.g. {instrument}).  But we know the implementation of `fetch` for
        # `TableDimensionRecordStorage` will use this iterable to do exactly
        # what we want.
        governorDataId = DataCoordinate.standardize(
            {self._governor.element.name: governorValue}, graph=self._governor.element.graph
        )
        for record in storage.fetch(DataCoordinateIterable.fromScalar(governorDataId)):
            if record.region is None:
                continue
            baseOverlapRecord = record.dataId.byName()
            baseOverlapRecord["skypix_system"] = skypix.system.name
            baseOverlapRecord["skypix_level"] = skypix.level
            # The envelope is a sequence of half-open [begin, end) index
            # ranges; emit one overlap row per index.
            for begin, end in skypix.pixelization.envelope(record.region):
                overlapRecords.extend(
                    dict(baseOverlapRecord, skypix_index=index) for index in range(begin, end)
                )
        _LOG.debug(
            "Inserting %d initial overlap rows for %s vs %s for %s=%r",
            len(overlapRecords),
            skypix.name,
            self.element.name,
            self._governor.element.name,
            governorValue,
        )
        self._db.insert(self._overlapTable, *overlapRecords)

    def insert(
        self, records: Sequence[DimensionRecord], replace: bool = False, skip_existing: bool = False
    ) -> None:
        """Insert overlaps for a sequence of ``self.element`` records that
        have just been inserted.

        This must be called by any method that inserts records for that
        element (i.e. `TableDimensionRecordStorage.insert` and
        `TableDimensionRecordStorage.sync`), within the same transaction.

        Parameters
        ----------
        records : `Sequence` [ `DimensionRecord` ]
            Records for ``self.element``.  Records with `None` regions are
            ignored.
        replace : `bool`, optional
            If `True` (`False` is default) one or more of the given records may
            already exist and is being updated, so we need to delete any
            existing overlap records first.
        skip_existing : `bool`, optional
            If `True` (`False` is default), skip insertion if a record with
            the same primary key values already exists.
        """
        # Group records by family.governor value.
        grouped: Dict[str, List[DimensionRecord]] = defaultdict(list)
        for record in records:
            grouped[getattr(record, self._governor.element.name)].append(record)
        _LOG.debug(
            "Precomputing new skypix overlaps for %s where %s in %s.",
            self.element.name,
            self._governor.element.name,
            grouped.keys(),
        )
        # Make sure the set of combinations to materialize does not change
        # while we are materializing the ones we have, by locking the summary
        # table.  Because we aren't planning to write to the summary table,
        # this could just be a SHARED lock instead of an EXCLUSIVE one, but
        # there's no API for that right now.
        with self._db.transaction(lock=[self._summaryTable]):
            # Query for the skypix dimensions to be associated with each
            # governor value.
            gvCol = self._summaryTable.columns[self._governor.element.name]
            sysCol = self._summaryTable.columns.skypix_system
            lvlCol = self._summaryTable.columns.skypix_level
            query = (
                sqlalchemy.sql.select(
                    gvCol,
                    sysCol,
                    lvlCol,
                )
                .select_from(self._summaryTable)
                .where(gvCol.in_(list(grouped.keys())))
            )
            # Group results by governor value, then skypix system.
            skypix: Dict[str, NamedKeyDict[SkyPixSystem, List[int]]] = {
                gv: NamedKeyDict() for gv in grouped.keys()
            }
            for summaryRow in self._db.query(query).mappings():
                system = self.element.universe.skypix[summaryRow[sysCol]]
                skypix[summaryRow[gvCol]].setdefault(system, []).append(summaryRow[lvlCol])
            if replace:
                # Construct constraints for a DELETE query as a list of dicts.
                # We include the skypix_system and skypix_level column values
                # explicitly instead of just letting the query search for all
                # of those related to the given records, because they are the
                # first columns in the primary key, and hence searching with
                # them will be way faster (and we don't want to add a new index
                # just for this operation).
                to_delete: List[Dict[str, Any]] = []
                for gv, skypix_systems in skypix.items():
                    for system, skypix_levels in skypix_systems.items():
                        to_delete.extend(
                            {"skypix_system": system.name, "skypix_level": level, **record.dataId.byName()}
                            for record, level in itertools.product(grouped[gv], skypix_levels)
                        )
                self._db.delete(
                    self._overlapTable,
                    ["skypix_system", "skypix_level"] + list(self.element.graph.required.names),
                    *to_delete,
                )
            overlapRecords: List[dict] = []
            # Compute overlaps for one governor value at a time, but gather
            # them all up for one insert.
            for gv, group in grouped.items():
                overlapRecords.extend(self._compute(group, skypix[gv], gv))
            _LOG.debug(
                "Inserting %d new skypix overlap rows for %s where %s in %s.",
                len(overlapRecords),
                self.element.name,
                self._governor.element.name,
                grouped.keys(),
            )
            if skip_existing:
                self._db.ensure(self._overlapTable, *overlapRecords, primary_key_only=True)
            else:
                self._db.insert(self._overlapTable, *overlapRecords)

    def _compute(
        self,
        records: Sequence[DimensionRecord],
        skypix: NamedKeyDict[SkyPixSystem, List[int]],
        governorValue: str,
    ) -> Iterator[dict]:
        """Compute all overlap rows for a particular governor dimension value
        and all of the skypix dimensions for which its overlaps are enabled.

        This method should only be called by `insert`.

        Parameters
        ----------
        records : `Sequence` [ `DimensionRecord` ]
            Records for ``self.element``.  Records with `None` regions are
            ignored.  All must have the governor value given.
        skypix : `NamedKeyDict` [ `SkyPixSystem`, `list` [ `int` ] ]
            Mapping containing all skypix systems and levels for which overlaps
            should be computed, grouped by `SkyPixSystem`.  The level lists
            are not modified.
        governorValue : `str`
            Value of this element's governor dimension for which overlaps
            should be computed.  For example, if ``self.element`` is ``visit``,
            this is an instrument name; if ``self.element`` is ``patch``, this
            is a skymap name.

        Yields
        ------
        row : `dict`
            Dictionary representing an overlap row.
        """
        governorName = self._governor.element.name
        # Process input records one at time, computing all skypix indices for
        # each.
        for record in records:
            if record.region is None:
                continue
            assert getattr(record, governorName) == governorValue
            for system, levels in skypix.items():
                if not levels:
                    continue
                baseOverlapRecord = record.dataId.byName()
                baseOverlapRecord["skypix_system"] = system.name
                # Sort a copy (finest level first) so that the caller's list
                # is never mutated as a side effect of computing overlaps.
                orderedLevels = sorted(levels, reverse=True)
                finestLevel = orderedLevels[0]
                # Start with the first level, which is the finest-grained one.
                # Compute skypix envelope indices directly for that.
                indices: Dict[int, Set[int]] = {finestLevel: set()}
                for begin, end in system[finestLevel].pixelization.envelope(record.region):
                    indices[finestLevel].update(range(begin, end))
                # Divide those indices by powers of 4 (and remove duplicates)
                # to work our way up to the last (coarsest) level.
                for lastLevel, nextLevel in zip(orderedLevels[:-1], orderedLevels[1:]):
                    factor = 4 ** (lastLevel - nextLevel)
                    indices[nextLevel] = {index // factor for index in indices[lastLevel]}
                for level in orderedLevels:
                    yield from (
                        {
                            "skypix_level": level,
                            "skypix_index": index,
                            **baseOverlapRecord,  # type: ignore
                        }
                        for index in indices[level]
                    )

    def select(
        self,
        skypix: SkyPixDimension,
        governorValues: Union[AbstractSet[str], EllipsisType],
    ) -> sqlalchemy.sql.FromClause:
        """Construct a subquery expression containing overlaps between the
        given skypix dimension and governor values.

        Parameters
        ----------
        skypix : `SkyPixDimension`
            The skypix dimension (system and level) for which overlaps should
            be materialized.
        governorValues : `AbstractSet` [ `str` ] or ``...``
            Values of this element's governor dimension for which overlaps
            should be returned.  For example, if ``self.element`` is ``visit``,
            this is a set of instrument names; if ``self.element`` is
            ``patch``, this is a set of skymap names.  If ``...`` all values
            in the database are used (`GovernorDimensionRecordStorage.values`).

        Returns
        -------
        subquery : `sqlalchemy.sql.FromClause`
            A SELECT query with an alias, intended for use as a subquery, with
            columns equal to ``self.element.required.names`` + ``skypix.name``.

        Raises
        ------
        RuntimeError
            Raised if ``skypix`` is not the common skypix dimension and
            overlaps have not been materialized for one or more of the
            requested governor values.
        """
        if skypix != self.element.universe.commonSkyPix:
            # We guarantee elsewhere that we always materialize all overlaps
            # vs. commonSkyPix, but for everything else, we need to check that
            # we have materialized this combination of governor values and
            # skypix.
            summaryWhere = [
                self._summaryTable.columns.skypix_system == skypix.system.name,
                self._summaryTable.columns.skypix_level == skypix.level,
            ]
            gvCol = self._summaryTable.columns[self._governor.element.name]
            if governorValues is not Ellipsis:
                summaryWhere.append(gvCol.in_(list(governorValues)))
            summaryQuery = (
                sqlalchemy.sql.select(gvCol)
                .select_from(self._summaryTable)
                .where(sqlalchemy.sql.and_(*summaryWhere))
            )
            materializedGovernorValues = {row._mapping[gvCol] for row in self._db.query(summaryQuery)}
            if governorValues is Ellipsis:
                missingGovernorValues = self._governor.values - materializedGovernorValues
            else:
                missingGovernorValues = governorValues - materializedGovernorValues
            if missingGovernorValues:
                raise RuntimeError(
                    f"Query requires an overlap join between {skypix.name} and {self.element.name} "
                    f"(for {self._governor.element.name} in {missingGovernorValues}), but these "
                    f"have not been materialized."
                )
        # Expose the skypix index under the skypix dimension's own name so the
        # subquery can be joined on dimension names.
        columns = [self._overlapTable.columns.skypix_index.label(skypix.name)]
        columns.extend(self._overlapTable.columns[name] for name in self.element.graph.required.names)
        overlapWhere = [
            self._overlapTable.columns.skypix_system == skypix.system.name,
            self._overlapTable.columns.skypix_level == skypix.level,
        ]
        if governorValues is not Ellipsis:
            overlapWhere.append(
                self._overlapTable.columns[self._governor.element.name].in_(list(governorValues))
            )
        overlapQuery = (
            sqlalchemy.sql.select(*columns)
            .select_from(self._overlapTable)
            .where(sqlalchemy.sql.and_(*overlapWhere))
        )
        return overlapQuery.alias(f"{self.element.name}_{skypix.name}_overlap")

    def digestTables(self) -> Iterable[sqlalchemy.schema.Table]:
        """Return tables used for schema digest.

        Returns
        -------
        tables : `Iterable` [ `sqlalchemy.schema.Table` ]
            Possibly empty set of tables for schema digest calculations.
        """
        return [self._summaryTable, self._overlapTable]