Coverage for python/lsst/daf/butler/registry/dimensions/table.py: 86%
236 statements
« prev ^ index » next coverage.py v6.4.4, created at 2022-09-22 02:04 -0700
« prev ^ index » next coverage.py v6.4.4, created at 2022-09-22 02:04 -0700
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = ["TableDimensionRecordStorage"]
25import itertools
26import logging
27import warnings
28from collections import defaultdict
29from typing import AbstractSet, Any, Dict, Iterable, Iterator, List, Mapping, Optional, Sequence, Set, Union
31import sqlalchemy
32from lsst.utils.ellipsis import Ellipsis, EllipsisType
34from ...core import (
35 DatabaseDimensionElement,
36 DataCoordinate,
37 DataCoordinateIterable,
38 DimensionElement,
39 DimensionRecord,
40 GovernorDimension,
41 NamedKeyDict,
42 NamedKeyMapping,
43 NamedValueSet,
44 SimpleQuery,
45 SkyPixDimension,
46 SkyPixSystem,
47 TimespanDatabaseRepresentation,
48 addDimensionForeignKey,
49 ddl,
50)
51from ..interfaces import (
52 Database,
53 DatabaseDimensionOverlapStorage,
54 DatabaseDimensionRecordStorage,
55 GovernorDimensionRecordStorage,
56 StaticTablesContext,
57)
58from ..queries import QueryBuilder
# Module-level logger, named after this module.
_LOG = logging.getLogger(__name__)


# NOTE(review): this constant is not referenced anywhere in the visible part
# of this file -- confirm whether it is still used elsewhere.
MAX_FETCH_CHUNK = 1000
"""Maximum number of data IDs we fetch records at a time.

Barring something database-engine-specific, this sets the size of the actual
SQL query, not just the number of result rows, because the only way to query
for multiple data IDs in a single SELECT query via SQLAlchemy is to have an OR
term in the WHERE clause for each one.
"""
class TableDimensionRecordStorage(DatabaseDimensionRecordStorage):
    """A record storage implementation that uses a regular database table.

    Parameters
    ----------
    db : `Database`
        Interface to the database engine and namespace that will hold these
        dimension records.
    element : `DatabaseDimensionElement`
        The element whose records this storage will manage.
    table : `sqlalchemy.schema.Table`
        The logical table for the element.
    skyPixOverlap : `_SkyPixOverlapStorage`, optional
        Object that manages the tables that hold materialized spatial overlap
        joins to skypix dimensions.  Should be `None` if (and only if)
        ``element.spatial is None``.
    """

    def __init__(
        self,
        db: Database,
        element: DatabaseDimensionElement,
        *,
        table: sqlalchemy.schema.Table,
        skyPixOverlap: Optional[_SkyPixOverlapStorage] = None,
    ):
        self._db = db
        self._table = table
        self._element = element
        # Map each dimension name of the element to the table column that
        # holds it; the element's dimensions and the record class's dimension
        # field names are paired positionally.
        self._fetchColumns: Dict[str, sqlalchemy.sql.ColumnElement] = {
            dimension.name: self._table.columns[name]
            for dimension, name in zip(
                self._element.dimensions, self._element.RecordClass.fields.dimensions.names
            )
        }
        self._skyPixOverlap = skyPixOverlap
        # Overlap storage objects registered later via `connect`.
        self._otherOverlaps: List[DatabaseDimensionOverlapStorage] = []

    @classmethod
    def initialize(
        cls,
        db: Database,
        element: DatabaseDimensionElement,
        *,
        context: Optional[StaticTablesContext] = None,
        config: Mapping[str, Any],
        governors: NamedKeyMapping[GovernorDimension, GovernorDimensionRecordStorage],
    ) -> DatabaseDimensionRecordStorage:
        # Docstring inherited from DatabaseDimensionRecordStorage.
        spec = element.RecordClass.fields.makeTableSpec(TimespanReprClass=db.getTimespanRepresentation())
        # Declare the table through the static-tables context when we have
        # one; otherwise create it on demand.
        if context is not None:
            table = context.addTable(element.name, spec)
        else:
            table = db.ensureTableExists(element.name, spec)
        skyPixOverlap: Optional[_SkyPixOverlapStorage]
        if element.spatial is not None:
            governor = governors[element.spatial.governor]
            skyPixOverlap = _SkyPixOverlapStorage.initialize(
                db,
                element,
                context=context,
                governor=governor,
            )
            result = cls(db, element, table=table, skyPixOverlap=skyPixOverlap)

            # Whenever anyone inserts a new governor dimension value, we want
            # to enable overlaps for that value between this element and
            # commonSkyPix.
            def callback(record: DimensionRecord) -> None:
                skyPixOverlap.enable(  # type: ignore
                    result,
                    element.universe.commonSkyPix,
                    getattr(record, element.spatial.governor.primaryKey.name),  # type: ignore
                )

            governor.registerInsertionListener(callback)
            return result
        else:
            return cls(db, element, table=table)

    @property
    def element(self) -> DatabaseDimensionElement:
        # Docstring inherited from DimensionRecordStorage.element.
        return self._element

    def clearCaches(self) -> None:
        # Docstring inherited from DimensionRecordStorage.clearCaches.
        # This implementation keeps no caches, so there is nothing to clear.
        pass

    def join(
        self,
        builder: QueryBuilder,
        *,
        regions: Optional[NamedKeyDict[DimensionElement, sqlalchemy.sql.ColumnElement]] = None,
        timespans: Optional[NamedKeyDict[DimensionElement, TimespanDatabaseRepresentation]] = None,
    ) -> None:
        # Docstring inherited from DimensionRecordStorage.
        if regions is not None:
            # A spatial join was requested: join in the materialized overlaps
            # with the common skypix dimension, and publish this element's
            # region column for the caller.
            dimensions = NamedValueSet(self.element.required)
            dimensions.add(self.element.universe.commonSkyPix)
            assert self._skyPixOverlap is not None
            builder.joinTable(
                self._skyPixOverlap.select(self.element.universe.commonSkyPix, Ellipsis),
                dimensions,
            )
            regionsInTable = self._table.columns["region"]
            regions[self.element] = regionsInTable
        joinOn = builder.startJoin(
            self._table, self.element.dimensions, self.element.RecordClass.fields.dimensions.names
        )
        if timespans is not None:
            # Constrain this element's timespan to overlap every timespan
            # already in the query, then publish our own.
            timespanInTable = self._db.getTimespanRepresentation().from_columns(self._table.columns)
            for timespanInQuery in timespans.values():
                joinOn.append(timespanInQuery.overlaps(timespanInTable))
            timespans[self.element] = timespanInTable
        builder.finishJoin(self._table, joinOn)
        # NOTE(review): the signature is annotated ``-> None`` but the table
        # is returned here; behavior is left as-is -- confirm which of the
        # two the base-class contract expects.
        return self._table

    def fetch(self, dataIds: DataCoordinateIterable) -> Iterable[DimensionRecord]:
        # Docstring inherited from DimensionRecordStorage.fetch.
        RecordClass = self.element.RecordClass
        query = SimpleQuery()
        query.columns.extend(self._table.columns[name] for name in RecordClass.fields.standard.names)
        if self.element.spatial is not None:
            query.columns.append(self._table.columns["region"])
        if self.element.temporal is not None:
            TimespanReprClass = self._db.getTimespanRepresentation()
            query.columns.extend(self._table.columns[name] for name in TimespanReprClass.getFieldNames())
        query.join(self._table)
        dataIds.constrain(query, lambda name: self._fetchColumns[name])
        with warnings.catch_warnings():
            # Some of our generated queries may contain cartesian joins, this
            # is not a serious issue as it is properly constrained, so we want
            # to suppress sqlalchemy warnings.
            warnings.filterwarnings(
                "ignore",
                message="SELECT statement has a cartesian product",
                category=sqlalchemy.exc.SAWarning,
            )
            # Run the query and pull all rows while the filter is active, and
            # only then start yielding.  This is a generator, so yielding
            # inside the ``with`` block would leave the (process-global)
            # warnings filter modified while the caller's code runs between
            # iterations, and would only restore it once the generator is
            # exhausted or closed.
            rows = list(self._db.query(query.combine()))
        for row in rows:
            values = row._asdict()
            if self.element.temporal is not None:
                values[TimespanDatabaseRepresentation.NAME] = TimespanReprClass.extract(values)
            yield RecordClass(**values)

    def insert(self, *records: DimensionRecord, replace: bool = False, skip_existing: bool = False) -> None:
        # Docstring inherited from DimensionRecordStorage.insert.
        elementRows = [record.toDict() for record in records]
        if self.element.temporal is not None:
            # Replace the logical timespan entry with its database
            # representation in each row.
            TimespanReprClass = self._db.getTimespanRepresentation()
            for row in elementRows:
                timespan = row.pop(TimespanDatabaseRepresentation.NAME)
                TimespanReprClass.update(timespan, result=row)
        # Element rows and overlap rows are written in one transaction.
        with self._db.transaction():
            if replace:
                self._db.replace(self._table, *elementRows)
            elif skip_existing:
                self._db.ensure(self._table, *elementRows, primary_key_only=True)
            else:
                self._db.insert(self._table, *elementRows)
            if self._skyPixOverlap is not None:
                self._skyPixOverlap.insert(records, replace=replace, skip_existing=skip_existing)

    def sync(self, record: DimensionRecord, update: bool = False) -> Union[bool, Dict[str, Any]]:
        # Docstring inherited from DimensionRecordStorage.sync.
        # Split the record into its required (key) fields and the remaining
        # fields that are compared against any existing row.
        compared = record.toDict()
        keys = {}
        for name in record.fields.required.names:
            keys[name] = compared.pop(name)
        if self.element.temporal is not None:
            TimespanReprClass = self._db.getTimespanRepresentation()
            timespan = compared.pop(TimespanDatabaseRepresentation.NAME)
            TimespanReprClass.update(timespan, result=compared)
        with self._db.transaction():
            _, inserted_or_updated = self._db.sync(
                self._table,
                keys=keys,
                compared=compared,
                update=update,
            )
            if inserted_or_updated and self._skyPixOverlap is not None:
                if inserted_or_updated is True:
                    # Inserted a new row, so we just need to insert new overlap
                    # rows.
                    self._skyPixOverlap.insert([record])
                elif "region" in inserted_or_updated:
                    # Updated the region, so we need to delete old overlap rows
                    # and insert new ones.
                    # (mypy should be able to tell that inserted_or_updated
                    # must be a dict if we get to this clause, but it can't)
                    self._skyPixOverlap.insert([record], replace=True)
                # Otherwise we updated something other than a region, and the
                # overlap rows are still valid.
        return inserted_or_updated

    def digestTables(self) -> Iterable[sqlalchemy.schema.Table]:
        # Docstring inherited from DimensionRecordStorage.digestTables.
        result = [self._table]
        if self._skyPixOverlap is not None:
            result.extend(self._skyPixOverlap.digestTables())
        return result

    def connect(self, overlaps: DatabaseDimensionOverlapStorage) -> None:
        # Docstring inherited from DatabaseDimensionRecordStorage.
        self._otherOverlaps.append(overlaps)
class _SkyPixOverlapStorage:
    """A helper object for `TableDimensionRecordStorage` that manages its
    materialized overlaps with skypix dimensions.

    New instances should be constructed by calling `initialize`, not by calling
    the constructor directly.

    Parameters
    ----------
    db : `Database`
        Interface to the underlying database engine and namespace.
    element : `DatabaseDimensionElement`
        Dimension element whose overlaps are to be managed.
    summaryTable : `sqlalchemy.schema.Table`
        Table that records which combinations of skypix dimensions and
        governor dimension values have materialized overlap rows.
    overlapTable : `sqlalchemy.schema.Table`
        Table containing the actual materialized overlap rows.
    governor : `GovernorDimensionRecordStorage`
        Record storage backend for this element's governor dimension.

    Notes
    -----
    This class (and most importantly, the tables it relies on) can in principle
    manage overlaps with any skypix dimension, but at present it is
    only being used to manage relationships with the special ``commonSkyPix``
    dimension, because that's all the query system uses.  Eventually, we expect
    to require users to explicitly materialize all relationships they will
    want to use in queries.

    Other possible future improvements include:

     - allowing finer-grained skypix dimensions to provide overlap rows for
       coarser ones, by dividing indices by powers of 4 (and possibly doing
       ``SELECT DISTINCT`` in the subquery to remove duplicates);

     - allowing finer-grained database elements (e.g. patch) to provide overlap
       rows for coarser ones (e.g. tract), by ignoring irrelevant columns
       (e.g. the patch IDs) in the subquery (again, possible with
       ``SELECT DISTINCT``).

    But there's no point to doing any of that until the query system can
    figure out how best to ask for overlap rows when an exact match isn't
    available.
    """

    def __init__(
        self,
        db: Database,
        element: DatabaseDimensionElement,
        summaryTable: sqlalchemy.schema.Table,
        overlapTable: sqlalchemy.schema.Table,
        governor: GovernorDimensionRecordStorage,
    ):
        self._db = db
        self.element = element
        assert element.spatial is not None
        self._summaryTable = summaryTable
        self._overlapTable = overlapTable
        self._governor = governor

    @classmethod
    def initialize(
        cls,
        db: Database,
        element: DatabaseDimensionElement,
        *,
        context: Optional[StaticTablesContext],
        governor: GovernorDimensionRecordStorage,
    ) -> _SkyPixOverlapStorage:
        """Construct a new instance, creating tables as needed.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        element : `DatabaseDimensionElement`
            Dimension element whose overlaps are to be managed.
        context : `StaticTablesContext`, optional
            If provided, an object to use to create any new tables.  If not
            provided, ``db.ensureTableExists`` should be used instead.
        governor : `GovernorDimensionRecordStorage`
            Record storage backend for this element's governor dimension.

        Returns
        -------
        storage : `_SkyPixOverlapStorage`
            New instance of the class this method was called on.
        """
        if context is not None:
            op = context.addTable
        else:
            op = db.ensureTableExists
        summaryTable = op(
            cls._SUMMARY_TABLE_NAME_SPEC.format(element=element),
            cls._makeSummaryTableSpec(element),
        )
        overlapTable = op(
            cls._OVERLAP_TABLE_NAME_SPEC.format(element=element),
            cls._makeOverlapTableSpec(element),
        )
        # Construct via ``cls`` rather than the hard-coded class name so that
        # a subclass calling this alternate constructor gets its own type.
        return cls(db, element, summaryTable=summaryTable, overlapTable=overlapTable, governor=governor)

    _SUMMARY_TABLE_NAME_SPEC = "{element.name}_skypix_overlap_summary"

    @classmethod
    def _makeSummaryTableSpec(cls, element: DatabaseDimensionElement) -> ddl.TableSpec:
        """Create a specification for the table that records which combinations
        of skypix dimension and governor value have materialized overlaps.

        Parameters
        ----------
        element : `DatabaseDimensionElement`
            Dimension element whose overlaps are to be managed.

        Returns
        -------
        tableSpec : `ddl.TableSpec`
            Table specification.
        """
        assert element.spatial is not None
        tableSpec = ddl.TableSpec(
            fields=[
                ddl.FieldSpec(
                    name="skypix_system",
                    dtype=sqlalchemy.String,
                    length=16,
                    nullable=False,
                    primaryKey=True,
                ),
                ddl.FieldSpec(
                    name="skypix_level",
                    dtype=sqlalchemy.SmallInteger,
                    nullable=False,
                    primaryKey=True,
                ),
            ]
        )
        # The governor dimension column completes the primary key.
        addDimensionForeignKey(tableSpec, element.spatial.governor, primaryKey=True)
        return tableSpec

    _OVERLAP_TABLE_NAME_SPEC = "{element.name}_skypix_overlap"

    @classmethod
    def _makeOverlapTableSpec(cls, element: DatabaseDimensionElement) -> ddl.TableSpec:
        """Create a specification for the table that holds materialized
        overlap rows.

        Parameters
        ----------
        element : `DatabaseDimensionElement`
            Dimension element whose overlaps are to be managed.

        Returns
        -------
        tableSpec : `ddl.TableSpec`
            Table specification.
        """
        assert element.spatial is not None
        tableSpec = ddl.TableSpec(
            fields=[
                ddl.FieldSpec(
                    name="skypix_system",
                    dtype=sqlalchemy.String,
                    length=16,
                    nullable=False,
                    primaryKey=True,
                ),
                ddl.FieldSpec(
                    name="skypix_level",
                    dtype=sqlalchemy.SmallInteger,
                    nullable=False,
                    primaryKey=True,
                ),
                # (more columns added below)
            ],
            unique=set(),
            indexes={
                # This index has the same fields as the PK, in a different
                # order, to facilitate queries that know skypix_index and want
                # to find the other element.
                (
                    "skypix_system",
                    "skypix_level",
                    "skypix_index",
                )
                + tuple(element.graph.required.names),
            },
            foreignKeys=[
                # Foreign key to summary table.  This makes sure we don't
                # materialize any overlaps without remembering that we've done
                # so in the summary table, though it can't prevent the converse
                # of adding a summary row without adding overlap row (either of
                # those is a logic bug, of course, but we want to be defensive
                # about those).  Using ON DELETE CASCADE, it'd be very easy to
                # implement "disabling" an overlap materialization, because we
                # can just delete the summary row.
                # Note that the governor dimension column is added below, in
                # the call to addDimensionForeignKey.
                ddl.ForeignKeySpec(
                    cls._SUMMARY_TABLE_NAME_SPEC.format(element=element),
                    source=("skypix_system", "skypix_level", element.spatial.governor.name),
                    target=("skypix_system", "skypix_level", element.spatial.governor.name),
                    onDelete="CASCADE",
                ),
            ],
        )
        # Add fields for the standard element this class manages overlaps for.
        # This is guaranteed to add a column for the governor dimension,
        # because that's a required dependency of element.
        for dimension in element.required:
            addDimensionForeignKey(tableSpec, dimension, primaryKey=True)
        # Add field for the actual skypix index.  We do this later because I
        # think we care (at least a bit) about the order in which the primary
        # key is defined, in that we want a non-summary column like this one
        # to appear after the governor dimension column.
        tableSpec.fields.add(
            ddl.FieldSpec(
                name="skypix_index",
                dtype=sqlalchemy.BigInteger,
                nullable=False,
                primaryKey=True,
            )
        )
        return tableSpec

    def enable(
        self,
        storage: TableDimensionRecordStorage,
        skypix: SkyPixDimension,
        governorValue: str,
    ) -> None:
        """Enable materialization of overlaps between a skypix dimension
        and the records of ``self.element`` with a particular governor value.

        Parameters
        ----------
        storage : `TableDimensionRecordStorage`
            Storage object for the records of ``self.element``.
        skypix : `SkyPixDimension`
            The skypix dimension (system and level) for which overlaps should
            be materialized.
        governorValue : `str`
            Value of this element's governor dimension for which overlaps
            should be materialized.  For example, if ``self.element`` is
            ``visit``, this is an instrument name; if ``self.element`` is
            ``patch``, this is a skymap name.

        Notes
        -----
        If there are existing rows for the given ``governorValue``, overlap
        rows for them will be immediately computed and inserted.  At present,
        that never happens, because we only enable overlaps with
        `DimensionUniverse.commonSkyPix`, and that happens immediately after
        each governor row is inserted (and there can't be any patch rows,
        for example, until after the corresponding skymap row is inserted).

        After calling `enable` for a particular combination, any new records
        for ``self.element`` that are inserted will automatically be
        accompanied by overlap records (via calls to `insert` made
        by `TableDimensionRecordStorage` methods).
        """
        # Because we're essentially materializing a view in Python, we
        # aggressively lock all tables we're reading and writing in order to be
        # sure nothing gets out of sync.  This may not be the most efficient
        # approach possible, but we'll focus on correct before we focus on
        # fast, and enabling a new overlap combination should be a very rare
        # operation anyway, and never one we do in parallel.
        with self._db.transaction(
            lock=[self._governor.table, storage._table, self._summaryTable, self._overlapTable]
        ):
            # Only the "was a row inserted" flag is needed here.
            _, inserted = self._db.sync(
                self._summaryTable,
                keys={
                    "skypix_system": skypix.system.name,
                    "skypix_level": skypix.level,
                    self._governor.element.name: governorValue,
                },
            )
            if inserted:
                _LOG.debug(
                    "Precomputing initial overlaps for %s vs %s for %s=%s",
                    skypix.name,
                    self.element.name,
                    self._governor.element.name,
                    governorValue,
                )
                self._fill(storage=storage, skypix=skypix, governorValue=governorValue)
            else:
                _LOG.debug(
                    "Overlaps already precomputed for %s vs %s for %s=%s",
                    skypix.name,
                    self.element.name,
                    self._governor.element.name,
                    governorValue,
                )

    def _fill(
        self,
        *,
        storage: TableDimensionRecordStorage,
        skypix: SkyPixDimension,
        governorValue: str,
    ) -> None:
        """Insert overlap records for a newly-enabled combination of skypix
        dimension and governor value.

        This method should only be called by `enable`.

        Parameters
        ----------
        storage : `TableDimensionRecordStorage`
            Storage object for the records of ``self.element``.
        skypix : `SkyPixDimension`
            The skypix dimension (system and level) for which overlaps should
            be materialized.
        governorValue : `str`
            Value of this element's governor dimension for which overlaps
            should be materialized.  For example, if ``self.element`` is
            ``visit``, this is an instrument name; if ``self.element`` is
            ``patch``, this is a skymap name.
        """
        overlapRecords: List[dict] = []
        # `DimensionRecordStorage.fetch` as defined by the ABC expects to be
        # given iterables of data IDs that correspond to that element's graph
        # (e.g. {instrument, visit, detector}), not just some subset of it
        # (e.g. {instrument}).  But we know the implementation of `fetch` for
        # `TableDimensionRecordStorage` will use this iterable to do exactly
        # what we want.
        governorDataId = DataCoordinate.standardize(
            {self._governor.element.name: governorValue}, graph=self._governor.element.graph
        )
        for record in storage.fetch(DataCoordinateIterable.fromScalar(governorDataId)):
            if record.region is None:
                continue
            baseOverlapRecord = record.dataId.byName()
            baseOverlapRecord["skypix_system"] = skypix.system.name
            baseOverlapRecord["skypix_level"] = skypix.level
            # Each envelope entry is a half-open [begin, end) range of pixel
            # indices; emit one overlap row per index.
            for begin, end in skypix.pixelization.envelope(record.region):
                overlapRecords.extend(
                    dict(baseOverlapRecord, skypix_index=index) for index in range(begin, end)
                )
        _LOG.debug(
            "Inserting %d initial overlap rows for %s vs %s for %s=%r",
            len(overlapRecords),
            skypix.name,
            self.element.name,
            self._governor.element.name,
            governorValue,
        )
        self._db.insert(self._overlapTable, *overlapRecords)

    def insert(
        self, records: Sequence[DimensionRecord], replace: bool = False, skip_existing: bool = False
    ) -> None:
        """Insert overlaps for a sequence of ``self.element`` records that
        have just been inserted.

        This must be called by any method that inserts records for that
        element (i.e. `TableDimensionRecordStorage.insert` and
        `TableDimensionRecordStorage.sync`), within the same transaction.

        Parameters
        ----------
        records : `Sequence` [ `DimensionRecord` ]
            Records for ``self.element``.  Records with `None` regions are
            ignored.
        replace : `bool`, optional
            If `True` (`False` is default) one or more of the given records may
            already exist and is being updated, so we need to delete any
            existing overlap records first.
        skip_existing : `bool`, optional
            If `True` (`False` is default), skip insertion if a record with
            the same primary key values already exists.
        """
        # Group records by family.governor value.
        grouped: Dict[str, List[DimensionRecord]] = defaultdict(list)
        for record in records:
            grouped[getattr(record, self._governor.element.name)].append(record)
        _LOG.debug(
            "Precomputing new skypix overlaps for %s where %s in %s.",
            self.element.name,
            self._governor.element.name,
            grouped.keys(),
        )
        # Make sure the set of combinations to materialize does not change
        # while we are materializing the ones we have, by locking the summary
        # table.  Because we aren't planning to write to the summary table,
        # this could just be a SHARED lock instead of an EXCLUSIVE one, but
        # there's no API for that right now.
        with self._db.transaction(lock=[self._summaryTable]):
            # Query for the skypix dimensions to be associated with each
            # governor value.
            gvCol = self._summaryTable.columns[self._governor.element.name]
            sysCol = self._summaryTable.columns.skypix_system
            lvlCol = self._summaryTable.columns.skypix_level
            query = (
                sqlalchemy.sql.select(
                    gvCol,
                    sysCol,
                    lvlCol,
                )
                .select_from(self._summaryTable)
                .where(gvCol.in_(list(grouped.keys())))
            )
            # Group results by governor value, then skypix system.
            skypix: Dict[str, NamedKeyDict[SkyPixSystem, List[int]]] = {
                gv: NamedKeyDict() for gv in grouped.keys()
            }
            for summaryRow in self._db.query(query).mappings():
                system = self.element.universe.skypix[summaryRow[sysCol]]
                skypix[summaryRow[gvCol]].setdefault(system, []).append(summaryRow[lvlCol])
            if replace:
                # Construct constraints for a DELETE query as a list of dicts.
                # We include the skypix_system and skypix_level column values
                # explicitly instead of just letting the query search for all
                # of those related to the given records, because they are the
                # first columns in the primary key, and hence searching with
                # them will be way faster (and we don't want to add a new index
                # just for this operation).
                to_delete: List[Dict[str, Any]] = []
                for gv, skypix_systems in skypix.items():
                    for system, skypix_levels in skypix_systems.items():
                        to_delete.extend(
                            {"skypix_system": system.name, "skypix_level": level, **record.dataId.byName()}
                            for record, level in itertools.product(grouped[gv], skypix_levels)
                        )
                self._db.delete(
                    self._overlapTable,
                    ["skypix_system", "skypix_level"] + list(self.element.graph.required.names),
                    *to_delete,
                )
            overlapRecords: List[dict] = []
            # Compute overlaps for one governor value at a time, but gather
            # them all up for one insert.
            for gv, group in grouped.items():
                overlapRecords.extend(self._compute(group, skypix[gv], gv))
            _LOG.debug(
                "Inserting %d new skypix overlap rows for %s where %s in %s.",
                len(overlapRecords),
                self.element.name,
                self._governor.element.name,
                grouped.keys(),
            )
            if skip_existing:
                self._db.ensure(self._overlapTable, *overlapRecords, primary_key_only=True)
            else:
                self._db.insert(self._overlapTable, *overlapRecords)

    def _compute(
        self,
        records: Sequence[DimensionRecord],
        skypix: NamedKeyDict[SkyPixSystem, List[int]],
        governorValue: str,
    ) -> Iterator[dict]:
        """Compute all overlap rows for a particular governor dimension value
        and all of the skypix dimensions for which its overlaps are enabled.

        This method should only be called by `insert`.

        Parameters
        ----------
        records : `Sequence` [ `DimensionRecord` ]
            Records for ``self.element``.  Records with `None` regions are
            ignored.  All must have the governor value given.
        skypix : `NamedKeyDict` [ `SkyPixSystem`, `list` [ `int` ] ]
            Mapping containing all skypix systems and levels for which overlaps
            should be computed, grouped by `SkyPixSystem`.  The level lists
            are not modified.
        governorValue : `str`
            Value of this element's governor dimension for which overlaps
            should be computed.  For example, if ``self.element`` is ``visit``,
            this is an instrument name; if ``self.element`` is ``patch``, this
            is a skymap name.

        Yields
        ------
        row : `dict`
            Dictionary representing an overlap row.
        """
        # Order each system's levels from finest to coarsest once, up front.
        # Sorting a copy keeps the caller's lists untouched, and doing it
        # outside the record loop avoids re-sorting for every record.
        # Systems with no levels contribute nothing and are dropped here.
        systemLevels = [
            (system, sorted(levels, reverse=True)) for system, levels in skypix.items() if levels
        ]
        # Process input records one at time, computing all skypix indices for
        # each.
        for record in records:
            if record.region is None:
                continue
            assert getattr(record, self._governor.element.name) == governorValue
            for system, levels in systemLevels:
                baseOverlapRecord = record.dataId.byName()
                baseOverlapRecord["skypix_system"] = system.name
                # Start with the first level, which is the finest-grained one.
                # Compute skypix envelope indices directly for that.
                indices: Dict[int, Set[int]] = {levels[0]: set()}
                for begin, end in system[levels[0]].pixelization.envelope(record.region):
                    indices[levels[0]].update(range(begin, end))
                # Divide those indices by powers of 4 (and remove duplicates)
                # work our way up to the last (coarsest) level.
                for lastLevel, nextLevel in zip(levels[:-1], levels[1:]):
                    factor = 4 ** (lastLevel - nextLevel)
                    indices[nextLevel] = {index // factor for index in indices[lastLevel]}
                for level in levels:
                    yield from (
                        {
                            "skypix_level": level,
                            "skypix_index": index,
                            **baseOverlapRecord,  # type: ignore
                        }
                        for index in indices[level]
                    )

    def select(
        self,
        skypix: SkyPixDimension,
        governorValues: Union[AbstractSet[str], EllipsisType],
    ) -> sqlalchemy.sql.FromClause:
        """Construct a subquery expression containing overlaps between the
        given skypix dimension and governor values.

        Parameters
        ----------
        skypix : `SkyPixDimension`
            The skypix dimension (system and level) for which overlaps should
            be returned.
        governorValues : `~collections.abc.Set` [ `str` ] or ``...``
            Values of this element's governor dimension for which overlaps
            should be returned.  For example, if ``self.element`` is ``visit``,
            this is a set of instrument names; if ``self.element`` is
            ``patch``, this is a set of skymap names.  If ``...`` all values
            in the database are used (`GovernorDimensionRecordStorage.values`).

        Returns
        -------
        subquery : `sqlalchemy.sql.FromClause`
            A SELECT query with an alias, intended for use as a subquery, with
            columns equal to ``self.element.required.names`` + ``skypix.name``.

        Raises
        ------
        RuntimeError
            Raised if ``skypix`` is not the common skypix dimension and
            overlaps with it have not been materialized for one or more of
            the requested governor values.
        """
        if skypix != self.element.universe.commonSkyPix:
            # We guarantee elsewhere that we always materialize all overlaps
            # vs. commonSkyPix, but for everything else, we need to check that
            # we have materialized this combination of governor values and
            # skypix.
            summaryWhere = [
                self._summaryTable.columns.skypix_system == skypix.system.name,
                self._summaryTable.columns.skypix_level == skypix.level,
            ]
            gvCol = self._summaryTable.columns[self._governor.element.name]
            if governorValues is not Ellipsis:
                summaryWhere.append(gvCol.in_(list(governorValues)))
            summaryQuery = (
                sqlalchemy.sql.select(gvCol)
                .select_from(self._summaryTable)
                .where(sqlalchemy.sql.and_(*summaryWhere))
            )
            materializedGovernorValues = {row._mapping[gvCol] for row in self._db.query(summaryQuery)}
            if governorValues is Ellipsis:
                missingGovernorValues = self._governor.values - materializedGovernorValues
            else:
                missingGovernorValues = governorValues - materializedGovernorValues
            if missingGovernorValues:
                raise RuntimeError(
                    f"Query requires an overlap join between {skypix.name} and {self.element.name} "
                    f"(for {self._governor.element.name} in {missingGovernorValues}), but these "
                    f"have not been materialized."
                )
        # The skypix index is exposed under the skypix dimension's own name,
        # alongside the element's required dimension columns.
        columns = [self._overlapTable.columns.skypix_index.label(skypix.name)]
        columns.extend(self._overlapTable.columns[name] for name in self.element.graph.required.names)
        overlapWhere = [
            self._overlapTable.columns.skypix_system == skypix.system.name,
            self._overlapTable.columns.skypix_level == skypix.level,
        ]
        if governorValues is not Ellipsis:
            overlapWhere.append(
                self._overlapTable.columns[self._governor.element.name].in_(list(governorValues))
            )
        overlapQuery = (
            sqlalchemy.sql.select(*columns)
            .select_from(self._overlapTable)
            .where(sqlalchemy.sql.and_(*overlapWhere))
        )
        return overlapQuery.alias(f"{self.element.name}_{skypix.name}_overlap")

    def digestTables(self) -> Iterable[sqlalchemy.schema.Table]:
        """Return tables used for schema digest.

        Returns
        -------
        tables : `Iterable` [ `sqlalchemy.schema.Table` ]
            Possibly empty set of tables for schema digest calculations.
        """
        return [self._summaryTable, self._overlapTable]