Coverage for python/lsst/daf/butler/registry/dimensions/table.py : 82%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = ["TableDimensionRecordStorage"]
25from collections import defaultdict
26import logging
27from typing import (
28 AbstractSet,
29 Any,
30 Dict,
31 Iterable,
32 Iterator,
33 List,
34 Mapping,
35 Optional,
36 Sequence,
37 Set,
38 Union,
39)
41import sqlalchemy
43from ...core import (
44 addDimensionForeignKey,
45 DatabaseDimensionElement,
46 DataCoordinate,
47 DataCoordinateIterable,
48 ddl,
49 DimensionElement,
50 DimensionRecord,
51 GovernorDimension,
52 NamedKeyDict,
53 NamedKeyMapping,
54 NamedValueSet,
55 SimpleQuery,
56 SkyPixDimension,
57 SkyPixSystem,
58 SpatialRegionDatabaseRepresentation,
59 TimespanDatabaseRepresentation,
60)
61from ..interfaces import (
62 Database,
63 DatabaseDimensionOverlapStorage,
64 DatabaseDimensionRecordStorage,
65 GovernorDimensionRecordStorage,
66 StaticTablesContext,
67)
68from ..queries import QueryBuilder
69from ..wildcards import Ellipsis, EllipsisType
# Module-level logger, named after this module.
_LOG = logging.getLogger(__name__)


MAX_FETCH_CHUNK = 1000
"""Maximum number of data IDs for which we fetch records at a time.

Barring something database-engine-specific, this sets the size of the actual
SQL query, not just the number of result rows, because the only way to query
for multiple data IDs in a single SELECT query via SQLAlchemy is to have an OR
term in the WHERE clause for each one.
"""
class TableDimensionRecordStorage(DatabaseDimensionRecordStorage):
    """A record storage implementation that uses a regular database table.

    Parameters
    ----------
    db : `Database`
        Interface to the database engine and namespace that will hold these
        dimension records.
    element : `DatabaseDimensionElement`
        The element whose records this storage will manage.
    table : `sqlalchemy.schema.Table`
        The logical table for the element.
    skyPixOverlap : `_SkyPixOverlapStorage`, optional
        Object that manages the tables that hold materialized spatial overlap
        joins to skypix dimensions.  Should be `None` if (and only if)
        ``element.spatial is None``.
    """

    def __init__(self, db: Database, element: DatabaseDimensionElement, *, table: sqlalchemy.schema.Table,
                 skyPixOverlap: Optional[_SkyPixOverlapStorage] = None):
        self._db = db
        self._table = table
        self._element = element
        # Map each dimension name to the table column that holds it; the
        # column names come from the record class and are paired positionally
        # with the element's dimensions.
        self._fetchColumns: Dict[str, sqlalchemy.sql.ColumnElement] = {
            dimension.name: self._table.columns[name]
            for dimension, name in zip(self._element.dimensions,
                                       self._element.RecordClass.fields.dimensions.names)
        }
        self._skyPixOverlap = skyPixOverlap
        self._otherOverlaps: List[DatabaseDimensionOverlapStorage] = []

    @classmethod
    def initialize(
        cls,
        db: Database,
        element: DatabaseDimensionElement, *,
        context: Optional[StaticTablesContext] = None,
        config: Mapping[str, Any],
        governors: NamedKeyMapping[GovernorDimension, GovernorDimensionRecordStorage],
    ) -> DatabaseDimensionRecordStorage:
        # Docstring inherited from DatabaseDimensionRecordStorage.
        spec = element.RecordClass.fields.makeTableSpec(
            RegionReprClass=db.getSpatialRegionRepresentation(),
            TimespanReprClass=db.getTimespanRepresentation(),
        )
        if context is not None:
            table = context.addTable(element.name, spec)
        else:
            table = db.ensureTableExists(element.name, spec)
        skyPixOverlap: Optional[_SkyPixOverlapStorage]
        if element.spatial is not None:
            governor = governors[element.spatial.governor]
            skyPixOverlap = _SkyPixOverlapStorage.initialize(
                db,
                element,
                context=context,
                governor=governor,
            )
            result = cls(db, element, table=table, skyPixOverlap=skyPixOverlap)

            # Whenever anyone inserts a new governor dimension value, we want
            # to enable overlaps for that value between this element and
            # commonSkyPix.
            def callback(record: DimensionRecord) -> None:
                skyPixOverlap.enable(  # type: ignore
                    result,
                    element.universe.commonSkyPix,
                    getattr(record, element.spatial.governor.primaryKey.name),  # type: ignore
                )

            governor.registerInsertionListener(callback)
            return result
        else:
            return cls(db, element, table=table)

    @property
    def element(self) -> DatabaseDimensionElement:
        # Docstring inherited from DimensionRecordStorage.element.
        return self._element

    def clearCaches(self) -> None:
        # Docstring inherited from DimensionRecordStorage.clearCaches.
        # This implementation has nothing to clear.
        pass

    def join(
        self,
        builder: QueryBuilder, *,
        regions: Optional[NamedKeyDict[DimensionElement, SpatialRegionDatabaseRepresentation]] = None,
        timespans: Optional[NamedKeyDict[DimensionElement, TimespanDatabaseRepresentation]] = None,
    ) -> sqlalchemy.sql.FromClause:
        # Docstring inherited from DimensionRecordStorage.
        #
        # The return annotation reflects what this method actually returns
        # (the element's table).
        # NOTE(review): confirm the inherited interface declares the same
        # return type.
        if regions is not None:
            # Spatial joins go through the materialized overlaps with the
            # common skypix dimension.
            dimensions = NamedValueSet(self.element.required)
            dimensions.add(self.element.universe.commonSkyPix)
            assert self._skyPixOverlap is not None
            builder.joinTable(
                self._skyPixOverlap.select(self.element.universe.commonSkyPix, Ellipsis),
                dimensions,
            )
            regionsInTable = self._db.getSpatialRegionRepresentation().fromSelectable(self._table)
            regions[self.element] = regionsInTable
        joinOn = builder.startJoin(self._table, self.element.dimensions,
                                   self.element.RecordClass.fields.dimensions.names)
        if timespans is not None:
            timespanInTable = self._db.getTimespanRepresentation().fromSelectable(self._table)
            # Require an overlap with every timespan already in the query
            # before registering our own.
            for timespanInQuery in timespans.values():
                joinOn.append(timespanInQuery.overlaps(timespanInTable))
            timespans[self.element] = timespanInTable
        builder.finishJoin(self._table, joinOn)
        return self._table

    def fetch(self, dataIds: DataCoordinateIterable) -> Iterable[DimensionRecord]:
        # Docstring inherited from DimensionRecordStorage.fetch.
        RecordClass = self.element.RecordClass
        query = SimpleQuery()
        query.columns.extend(self._table.columns[name] for name in RecordClass.fields.standard.names)
        if self.element.spatial is not None:
            query.columns.append(self._table.columns["region"])
        if self.element.temporal is not None:
            TimespanReprClass = self._db.getTimespanRepresentation()
            query.columns.extend(self._table.columns[name] for name in TimespanReprClass.getFieldNames())
        query.join(self._table)
        dataIds.constrain(query, lambda name: self._fetchColumns[name])
        for row in self._db.query(query.combine()):
            values = dict(row)
            if self.element.temporal is not None:
                # Collapse the database timespan columns into the single
                # timespan value the record class is given.
                values[TimespanDatabaseRepresentation.NAME] = TimespanReprClass.extract(values)
            yield RecordClass(**values)

    def insert(self, *records: DimensionRecord) -> None:
        # Docstring inherited from DimensionRecordStorage.insert.
        elementRows = [record.toDict() for record in records]
        if self.element.temporal is not None:
            TimespanReprClass = self._db.getTimespanRepresentation()
            for row in elementRows:
                # Replace the logical timespan entry with its database
                # column representation.
                timespan = row.pop(TimespanDatabaseRepresentation.NAME)
                TimespanReprClass.update(timespan, result=row)
        # Element rows and their overlap rows go in together.
        with self._db.transaction():
            self._db.insert(self._table, *elementRows)
            if self._skyPixOverlap is not None:
                self._skyPixOverlap.insert(records)

    def sync(self, record: DimensionRecord) -> bool:
        # Docstring inherited from DimensionRecordStorage.sync.
        compared = record.toDict()
        # Required fields identify the row; everything else is compared.
        keys = {}
        for name in record.fields.required.names:
            keys[name] = compared.pop(name)
        if self.element.temporal is not None:
            TimespanReprClass = self._db.getTimespanRepresentation()
            timespan = compared.pop(TimespanDatabaseRepresentation.NAME)
            TimespanReprClass.update(timespan, result=compared)
        with self._db.transaction():
            _, inserted = self._db.sync(
                self._table,
                keys=keys,
                compared=compared,
            )
            # Only a newly-inserted record needs overlap rows.
            if inserted and self._skyPixOverlap is not None:
                self._skyPixOverlap.insert([record])
        return inserted

    def digestTables(self) -> Iterable[sqlalchemy.schema.Table]:
        # Docstring inherited from DimensionRecordStorage.digestTables.
        result = [self._table]
        if self._skyPixOverlap is not None:
            result.extend(self._skyPixOverlap.digestTables())
        return result

    def connect(self, overlaps: DatabaseDimensionOverlapStorage) -> None:
        # Docstring inherited from DatabaseDimensionRecordStorage.
        self._otherOverlaps.append(overlaps)
class _SkyPixOverlapStorage:
    """A helper object for `TableDimensionRecordStorage` that manages its
    materialized overlaps with skypix dimensions.

    New instances should be constructed by calling `initialize`, not by calling
    the constructor directly.

    Parameters
    ----------
    db : `Database`
        Interface to the underlying database engine and namespace.
    element : `DatabaseDimensionElement`
        Dimension element whose overlaps are to be managed.
    summaryTable : `sqlalchemy.schema.Table`
        Table that records which combinations of skypix dimensions and
        governor dimension values have materialized overlap rows.
    overlapTable : `sqlalchemy.schema.Table`
        Table containing the actual materialized overlap rows.
    governor : `GovernorDimensionRecordStorage`
        Record storage backend for this element's governor dimension.

    Notes
    -----
    This class (and most importantly, the tables it relies on) can in principle
    manage overlaps with any skypix dimension, but at present it is
    only being used to manage relationships with the special ``commonSkyPix``
    dimension, because that's all the query system uses.  Eventually, we expect
    to require users to explicitly materialize all relationships they will
    want to use in queries.

    Other possible future improvements include:

     - allowing finer-grained skypix dimensions to provide overlap rows for
       coarser ones, by dividing indices by powers of 4 (and possibly doing
       ``SELECT DISTINCT`` in the subquery to remove duplicates);

     - allowing finer-grained database elements (e.g. patch) to provide overlap
       rows for coarser ones (e.g. tract), by ignoring irrelevant columns
       (e.g. the patch IDs) in the subquery (again, possible with
       ``SELECT DISTINCT``).

    But there's no point to doing any of that until the query system can
    figure out how best to ask for overlap rows when an exact match isn't
    available.
    """

    def __init__(
        self,
        db: Database,
        element: DatabaseDimensionElement,
        summaryTable: sqlalchemy.schema.Table,
        overlapTable: sqlalchemy.schema.Table,
        governor: GovernorDimensionRecordStorage,
    ):
        self._db = db
        self.element = element
        assert element.spatial is not None
        self._summaryTable = summaryTable
        self._overlapTable = overlapTable
        self._governor = governor

    @classmethod
    def initialize(
        cls,
        db: Database,
        element: DatabaseDimensionElement, *,
        context: Optional[StaticTablesContext],
        governor: GovernorDimensionRecordStorage,
    ) -> _SkyPixOverlapStorage:
        """Construct a new instance, creating tables as needed.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        element : `DatabaseDimensionElement`
            Dimension element whose overlaps are to be managed.
        context : `StaticTablesContext`, optional
            If provided, an object to use to create any new tables.  If not
            provided, ``db.ensureTableExists`` should be used instead.
        governor : `GovernorDimensionRecordStorage`
            Record storage backend for this element's governor dimension.

        Returns
        -------
        storage : `_SkyPixOverlapStorage`
            New instance backed by the summary and overlap tables.
        """
        if context is not None:
            op = context.addTable
        else:
            op = db.ensureTableExists
        summaryTable = op(
            cls._SUMMARY_TABLE_NAME_SPEC.format(element=element),
            cls._makeSummaryTableSpec(element),
        )
        overlapTable = op(
            cls._OVERLAP_TABLE_NAME_SPEC.format(element=element),
            cls._makeOverlapTableSpec(element),
        )
        # Construct via ``cls`` rather than the hard-coded class name so the
        # classmethod behaves as an alternate constructor.
        return cls(db, element, summaryTable=summaryTable, overlapTable=overlapTable,
                   governor=governor)

    _SUMMARY_TABLE_NAME_SPEC = "{element.name}_skypix_overlap_summary"

    @classmethod
    def _makeSummaryTableSpec(cls, element: DatabaseDimensionElement) -> ddl.TableSpec:
        """Create a specification for the table that records which combinations
        of skypix dimension and governor value have materialized overlaps.

        Parameters
        ----------
        element : `DatabaseDimensionElement`
            Dimension element whose overlaps are to be managed.

        Returns
        -------
        tableSpec : `ddl.TableSpec`
            Table specification.
        """
        assert element.spatial is not None
        tableSpec = ddl.TableSpec(
            fields=[
                ddl.FieldSpec(
                    name="skypix_system",
                    dtype=sqlalchemy.String,
                    length=16,
                    nullable=False,
                    primaryKey=True,
                ),
                ddl.FieldSpec(
                    name="skypix_level",
                    dtype=sqlalchemy.SmallInteger,
                    nullable=False,
                    primaryKey=True,
                ),
            ]
        )
        addDimensionForeignKey(tableSpec, element.spatial.governor, primaryKey=True)
        return tableSpec

    _OVERLAP_TABLE_NAME_SPEC = "{element.name}_skypix_overlap"

    @classmethod
    def _makeOverlapTableSpec(cls, element: DatabaseDimensionElement) -> ddl.TableSpec:
        """Create a specification for the table that holds materialized
        overlap rows.

        Parameters
        ----------
        element : `DatabaseDimensionElement`
            Dimension element whose overlaps are to be managed.

        Returns
        -------
        tableSpec : `ddl.TableSpec`
            Table specification.
        """
        assert element.spatial is not None
        tableSpec = ddl.TableSpec(
            fields=[
                ddl.FieldSpec(
                    name="skypix_system",
                    dtype=sqlalchemy.String,
                    length=16,
                    nullable=False,
                    primaryKey=True,
                ),
                ddl.FieldSpec(
                    name="skypix_level",
                    dtype=sqlalchemy.SmallInteger,
                    nullable=False,
                    primaryKey=True,
                ),
                # (more columns added below)
            ],
            unique=set(),
            foreignKeys=[
                # Foreign key to summary table.  This makes sure we don't
                # materialize any overlaps without remembering that we've done
                # so in the summary table, though it can't prevent the converse
                # of adding a summary row without adding overlap row (either of
                # those is a logic bug, of course, but we want to be defensive
                # about those).  Using ON DELETE CASCADE, it'd be very easy to
                # implement "disabling" an overlap materialization, because we
                # can just delete the summary row.
                # Note that the governor dimension column is added below, in
                # the call to addDimensionForeignKey.
                ddl.ForeignKeySpec(
                    cls._SUMMARY_TABLE_NAME_SPEC.format(element=element),
                    source=("skypix_system", "skypix_level", element.spatial.governor.name),
                    target=("skypix_system", "skypix_level", element.spatial.governor.name),
                    onDelete="CASCADE",
                ),
            ],
        )
        # Add fields for the standard element this class manages overlaps for.
        # This is guaranteed to add a column for the governor dimension,
        # because that's a required dependency of element.
        for dimension in element.required:
            addDimensionForeignKey(tableSpec, dimension, primaryKey=True)
        # Add field for the actual skypix index.  We do this later because I
        # think we care (at least a bit) about the order in which the primary
        # key is defined, in that we want a non-summary column like this one
        # to appear after the governor dimension column.
        tableSpec.fields.add(
            ddl.FieldSpec(
                name="skypix_index",
                dtype=sqlalchemy.BigInteger,
                nullable=False,
                primaryKey=True,
            )
        )
        return tableSpec

    def enable(
        self,
        storage: TableDimensionRecordStorage,
        skypix: SkyPixDimension,
        governorValue: str,
    ) -> None:
        """Enable materialization of overlaps between a skypix dimension
        and the records of ``self.element`` with a particular governor value.

        Parameters
        ----------
        storage : `TableDimensionRecordStorage`
            Storage object for the records of ``self.element``.
        skypix : `SkyPixDimension`
            The skypix dimension (system and level) for which overlaps should
            be materialized.
        governorValue : `str`
            Value of this element's governor dimension for which overlaps
            should be materialized.  For example, if ``self.element`` is
            ``visit``, this is an instrument name; if ``self.element`` is
            ``patch``, this is a skymap name.

        Notes
        -----
        If there are existing rows for the given ``governorValue``, overlap
        rows for them will be immediately computed and inserted.  At present,
        that never happens, because we only enable overlaps with
        `DimensionUniverse.commonSkyPix`, and that happens immediately after
        each governor row is inserted (and there can't be any patch rows,
        for example, until after the corresponding skymap row is inserted).

        After calling `enable` for a particular combination, any new records
        for ``self.element`` that are inserted will automatically be
        accompanied by overlap records (via calls to `insert` made
        by `TableDimensionRecordStorage` methods).
        """
        # Because we're essentially materializing a view in Python, we
        # aggressively lock all tables we're reading and writing in order to be
        # sure nothing gets out of sync.  This may not be the most efficient
        # approach possible, but we'll focus on correct before we focus on
        # fast, and enabling a new overlap combination should be a very rare
        # operation anyway, and never one we do in parallel.
        with self._db.transaction(lock=[self._governor.table, storage._table,
                                        self._summaryTable, self._overlapTable]):
            # Only the "was a row inserted" flag matters here.
            _, inserted = self._db.sync(
                self._summaryTable,
                keys={
                    "skypix_system": skypix.system.name,
                    "skypix_level": skypix.level,
                    self._governor.element.name: governorValue,
                },
            )
            if inserted:
                _LOG.debug(
                    "Precomputing initial overlaps for %s vs %s for %s=%s",
                    skypix.name,
                    self.element.name,
                    self._governor.element.name,
                    governorValue
                )
                self._fill(storage=storage, skypix=skypix, governorValue=governorValue)
            else:
                _LOG.debug(
                    "Overlaps already precomputed for %s vs %s for %s=%s",
                    skypix.name,
                    self.element.name,
                    self._governor.element.name,
                    governorValue
                )

    def _fill(
        self, *,
        storage: TableDimensionRecordStorage,
        skypix: SkyPixDimension,
        governorValue: str,
    ) -> None:
        """Insert overlap records for a newly-enabled combination of skypix
        dimension and governor value.

        This method should only be called by `enable`.

        Parameters
        ----------
        storage : `TableDimensionRecordStorage`
            Storage object for the records of ``self.element``.
        skypix : `SkyPixDimension`
            The skypix dimension (system and level) for which overlaps should
            be materialized.
        governorValue : `str`
            Value of this element's governor dimension for which overlaps
            should be materialized.  For example, if ``self.element`` is
            ``visit``, this is an instrument name; if ``self.element`` is
            ``patch``, this is a skymap name.
        """
        overlapRecords: List[dict] = []
        # `DimensionRecordStorage.fetch` as defined by the ABC expects to be
        # given iterables of data IDs that correspond to that element's graph
        # (e.g. {instrument, visit, detector}), not just some subset of it
        # (e.g. {instrument}).  But we know the implementation of `fetch` for
        # `TableDimensionRecordStorage` will use this iterable to do exactly
        # what we want.
        governorDataId = DataCoordinate.standardize({self._governor.element.name: governorValue},
                                                    graph=self._governor.element.graph)
        for record in storage.fetch(DataCoordinateIterable.fromScalar(governorDataId)):
            if record.region is None:
                continue
            baseOverlapRecord = record.dataId.byName()
            baseOverlapRecord["skypix_system"] = skypix.system.name
            baseOverlapRecord["skypix_level"] = skypix.level
            # The envelope comes back as half-open [begin, end) index ranges.
            for begin, end in skypix.pixelization.envelope(record.region):
                overlapRecords.extend(
                    dict(baseOverlapRecord, skypix_index=index) for index in range(begin, end)
                )
        _LOG.debug(
            "Inserting %d initial overlap rows for %s vs %s for %s=%r",
            len(overlapRecords),
            skypix.name,
            self.element.name,
            self._governor.element.name,
            governorValue,
        )
        self._db.insert(self._overlapTable, *overlapRecords)

    def insert(self, records: Sequence[DimensionRecord]) -> None:
        """Insert overlaps for a sequence of ``self.element`` records that
        have just been inserted.

        This must be called by any method that inserts records for that
        element (i.e. `TableDimensionRecordStorage.insert` and
        `TableDimensionRecordStorage.sync`), within the same transaction.

        Parameters
        ----------
        records : `Sequence` [ `DimensionRecord` ]
            Records for ``self.element``.  Records with `None` regions are
            ignored.  If empty, this method does nothing.
        """
        if not records:
            # Nothing to materialize: don't lock the summary table or issue a
            # query with an empty IN clause.
            return
        governorName = self._governor.element.name
        # Group records by family.governor value.
        grouped: Dict[str, List[DimensionRecord]] = defaultdict(list)
        for record in records:
            grouped[getattr(record, governorName)].append(record)
        # A plain list both feeds the IN clause and logs readably (rather
        # than as ``dict_keys([...])``).
        governorValues = list(grouped)
        _LOG.debug(
            "Precomputing new skypix overlaps for %s where %s in %s.",
            self.element.name, governorName, governorValues
        )
        # Make sure the set of combinations to materialize does not change
        # while we are materializing the ones we have, by locking the summary
        # table.  Because we aren't planning to write to the summary table,
        # this could just be a SHARED lock instead of an EXCLUSIVE one, but
        # there's no API for that right now.
        with self._db.transaction(lock=[self._summaryTable]):
            # Query for the skypix dimensions to be associated with each
            # governor value.
            gvCol = self._summaryTable.columns[governorName]
            sysCol = self._summaryTable.columns.skypix_system
            lvlCol = self._summaryTable.columns.skypix_level
            query = sqlalchemy.sql.select(
                [gvCol, sysCol, lvlCol],
            ).select_from(
                self._summaryTable
            ).where(
                gvCol.in_(governorValues)
            )
            # Group results by governor value, then skypix system.
            skypix: Dict[str, NamedKeyDict[SkyPixSystem, List[int]]] = {
                gv: NamedKeyDict() for gv in governorValues
            }
            for summaryRow in self._db.query(query):
                system = self.element.universe.skypix[summaryRow[sysCol]]
                skypix[summaryRow[gvCol]].setdefault(system, []).append(summaryRow[lvlCol])
            overlapRecords: List[dict] = []
            # Compute overlaps for one governor value at a time, but gather
            # them all up for one insert.
            for gv, group in grouped.items():
                overlapRecords.extend(self._compute(group, skypix[gv], gv))
            _LOG.debug(
                "Inserting %d new skypix overlap rows for %s where %s in %s.",
                len(overlapRecords), self.element.name, governorName, governorValues
            )
            self._db.insert(self._overlapTable, *overlapRecords)

    def _compute(
        self,
        records: Sequence[DimensionRecord],
        skypix: NamedKeyDict[SkyPixSystem, List[int]],
        governorValue: str,
    ) -> Iterator[dict]:
        """Compute all overlap rows for a particular governor dimension value
        and all of the skypix dimensions for which its overlaps are enabled.

        This method should only be called by `insert`.

        Parameters
        ----------
        records : `Sequence` [ `DimensionRecord` ]
            Records for ``self.element``.  Records with `None` regions are
            ignored.  All must have the governor value given.
        skypix : `NamedKeyDict` [ `SkyPixSystem`, `list` [ `int` ] ]
            Mapping containing all skypix systems and levels for which overlaps
            should be computed, grouped by `SkyPixSystem`.  Not modified.
        governorValue : `str`
            Value of this element's governor dimension for which overlaps
            should be computed.  For example, if ``self.element`` is ``visit``,
            this is an instrument name; if ``self.element`` is ``patch``, this
            is a skymap name.

        Yields
        ------
        row : `dict`
            Dictionary representing an overlap row.
        """
        governorName = self._governor.element.name
        # Order each system's levels finest-first once, up front, on a copy:
        # the ordering does not depend on the record, and the caller's lists
        # should not be reordered as a side effect.  Systems with no levels
        # contribute nothing.
        levelsBySystem = [
            (system, sorted(levels, reverse=True))
            for system, levels in skypix.items()
            if levels
        ]
        # Process input records one at time, computing all skypix indices for
        # each.
        for record in records:
            if record.region is None:
                continue
            assert getattr(record, governorName) == governorValue
            for system, levels in levelsBySystem:
                baseOverlapRecord = record.dataId.byName()
                baseOverlapRecord["skypix_system"] = system.name
                # Start with the first level, which is the finest-grained one.
                # Compute skypix envelope indices directly for that.
                finest = levels[0]
                indices: Dict[int, Set[int]] = {finest: set()}
                for begin, end in system[finest].pixelization.envelope(record.region):
                    indices[finest].update(range(begin, end))
                # Divide those indices by powers of 4 (and remove duplicates)
                # to work our way up to the last (coarsest) level.
                for lastLevel, nextLevel in zip(levels[:-1], levels[1:]):
                    factor = 4**(lastLevel - nextLevel)
                    indices[nextLevel] = {index//factor for index in indices[lastLevel]}
                for level in levels:
                    for index in indices[level]:
                        yield {
                            "skypix_level": level,
                            "skypix_index": index,
                            **baseOverlapRecord,  # type: ignore
                        }

    def select(
        self,
        skypix: SkyPixDimension,
        governorValues: Union[AbstractSet[str], EllipsisType],
    ) -> sqlalchemy.sql.FromClause:
        """Construct a subquery expression containing overlaps between the
        given skypix dimension and governor values.

        Parameters
        ----------
        skypix : `SkyPixDimension`
            The skypix dimension (system and level) for which overlaps should
            be returned.
        governorValues : `AbstractSet` [ `str` ] or ``...``
            Values of this element's governor dimension for which overlaps
            should be returned.  For example, if ``self.element`` is ``visit``,
            this is a set of instrument names; if ``self.element`` is
            ``patch``, this is a set of skymap names.  If ``...`` all values
            in the database are used (`GovernorDimensionRecordStorage.values`).

        Returns
        -------
        subquery : `sqlalchemy.sql.FromClause`
            A SELECT query with an alias, intended for use as a subquery, with
            a column labeled ``skypix.name`` holding the skypix index plus one
            column for each of ``self.element.graph.required.names``.

        Raises
        ------
        RuntimeError
            Raised if ``skypix`` is not the common skypix dimension and
            overlaps have not been materialized for one or more of the
            requested governor values.
        """
        if skypix != self.element.universe.commonSkyPix:
            # We guarantee elsewhere that we always materialize all overlaps
            # vs. commonSkyPix, but for everything else, we need to check that
            # we have materialized this combination of governor values and
            # skypix.
            summaryWhere = [
                self._summaryTable.columns.skypix_system == skypix.system.name,
                self._summaryTable.columns.skypix_level == skypix.level,
            ]
            gvCol = self._summaryTable.columns[self._governor.element.name]
            if governorValues is not Ellipsis:
                summaryWhere.append(gvCol.in_(list(governorValues)))
            summaryQuery = sqlalchemy.sql.select(
                [gvCol]
            ).select_from(
                self._summaryTable
            ).where(
                sqlalchemy.sql.and_(*summaryWhere)
            )
            materializedGovernorValues = {row[gvCol] for row in self._db.query(summaryQuery)}
            if governorValues is Ellipsis:
                missingGovernorValues = self._governor.values - materializedGovernorValues
            else:
                missingGovernorValues = governorValues - materializedGovernorValues
            if missingGovernorValues:
                raise RuntimeError(
                    f"Query requires an overlap join between {skypix.name} and {self.element.name} "
                    f"(for {self._governor.element.name} in {missingGovernorValues}), but these "
                    f"have not been materialized."
                )
        columns = [self._overlapTable.columns.skypix_index.label(skypix.name)]
        columns.extend(self._overlapTable.columns[name] for name in self.element.graph.required.names)
        overlapWhere = [
            self._overlapTable.columns.skypix_system == skypix.system.name,
            self._overlapTable.columns.skypix_level == skypix.level,
        ]
        if governorValues is not Ellipsis:
            overlapWhere.append(
                self._overlapTable.columns[self._governor.element.name].in_(list(governorValues))
            )
        overlapQuery = sqlalchemy.sql.select(
            columns
        ).select_from(
            self._overlapTable
        ).where(
            sqlalchemy.sql.and_(*overlapWhere)
        )
        return overlapQuery.alias(f"{self.element.name}_{skypix.name}_overlap")

    def digestTables(self) -> Iterable[sqlalchemy.schema.Table]:
        """Return tables used for schema digest.

        Returns
        -------
        tables : `Iterable` [ `sqlalchemy.schema.Table` ]
            The summary table and the overlap table, in that order.
        """
        return [self._summaryTable, self._overlapTable]