Coverage for python/lsst/daf/butler/registry/dimensions/table.py : 80%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = ["TableDimensionRecordStorage"]
25from collections import defaultdict
26import logging
27from typing import (
28 AbstractSet,
29 Dict,
30 Iterable,
31 Iterator,
32 List,
33 Optional,
34 Sequence,
35 Set,
36 Union,
37)
39import sqlalchemy
41from ...core import (
42 addDimensionForeignKey,
43 Config,
44 DatabaseDimensionElement,
45 DataCoordinate,
46 DataCoordinateIterable,
47 ddl,
48 DimensionElement,
49 DimensionRecord,
50 GovernorDimension,
51 NamedKeyDict,
52 NamedKeyMapping,
53 NamedValueSet,
54 REGION_FIELD_SPEC,
55 SimpleQuery,
56 SkyPixDimension,
57 SkyPixSystem,
58 TimespanDatabaseRepresentation,
59)
60from ..interfaces import (
61 Database,
62 DatabaseDimensionOverlapStorage,
63 DatabaseDimensionRecordStorage,
64 GovernorDimensionRecordStorage,
65 StaticTablesContext,
66)
67from ..queries import QueryBuilder
68from ..wildcards import Ellipsis, EllipsisType
# Module-level logger, named after this module.
_LOG = logging.getLogger(__name__)


# NOTE(review): this constant is not referenced anywhere else in the visible
# part of this module -- confirm whether it is still needed before relying
# on it.
MAX_FETCH_CHUNK = 1000
"""Maximum number of data IDs we fetch records at a time.

Barring something database-engine-specific, this sets the size of the actual
SQL query, not just the number of result rows, because the only way to query
for multiple data IDs in a single SELECT query via SQLAlchemy is to have an OR
term in the WHERE clause for each one.
"""
class TableDimensionRecordStorage(DatabaseDimensionRecordStorage):
    """A record storage implementation that uses a regular database table.

    Parameters
    ----------
    db : `Database`
        Interface to the database engine and namespace that will hold these
        dimension records.
    element : `DatabaseDimensionElement`
        The element whose records this storage will manage.
    table : `sqlalchemy.schema.Table`
        The logical table for the element.
    skyPixOverlap : `_SkyPixOverlapStorage`, optional
        Object that manages the tables that hold materialized spatial overlap
        joins to skypix dimensions. Should be `None` if (and only if)
        ``element.spatial is None``.
    """
    def __init__(self, db: Database, element: DatabaseDimensionElement, *, table: sqlalchemy.schema.Table,
                 skyPixOverlap: Optional[_SkyPixOverlapStorage] = None):
        self._db = db
        self._table = table
        self._element = element
        # Map each dimension name to the table column that holds its value;
        # `fetch` uses this to translate data ID keys into WHERE-clause
        # columns.
        self._fetchColumns: Dict[str, sqlalchemy.sql.ColumnElement] = {
            dimension.name: self._table.columns[name]
            for dimension, name in zip(self._element.dimensions,
                                       self._element.RecordClass.fields.dimensions.names)
        }
        self._skyPixOverlap = skyPixOverlap
        # Overlap storage objects registered later via `connect`.
        self._otherOverlaps: List[DatabaseDimensionOverlapStorage] = []

    @classmethod
    def initialize(
        cls,
        db: Database,
        element: DatabaseDimensionElement, *,
        context: Optional[StaticTablesContext] = None,
        config: Config,
        governors: NamedKeyMapping[GovernorDimension, GovernorDimensionRecordStorage],
    ) -> DatabaseDimensionRecordStorage:
        # Docstring inherited from DatabaseDimensionRecordStorage.
        spec = element.RecordClass.fields.makeTableSpec(tsRepr=db.getTimespanRepresentation())
        if context is not None:
            table = context.addTable(element.name, spec)
        else:
            table = db.ensureTableExists(element.name, spec)
        if element.spatial is None:
            # Non-spatial elements have no skypix overlaps to manage.
            return cls(db, element, table=table)
        governor = governors[element.spatial.governor]
        skyPixOverlap = _SkyPixOverlapStorage.initialize(
            db,
            element,
            context=context,
            governor=governor,
        )
        result = cls(db, element, table=table, skyPixOverlap=skyPixOverlap)

        # Whenever anyone inserts a new governor dimension value, we want
        # to enable overlaps for that value between this element and
        # commonSkyPix.
        def callback(record: DimensionRecord) -> None:
            skyPixOverlap.enable(
                result,
                element.universe.commonSkyPix,
                getattr(record, element.spatial.governor.primaryKey.name),  # type: ignore
            )

        governor.registerInsertionListener(callback)
        return result

    @property
    def element(self) -> DatabaseDimensionElement:
        # Docstring inherited from DimensionRecordStorage.element.
        return self._element

    def clearCaches(self) -> None:
        # Docstring inherited from DimensionRecordStorage.clearCaches.
        pass

    def join(
        self,
        builder: QueryBuilder, *,
        regions: Optional[NamedKeyDict[DimensionElement, sqlalchemy.sql.ColumnElement]] = None,
        timespans: Optional[NamedKeyDict[DimensionElement, TimespanDatabaseRepresentation]] = None,
    ) -> sqlalchemy.schema.Table:
        # Docstring inherited from DimensionRecordStorage.
        #
        # The return annotation now matches what this method has always
        # returned (the element's table); the runtime behavior is unchanged.
        if regions is not None:
            # Spatial joins go through the materialized overlaps with the
            # common skypix dimension rather than comparing regions directly.
            dimensions = NamedValueSet(self.element.required)
            dimensions.add(self.element.universe.commonSkyPix)
            assert self._skyPixOverlap is not None
            builder.joinTable(
                self._skyPixOverlap.select(self.element.universe.commonSkyPix, Ellipsis),
                dimensions,
            )
            regions[self.element] = self._table.columns[REGION_FIELD_SPEC.name]
        joinOn = builder.startJoin(self._table, self.element.dimensions,
                                   self.element.RecordClass.fields.dimensions.names)
        if timespans is not None:
            # Require this element's timespan to overlap every timespan
            # already in the query, then register it for later joins.
            timespanInTable = self._db.getTimespanRepresentation().fromSelectable(self._table)
            for timespanInQuery in timespans.values():
                joinOn.append(timespanInQuery.overlaps(timespanInTable))
            timespans[self.element] = timespanInTable
        builder.finishJoin(self._table, joinOn)
        return self._table

    def fetch(self, dataIds: DataCoordinateIterable) -> Iterable[DimensionRecord]:
        # Docstring inherited from DimensionRecordStorage.fetch.
        RecordClass = self.element.RecordClass
        hasTimespan = self.element.temporal is not None
        query = SimpleQuery()
        query.columns.extend(self._table.columns[name] for name in RecordClass.fields.standard.names)
        if self.element.spatial is not None:
            # Use the shared field spec for the column name, as `join` does,
            # instead of repeating the literal.
            query.columns.append(self._table.columns[REGION_FIELD_SPEC.name])
        if hasTimespan:
            tsRepr = self._db.getTimespanRepresentation()
            query.columns.extend(self._table.columns[name] for name in tsRepr.getFieldNames())
        query.join(self._table)
        dataIds.constrain(query, lambda name: self._fetchColumns[name])
        for row in self._db.query(query.combine()):
            values = dict(row)
            if hasTimespan:
                # Add the timespan extracted from the database-specific
                # columns under the name the record class expects.
                values[TimespanDatabaseRepresentation.NAME] = tsRepr.extract(values)
            yield RecordClass(**values)

    def insert(self, *records: DimensionRecord) -> None:
        # Docstring inherited from DimensionRecordStorage.insert.
        elementRows = [record.toDict() for record in records]
        if self.element.temporal is not None:
            # Replace the in-memory timespan with its database-specific
            # column representation.
            tsRepr = self._db.getTimespanRepresentation()
            for row in elementRows:
                timespan = row.pop(TimespanDatabaseRepresentation.NAME)
                tsRepr.update(timespan, result=row)
        # Insert the records and their overlap rows in one transaction.
        with self._db.transaction():
            self._db.insert(self._table, *elementRows)
            if self._skyPixOverlap is not None:
                self._skyPixOverlap.insert(records)

    def sync(self, record: DimensionRecord) -> bool:
        # Docstring inherited from DimensionRecordStorage.sync.
        compared = record.toDict()
        # Required fields identify the row; everything else is compared
        # against any existing row.
        keys = {}
        for name in record.fields.required.names:
            keys[name] = compared.pop(name)
        if self.element.temporal is not None:
            tsRepr = self._db.getTimespanRepresentation()
            timespan = compared.pop(TimespanDatabaseRepresentation.NAME)
            tsRepr.update(timespan, result=compared)
        with self._db.transaction():
            _, inserted = self._db.sync(
                self._table,
                keys=keys,
                compared=compared,
            )
            # Only a newly inserted record needs new overlap rows.
            if inserted and self._skyPixOverlap is not None:
                self._skyPixOverlap.insert([record])
        return inserted

    def digestTables(self) -> Iterable[sqlalchemy.schema.Table]:
        # Docstring inherited from DimensionRecordStorage.digestTables.
        result = [self._table]
        if self._skyPixOverlap is not None:
            result.extend(self._skyPixOverlap.digestTables())
        return result

    def connect(self, overlaps: DatabaseDimensionOverlapStorage) -> None:
        # Docstring inherited from DatabaseDimensionRecordStorage.
        self._otherOverlaps.append(overlaps)
class _SkyPixOverlapStorage:
    """A helper object for `TableDimensionRecordStorage` that manages its
    materialized overlaps with skypix dimensions.

    New instances should be constructed by calling `initialize`, not by calling
    the constructor directly.

    Parameters
    ----------
    db : `Database`
        Interface to the underlying database engine and namespace.
    element : `DatabaseDimensionElement`
        Dimension element whose overlaps are to be managed.
    summaryTable : `sqlalchemy.schema.Table`
        Table that records which combinations of skypix dimensions and
        governor dimension values have materialized overlap rows.
    overlapTable : `sqlalchemy.schema.Table`
        Table containing the actual materialized overlap rows.
    governor : `GovernorDimensionRecordStorage`
        Record storage backend for this element's governor dimension.

    Notes
    -----
    This class (and most importantly, the tables it relies on) can in principle
    manage overlaps with any skypix dimension, but at present it is
    only being used to manage relationships with the special ``commonSkyPix``
    dimension, because that's all the query system uses. Eventually, we expect
    to require users to explicitly materialize all relationships they will
    want to use in queries.

    Other possible future improvements include:

     - allowing finer-grained skypix dimensions to provide overlap rows for
       coarser ones, by dividing indices by powers of 4 (and possibly doing
       ``SELECT DISTINCT`` in the subquery to remove duplicates);

     - allowing finer-grained database elements (e.g. patch) to provide overlap
       rows for coarser ones (e.g. tract), by ignoring irrelevant columns
       (e.g. the patch IDs) in the subquery (again, possible with
       ``SELECT DISTINCT``).

    But there's no point to doing any of that until the query system can
    figure out how best to ask for overlap rows when an exact match isn't
    available.
    """
    def __init__(
        self,
        db: Database,
        element: DatabaseDimensionElement,
        summaryTable: sqlalchemy.schema.Table,
        overlapTable: sqlalchemy.schema.Table,
        governor: GovernorDimensionRecordStorage,
    ):
        self._db = db
        self.element = element
        assert element.spatial is not None
        self._summaryTable = summaryTable
        self._overlapTable = overlapTable
        self._governor = governor

    @classmethod
    def initialize(
        cls,
        db: Database,
        element: DatabaseDimensionElement, *,
        context: Optional[StaticTablesContext],
        governor: GovernorDimensionRecordStorage,
    ) -> _SkyPixOverlapStorage:
        """Construct a new instance, creating tables as needed.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        element : `DatabaseDimensionElement`
            Dimension element whose overlaps are to be managed.
        context : `StaticTablesContext`, optional
            If provided, an object to use to create any new tables. If not
            provided, ``db.ensureTableExists`` should be used instead.
        governor : `GovernorDimensionRecordStorage`
            Record storage backend for this element's governor dimension.

        Returns
        -------
        storage : `_SkyPixOverlapStorage`
            New instance of ``cls`` backed by the summary and overlap tables.
        """
        if context is not None:
            op = context.addTable
        else:
            op = db.ensureTableExists
        summaryTable = op(
            cls._SUMMARY_TABLE_NAME_SPEC.format(element=element),
            cls._makeSummaryTableSpec(element),
        )
        overlapTable = op(
            cls._OVERLAP_TABLE_NAME_SPEC.format(element=element),
            cls._makeOverlapTableSpec(element),
        )
        # Construct via ``cls`` (not the hard-coded class name) so that
        # subclasses calling this classmethod get an instance of themselves.
        return cls(db, element, summaryTable=summaryTable, overlapTable=overlapTable,
                   governor=governor)

    _SUMMARY_TABLE_NAME_SPEC = "{element.name}_skypix_overlap_summary"

    @classmethod
    def _makeSummaryTableSpec(cls, element: DatabaseDimensionElement) -> ddl.TableSpec:
        """Create a specification for the table that records which combinations
        of skypix dimension and governor value have materialized overlaps.

        Parameters
        ----------
        element : `DatabaseDimensionElement`
            Dimension element whose overlaps are to be managed.

        Returns
        -------
        tableSpec : `ddl.TableSpec`
            Table specification.
        """
        assert element.spatial is not None
        tableSpec = ddl.TableSpec(
            fields=[
                ddl.FieldSpec(
                    name="skypix_system",
                    dtype=sqlalchemy.String,
                    length=16,
                    nullable=False,
                    primaryKey=True,
                ),
                ddl.FieldSpec(
                    name="skypix_level",
                    dtype=sqlalchemy.SmallInteger,
                    nullable=False,
                    primaryKey=True,
                ),
            ]
        )
        addDimensionForeignKey(tableSpec, element.spatial.governor, primaryKey=True)
        return tableSpec

    _OVERLAP_TABLE_NAME_SPEC = "{element.name}_skypix_overlap"

    @classmethod
    def _makeOverlapTableSpec(cls, element: DatabaseDimensionElement) -> ddl.TableSpec:
        """Create a specification for the table that holds materialized
        overlap rows.

        Parameters
        ----------
        element : `DatabaseDimensionElement`
            Dimension element whose overlaps are to be managed.

        Returns
        -------
        tableSpec : `ddl.TableSpec`
            Table specification.
        """
        assert element.spatial is not None
        tableSpec = ddl.TableSpec(
            fields=[
                ddl.FieldSpec(
                    name="skypix_system",
                    dtype=sqlalchemy.String,
                    length=16,
                    nullable=False,
                    primaryKey=True,
                ),
                ddl.FieldSpec(
                    name="skypix_level",
                    dtype=sqlalchemy.SmallInteger,
                    nullable=False,
                    primaryKey=True,
                ),
                # (more columns added below)
            ],
            unique=set(),
            foreignKeys=[
                # Foreign key to summary table. This makes sure we don't
                # materialize any overlaps without remembering that we've done
                # so in the summary table, though it can't prevent the converse
                # of adding a summary row without adding overlap row (either of
                # those is a logic bug, of course, but we want to be defensive
                # about those). Using ON DELETE CASCADE, it'd be very easy to
                # implement "disabling" an overlap materialization, because we
                # can just delete the summary row.
                # Note that the governor dimension column is added below, in
                # the call to addDimensionForeignKey.
                ddl.ForeignKeySpec(
                    cls._SUMMARY_TABLE_NAME_SPEC.format(element=element),
                    source=("skypix_system", "skypix_level", element.spatial.governor.name),
                    target=("skypix_system", "skypix_level", element.spatial.governor.name),
                    onDelete="CASCADE",
                ),
            ],
        )
        # Add fields for the standard element this class manages overlaps for.
        # This is guaranteed to add a column for the governor dimension,
        # because that's a required dependency of element.
        for dimension in element.required:
            addDimensionForeignKey(tableSpec, dimension, primaryKey=True)
        # Add field for the actual skypix index. We do this later because I
        # think we care (at least a bit) about the order in which the primary
        # key is defined, in that we want a non-summary column like this one
        # to appear after the governor dimension column.
        tableSpec.fields.add(
            ddl.FieldSpec(
                name="skypix_index",
                dtype=sqlalchemy.BigInteger,
                nullable=False,
                primaryKey=True,
            )
        )
        return tableSpec

    def enable(
        self,
        storage: TableDimensionRecordStorage,
        skypix: SkyPixDimension,
        governorValue: str,
    ) -> None:
        """Enable materialization of overlaps between a skypix dimension
        and the records of ``self.element`` with a particular governor value.

        Parameters
        ----------
        storage : `TableDimensionRecordStorage`
            Storage object for the records of ``self.element``.
        skypix : `SkyPixDimension`
            The skypix dimension (system and level) for which overlaps should
            be materialized.
        governorValue : `str`
            Value of this element's governor dimension for which overlaps
            should be materialized. For example, if ``self.element`` is
            ``visit``, this is an instrument name; if ``self.element`` is
            ``patch``, this is a skymap name.

        Notes
        -----
        If there are existing rows for the given ``governorValue``, overlap
        rows for them will be immediately computed and inserted. At present,
        that never happens, because we only enable overlaps with
        `DimensionUniverse.commonSkyPix`, and that happens immediately after
        each governor row is inserted (and there can't be any patch rows,
        for example, until after the corresponding skymap row is inserted).

        After calling `enable` for a particular combination, any new records
        for ``self.element`` that are inserted will automatically be
        accompanied by overlap records (via calls to `insert` made
        by `TableDimensionRecordStorage` methods).
        """
        # Because we're essentially materializing a view in Python, we
        # aggressively lock all tables we're reading and writing in order to be
        # sure nothing gets out of sync. This may not be the most efficient
        # approach possible, but we'll focus on correct before we focus on
        # fast, and enabling a new overlap combination should be a very rare
        # operation anyway, and never one we do in parallel.
        with self._db.transaction(lock=[self._governor.table, storage._table,
                                        self._summaryTable, self._overlapTable]):
            # Only the "was a row inserted" flag matters here; the synced
            # row itself is not used.
            _, inserted = self._db.sync(
                self._summaryTable,
                keys={
                    "skypix_system": skypix.system.name,
                    "skypix_level": skypix.level,
                    self._governor.element.name: governorValue,
                },
            )
            if inserted:
                _LOG.debug(
                    "Precomputing initial overlaps for %s vs %s for %s=%s",
                    skypix.name,
                    self.element.name,
                    self._governor.element.name,
                    governorValue
                )
                self._fill(storage=storage, skypix=skypix, governorValue=governorValue)
            else:
                _LOG.debug(
                    "Overlaps already precomputed for %s vs %s for %s=%s",
                    skypix.name,
                    self.element.name,
                    self._governor.element.name,
                    governorValue
                )

    def _fill(
        self, *,
        storage: TableDimensionRecordStorage,
        skypix: SkyPixDimension,
        governorValue: str,
    ) -> None:
        """Insert overlap records for a newly-enabled combination of skypix
        dimension and governor value.

        This method should only be called by `enable`.

        Parameters
        ----------
        storage : `TableDimensionRecordStorage`
            Storage object for the records of ``self.element``.
        skypix : `SkyPixDimension`
            The skypix dimension (system and level) for which overlaps should
            be materialized.
        governorValue : `str`
            Value of this element's governor dimension for which overlaps
            should be materialized. For example, if ``self.element`` is
            ``visit``, this is an instrument name; if ``self.element`` is
            ``patch``, this is a skymap name.
        """
        overlapRecords: List[dict] = []
        # `DimensionRecordStorage.fetch` as defined by the ABC expects to be
        # given iterables of data IDs that correspond to that element's graph
        # (e.g. {instrument, visit, detector}), not just some subset of it
        # (e.g. {instrument}). But we know the implementation of `fetch` for
        # `TableDimensionRecordStorage` will use this iterable to do exactly
        # what we want.
        governorDataId = DataCoordinate.standardize({self._governor.element.name: governorValue},
                                                    graph=self._governor.element.graph)
        for record in storage.fetch(DataCoordinateIterable.fromScalar(governorDataId)):
            if record.region is None:  # type: ignore
                continue
            baseOverlapRecord = record.dataId.byName()
            baseOverlapRecord["skypix_system"] = skypix.system.name
            baseOverlapRecord["skypix_level"] = skypix.level
            # Each envelope entry is a half-open [begin, end) range of skypix
            # indices; expand it to one overlap row per index.
            for begin, end in skypix.pixelization.envelope(record.region):  # type: ignore
                overlapRecords.extend(
                    dict(baseOverlapRecord, skypix_index=index) for index in range(begin, end)
                )
        _LOG.debug(
            "Inserting %d initial overlap rows for %s vs %s for %s=%r",
            len(overlapRecords),
            skypix.name,
            self.element.name,
            self._governor.element.name,
            governorValue,
        )
        self._db.insert(self._overlapTable, *overlapRecords)

    def insert(self, records: Sequence[DimensionRecord]) -> None:
        """Insert overlaps for a sequence of ``self.element`` records that
        have just been inserted.

        This must be called by any method that inserts records for that
        element (i.e. `TableDimensionRecordStorage.insert` and
        `TableDimensionRecordStorage.sync`), within the same transaction.

        Parameters
        ----------
        records : `Sequence` [ `DimensionRecord` ]
            Records for ``self.element``. Records with `None` regions are
            ignored. If empty, this method does nothing.
        """
        if not records:
            # Nothing to materialize; avoid locking the summary table and
            # issuing a query with an empty IN clause.
            return
        # Group records by family.governor value.
        grouped: Dict[str, List[DimensionRecord]] = defaultdict(list)
        for record in records:
            grouped[getattr(record, self._governor.element.name)].append(record)
        _LOG.debug(
            "Precomputing new skypix overlaps for %s where %s in %s.",
            self.element.name, self._governor.element.name, grouped.keys()
        )
        # Make sure the set of combinations to materialize does not change
        # while we are materializing the ones we have, by locking the summary
        # table. Because we aren't planning to write to the summary table,
        # this could just be a SHARED lock instead of an EXCLUSIVE one, but
        # there's no API for that right now.
        with self._db.transaction(lock=[self._summaryTable]):
            # Query for the skypix dimensions to be associated with each
            # governor value.
            gvCol = self._summaryTable.columns[self._governor.element.name]
            sysCol = self._summaryTable.columns.skypix_system
            lvlCol = self._summaryTable.columns.skypix_level
            query = sqlalchemy.sql.select(
                [gvCol, sysCol, lvlCol],
            ).select_from(
                self._summaryTable
            ).where(
                gvCol.in_(list(grouped.keys()))
            )
            # Group results by governor value, then skypix system.
            skypix: Dict[str, NamedKeyDict[SkyPixSystem, List[int]]] = {
                gv: NamedKeyDict() for gv in grouped.keys()
            }
            for summaryRow in self._db.query(query):
                system = self.element.universe.skypix[summaryRow[sysCol]]
                skypix[summaryRow[gvCol]].setdefault(system, []).append(summaryRow[lvlCol])
            overlapRecords: List[dict] = []
            # Compute overlaps for one governor value at a time, but gather
            # them all up for one insert.
            for gv, group in grouped.items():
                overlapRecords.extend(self._compute(group, skypix[gv], gv))
            _LOG.debug(
                "Inserting %d new skypix overlap rows for %s where %s in %s.",
                len(overlapRecords), self.element.name, self._governor.element.name, grouped.keys()
            )
            self._db.insert(self._overlapTable, *overlapRecords)

    def _compute(
        self,
        records: Sequence[DimensionRecord],
        skypix: NamedKeyDict[SkyPixSystem, List[int]],
        governorValue: str,
    ) -> Iterator[dict]:
        """Compute all overlap rows for a particular governor dimension value
        and all of the skypix dimensions for which its overlaps are enabled.

        This method should only be called by `insert`.

        Parameters
        ----------
        records : `Sequence` [ `DimensionRecord` ]
            Records for ``self.element``. Records with `None` regions are
            ignored. All must have the governor value given.
        skypix : `NamedKeyDict` [ `SkyPixSystem`, `list` [ `int` ] ]
            Mapping containing all skypix systems and levels for which overlaps
            should be computed, grouped by `SkyPixSystem`. Not modified.
        governorValue : `str`
            Value of this element's governor dimension for which overlaps
            should be computed. For example, if ``self.element`` is ``visit``,
            this is an instrument name; if ``self.element`` is ``patch``, this
            is a skymap name.

        Yields
        ------
        row : `dict`
            Dictionary representing an overlap row.
        """
        # Order each system's levels finest-first once, up front, working on
        # copies so the caller's lists are left untouched. Systems with no
        # levels contribute no rows.
        levelsBySystem = [
            (system, sorted(levels, reverse=True))
            for system, levels in skypix.items()
            if levels
        ]
        # Process input records one at a time, computing all skypix indices
        # for each.
        for record in records:
            if record.region is None:  # type: ignore
                continue
            assert getattr(record, self._governor.element.name) == governorValue
            for system, levels in levelsBySystem:
                baseOverlapRecord = record.dataId.byName()
                baseOverlapRecord["skypix_system"] = system.name
                # Start with the first level, which is the finest-grained one.
                # Compute skypix envelope indices directly for that.
                finest = levels[0]
                indices: Dict[int, Set[int]] = {finest: set()}
                for begin, end in system[finest].pixelization.envelope(record.region):  # type: ignore
                    indices[finest].update(range(begin, end))
                # Divide those indices by powers of 4 (and remove duplicates)
                # to work our way up to the last (coarsest) level.
                for lastLevel, nextLevel in zip(levels[:-1], levels[1:]):
                    factor = 4**(lastLevel - nextLevel)
                    indices[nextLevel] = {index//factor for index in indices[lastLevel]}
                for level in levels:
                    yield from (
                        {
                            "skypix_level": level,
                            "skypix_index": index,
                            **baseOverlapRecord,  # type: ignore
                        } for index in indices[level]
                    )

    def select(
        self,
        skypix: SkyPixDimension,
        governorValues: Union[AbstractSet[str], EllipsisType],
    ) -> sqlalchemy.sql.FromClause:
        """Construct a subquery expression containing overlaps between the
        given skypix dimension and governor values.

        Parameters
        ----------
        skypix : `SkyPixDimension`
            The skypix dimension (system and level) for which overlaps should
            be materialized.
        governorValues : `AbstractSet` [ `str` ] or ``...``
            Values of this element's governor dimension for which overlaps
            should be returned. For example, if ``self.element`` is ``visit``,
            this is a set of instrument names; if ``self.element`` is
            ``patch``, this is a set of skymap names. If ``...`` all values
            in the database are used (`GovernorDimensionRecordStorage.values`).

        Returns
        -------
        subquery : `sqlalchemy.sql.FromClause`
            A SELECT query with an alias, intended for use as a subquery, with
            columns equal to ``self.element.required.names`` + ``skypix.name``.

        Raises
        ------
        RuntimeError
            Raised if ``skypix`` is not the common skypix dimension and
            overlaps with it have not been materialized for one or more of
            the requested governor values.
        """
        if skypix != self.element.universe.commonSkyPix:
            # We guarantee elsewhere that we always materialize all overlaps
            # vs. commonSkyPix, but for everything else, we need to check that
            # we have materialized this combination of governor values and
            # skypix.
            summaryWhere = [
                self._summaryTable.columns.skypix_system == skypix.system.name,
                self._summaryTable.columns.skypix_level == skypix.level,
            ]
            gvCol = self._summaryTable.columns[self._governor.element.name]
            if governorValues is not Ellipsis:
                summaryWhere.append(gvCol.in_(list(governorValues)))
            summaryQuery = sqlalchemy.sql.select(
                [gvCol]
            ).select_from(
                self._summaryTable
            ).where(
                sqlalchemy.sql.and_(*summaryWhere)
            )
            materializedGovernorValues = {row[gvCol] for row in self._db.query(summaryQuery)}
            if governorValues is Ellipsis:
                missingGovernorValues = self._governor.values - materializedGovernorValues
            else:
                missingGovernorValues = governorValues - materializedGovernorValues
            if missingGovernorValues:
                raise RuntimeError(
                    f"Query requires an overlap join between {skypix.name} and {self.element.name} "
                    f"(for {self._governor.element.name} in {missingGovernorValues}), but these "
                    f"have not been materialized."
                )
        # Expose the skypix index under the skypix dimension's own name so the
        # subquery can be joined on it like any other dimension column.
        columns = [self._overlapTable.columns.skypix_index.label(skypix.name)]
        columns.extend(self._overlapTable.columns[name] for name in self.element.graph.required.names)
        overlapWhere = [
            self._overlapTable.columns.skypix_system == skypix.system.name,
            self._overlapTable.columns.skypix_level == skypix.level,
        ]
        if governorValues is not Ellipsis:
            overlapWhere.append(
                self._overlapTable.columns[self._governor.element.name].in_(list(governorValues))
            )
        overlapQuery = sqlalchemy.sql.select(
            columns
        ).select_from(
            self._overlapTable
        ).where(
            sqlalchemy.sql.and_(*overlapWhere)
        )
        return overlapQuery.alias(f"{self.element.name}_{skypix.name}_overlap")

    def digestTables(self) -> Iterable[sqlalchemy.schema.Table]:
        """Return tables used for schema digest.

        Returns
        -------
        tables : `Iterable` [ `sqlalchemy.schema.Table` ]
            Possibly empty set of tables for schema digest calculations.
        """
        return [self._summaryTable, self._overlapTable]