Coverage for python/lsst/daf/butler/registry/dimensions/table.py: 92%
181 statements
« prev ^ index » next coverage.py v6.5.0, created at 2023-04-04 02:05 -0700
« prev ^ index » next coverage.py v6.5.0, created at 2023-04-04 02:05 -0700
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = ["TableDimensionRecordStorage"]
25import dataclasses
26import logging
27from collections.abc import Mapping, Sequence, Set
28from typing import Any
30import sqlalchemy
31from lsst.daf.relation import Join, Relation, sql
33from ...core import (
34 DatabaseDimensionElement,
35 DataCoordinate,
36 DimensionElement,
37 DimensionKeyColumnTag,
38 DimensionRecord,
39 GovernorDimension,
40 LogicalColumn,
41 NamedKeyMapping,
42 SkyPixDimension,
43 TimespanDatabaseRepresentation,
44 addDimensionForeignKey,
45 ddl,
46)
47from .. import queries
48from ..interfaces import (
49 Database,
50 DatabaseDimensionOverlapStorage,
51 DatabaseDimensionRecordStorage,
52 GovernorDimensionRecordStorage,
53 StaticTablesContext,
54)
# Module-level logger, named after this module.
_LOG = logging.getLogger(__name__)


# NOTE(review): this constant is not referenced anywhere in the visible part
# of this module; presumably it is consumed elsewhere -- confirm before
# changing or removing it.
MAX_FETCH_CHUNK = 1000
"""Maximum number of data IDs we fetch records at a time.

Barring something database-engine-specific, this sets the size of the actual
SQL query, not just the number of result rows, because the only way to query
for multiple data IDs in a single SELECT query via SQLAlchemy is to have an OR
term in the WHERE clause for each one.
"""
69class TableDimensionRecordStorage(DatabaseDimensionRecordStorage):
70 """A record storage implementation uses a regular database table.
72 Parameters
73 ----------
74 db : `Database`
75 Interface to the database engine and namespace that will hold these
76 dimension records.
77 element : `DatabaseDimensionElement`
78 The element whose records this storage will manage.
79 table : `sqlalchemy.schema.Table`
80 The logical table for the element.
81 skypix_overlap_tables : `_SkyPixOverlapTables`, optional
82 Object that manages the tables that hold materialized spatial overlap
83 joins to skypix dimensions. Should be `None` if (and only if)
84 ``element.spatial is None``.
85 """
87 def __init__(
88 self,
89 db: Database,
90 element: DatabaseDimensionElement,
91 *,
92 table: sqlalchemy.schema.Table,
93 skypix_overlap_tables: _SkyPixOverlapTables | None = None,
94 ):
95 self._db = db
96 self._table = table
97 self._element = element
98 self._fetchColumns: dict[str, sqlalchemy.sql.ColumnElement] = {
99 dimension.name: self._table.columns[name]
100 for dimension, name in zip(
101 self._element.dimensions, self._element.RecordClass.fields.dimensions.names
102 )
103 }
104 self._skypix_overlap_tables = skypix_overlap_tables
105 self._otherOverlaps: dict[str, DatabaseDimensionOverlapStorage] = {}
107 @classmethod
108 def initialize(
109 cls,
110 db: Database,
111 element: DatabaseDimensionElement,
112 *,
113 context: StaticTablesContext | None = None,
114 config: Mapping[str, Any],
115 governors: NamedKeyMapping[GovernorDimension, GovernorDimensionRecordStorage],
116 view_target: DatabaseDimensionRecordStorage | None = None,
117 ) -> DatabaseDimensionRecordStorage:
118 # Docstring inherited from DatabaseDimensionRecordStorage.
119 assert view_target is None, f"Storage for {element} is not a view."
120 spec = element.RecordClass.fields.makeTableSpec(TimespanReprClass=db.getTimespanRepresentation())
121 if context is not None: 121 ↛ 124line 121 didn't jump to line 124, because the condition on line 121 was never false
122 table = context.addTable(element.name, spec)
123 else:
124 table = db.ensureTableExists(element.name, spec)
125 if element.spatial is not None:
126 governor = governors[element.spatial.governor]
127 skypix_overlap_tables = _SkyPixOverlapTables.initialize(db, element, context=context)
128 result = cls(db, element, table=table, skypix_overlap_tables=skypix_overlap_tables)
129 governor.registerInsertionListener(result._on_governor_insert)
130 return result
131 else:
132 return cls(db, element, table=table)
134 @property
135 def element(self) -> DatabaseDimensionElement:
136 # Docstring inherited from DimensionRecordStorage.element.
137 return self._element
139 def clearCaches(self) -> None:
140 # Docstring inherited from DimensionRecordStorage.clearCaches.
141 pass
143 def make_relation(self, context: queries.SqlQueryContext) -> Relation:
144 # Docstring inherited from DimensionRecordStorage.
145 payload = self._build_sql_payload(self._table, context.column_types)
146 return context.sql_engine.make_leaf(
147 payload.columns_available.keys(),
148 name=self.element.name,
149 payload=payload,
150 )
152 def fetch_one(self, data_id: DataCoordinate, context: queries.SqlQueryContext) -> DimensionRecord | None:
153 # Docstring inherited from DimensionRecordStorage.
154 from .. import queries
156 relation = self.join(context.make_initial_relation(), Join(), context).with_rows_satisfying(
157 context.make_data_coordinate_predicate(data_id, full=False)
158 )[0:1]
159 rows = list(context.fetch_iterable(relation))
160 if not rows:
161 return None
162 reader = queries.DimensionRecordReader(self._element)
163 return reader.read(rows[0])
165 def insert(self, *records: DimensionRecord, replace: bool = False, skip_existing: bool = False) -> None:
166 # Docstring inherited from DimensionRecordStorage.insert.
167 elementRows = [record.toDict() for record in records]
168 if self.element.temporal is not None:
169 TimespanReprClass = self._db.getTimespanRepresentation()
170 for row in elementRows:
171 timespan = row.pop(TimespanDatabaseRepresentation.NAME)
172 TimespanReprClass.update(timespan, result=row)
173 with self._db.transaction():
174 if replace:
175 self._db.replace(self._table, *elementRows)
176 elif skip_existing:
177 self._db.ensure(self._table, *elementRows, primary_key_only=True)
178 else:
179 self._db.insert(self._table, *elementRows)
180 if self._skypix_overlap_tables is not None:
181 self._insert_skypix_overlaps(records, replace=replace, skip_existing=skip_existing)
183 def sync(self, record: DimensionRecord, update: bool = False) -> bool | dict[str, Any]:
184 # Docstring inherited from DimensionRecordStorage.sync.
185 compared = record.toDict()
186 keys = {}
187 for name in record.fields.required.names:
188 keys[name] = compared.pop(name)
189 if self.element.temporal is not None:
190 TimespanReprClass = self._db.getTimespanRepresentation()
191 timespan = compared.pop(TimespanDatabaseRepresentation.NAME)
192 TimespanReprClass.update(timespan, result=compared)
193 with self._db.transaction():
194 _, inserted_or_updated = self._db.sync(
195 self._table,
196 keys=keys,
197 compared=compared,
198 update=update,
199 )
200 if inserted_or_updated and self._skypix_overlap_tables is not None:
201 if inserted_or_updated is True:
202 # Inserted a new row, so we just need to insert new overlap
203 # rows.
204 self._insert_skypix_overlaps([record])
205 elif "region" in inserted_or_updated: 205 ↛ 193line 205 didn't jump to line 193
206 # Updated the region, so we need to delete old overlap rows
207 # and insert new ones.
208 self._insert_skypix_overlaps([record], replace=True)
209 # We updated something other than a region.
210 return inserted_or_updated
212 def digestTables(self) -> list[sqlalchemy.schema.Table]:
213 # Docstring inherited from DimensionRecordStorage.digestTables.
214 result = [self._table]
215 if self._skypix_overlap_tables is not None:
216 result.append(self._skypix_overlap_tables.summary)
217 result.append(self._skypix_overlap_tables.overlaps)
218 return result
220 def connect(self, overlaps: DatabaseDimensionOverlapStorage) -> None:
221 # Docstring inherited from DatabaseDimensionRecordStorage.
222 (other,) = set(overlaps.elements) - {self.element}
223 self._otherOverlaps[other.name] = overlaps
225 def make_spatial_join_relation(
226 self,
227 other: DimensionElement,
228 context: queries.SqlQueryContext,
229 governor_constraints: Mapping[str, Set[str]],
230 ) -> Relation | None:
231 # Docstring inherited from DatabaseDimensionRecordStorage.
232 match other:
233 case SkyPixDimension() as skypix:
234 return self._make_skypix_join_relation(skypix, context)
235 case DatabaseDimensionElement() as other: 235 ↛ 237line 235 didn't jump to line 237, because the pattern on line 235 always matched
236 return self._otherOverlaps[other.name].make_relation(context, governor_constraints)
237 case _:
238 raise TypeError(f"Unexpected dimension element type for spatial join: {other}.")
240 def _on_governor_insert(self, record: DimensionRecord) -> None:
241 """A `GovernorDimensionRecordStorage.registerInsertionListener`
242 callback for this element.
244 Parameters
245 ----------
246 record : `DimensionRecord`
247 Record for this element's governor dimension.
248 """
249 # We need to enable overlaps between this new governor dimension value
250 # and the common skypix dimension to record that we materialize
251 # overlaps for that combination. Foreign keys guarantee that there
252 # can't be any rows of this storage object's own element with that
253 # governor value yet, so we know there's nothing to insert into the
254 # overlaps table yet.
255 skypix = self.element.universe.commonSkyPix
256 assert self._element.spatial is not None, "Only called for spatial dimension elements."
257 assert (
258 self._skypix_overlap_tables is not None
259 ), "Spatial dimension elements always have skypix overlap tables."
260 governor = self._element.spatial.governor
261 self._db.sync(
262 self._skypix_overlap_tables.summary,
263 keys={
264 "skypix_system": skypix.system.name,
265 "skypix_level": skypix.level,
266 governor.name: record.dataId[governor.name],
267 },
268 )
270 def _insert_skypix_overlaps(
271 self, records: Sequence[DimensionRecord], replace: bool = False, skip_existing: bool = False
272 ) -> None:
273 """Compute and insert overlap rows between this dimesion element and
274 the common skypix system.
276 Parameters
277 ----------
278 records : `Sequence` [ `DimensionRecord` ]
279 Records for ``self.element`` that are being inserted.
280 replace : `bool`, optional
281 If `True`, the given records are being inserted in a mode that may
282 replace existing records, and hence overlap rows may need to be
283 replaced as well.
284 skip_existing : `bool`, optional
285 If `True`, the given records are being inserted in a mode that
286 ignored existing records with the same data ID, and hence overlap
287 rows need to be inserted this way as well.
288 """
289 assert self._element.spatial is not None, "Only called for spatial dimension elements."
290 assert (
291 self._skypix_overlap_tables is not None
292 ), "Spatial dimension elements always have skypix overlap tables."
293 # At present, only overlaps with the "commonSkyPix" system can be
294 # materialized, so we just compute and insert overlaps with those.
295 #
296 # To guard against this code being used with a data repository in which
297 # newer code has enabled other overlaps, we check afterwards that the
298 # summary table only contains commonSkyPix for all of these governor
299 # dimensions. In the future, we'll have to think about whether we need
300 # some table locking to guarantee consistency for those other overlaps
301 # if the summary table is updated at the same time as records are
302 # being inserted. This should happen within the same transaction
303 # (handled by the caller) so that previous inserts get rolled back.
304 skypix = self._element.universe.commonSkyPix
305 if replace:
306 # Since any of the new records might have replaced existing ones
307 # that already have overlap records, and we don't know which, we
308 # have no choice but to delete all overlaps for these records and
309 # recompute them.
310 # We include the skypix_system and skypix_level column values
311 # explicitly instead of just letting the query search for all
312 # of those related to the given records, because they are the
313 # first columns in the primary key, and hence searching with
314 # them will be way faster (and we don't want to add a new index
315 # just for this operation).
316 to_delete: list[dict[str, Any]] = [
317 {"skypix_system": skypix.system.name, "skypix_level": skypix.level, **record.dataId.byName()}
318 for record in records
319 ]
320 _LOG.debug("Deleting old common skypix overlaps for %s.", self.element.name)
321 self._db.delete(
322 self._skypix_overlap_tables.overlaps,
323 ["skypix_system", "skypix_level"] + list(self.element.graph.required.names),
324 *to_delete,
325 )
326 _LOG.debug("Precomputing common skypix overlaps for %s.", self.element.name)
327 overlap_records: list[dict[str, Any]] = []
328 for record in records:
329 if record.region is None:
330 continue
331 base_overlap_record = record.dataId.byName()
332 base_overlap_record["skypix_system"] = skypix.system.name
333 base_overlap_record["skypix_level"] = skypix.level
334 for begin, end in skypix.pixelization.envelope(record.region):
335 for index in range(begin, end):
336 overlap_records.append({"skypix_index": index, **base_overlap_record})
337 _LOG.debug("Inserting %d new skypix overlap rows for %s.", len(overlap_records), self.element.name)
338 if skip_existing:
339 self._db.ensure(self._skypix_overlap_tables.overlaps, *overlap_records, primary_key_only=True)
340 else:
341 self._db.insert(self._skypix_overlap_tables.overlaps, *overlap_records)
342 # Finally we check for non-commonSkyPix values in the summary table, as
343 # noted above.
344 summary = self._skypix_overlap_tables.summary
345 check_sql = (
346 sqlalchemy.sql.select(summary.columns.skypix_system, summary.columns.skypix_level)
347 .select_from(summary)
348 .where(
349 sqlalchemy.sql.not_(
350 sqlalchemy.sql.and_(
351 summary.columns.skypix_system == skypix.system.name,
352 summary.columns.skypix_level == skypix.level,
353 )
354 )
355 )
356 )
357 with self._db.query(check_sql) as sql_result:
358 bad_summary_rows = sql_result.fetchall()
359 if bad_summary_rows: 359 ↛ 360line 359 didn't jump to line 360, because the condition on line 359 was never true
360 bad_skypix_names = [f"{row.skypix_system}{row.skypix.level}" for row in bad_summary_rows]
361 raise RuntimeError(
362 f"Data repository has overlaps between {self._element} and {bad_skypix_names} that "
363 "are not supported by this version of daf_butler. Please use a newer version."
364 )
366 def _make_skypix_join_relation(
367 self,
368 skypix: SkyPixDimension,
369 context: queries.SqlQueryContext,
370 ) -> Relation | None:
371 """Construct a subquery expression containing overlaps between the
372 given skypix dimension and governor values.
374 Parameters
375 ----------
376 skypix : `SkyPixDimension`
377 The skypix dimension (system and level) for which overlaps should
378 be materialized.
379 context : `.queries.SqlQueryContext`
380 Object that manages relation engines and database-side state
381 (e.g. temporary tables) for the query.
383 Returns
384 -------
385 relation : `sql.Relation` or `None`
386 Join relation, or `None` if overlaps are not materialized for this
387 combination of dimensions.
388 """
389 assert self._element.spatial is not None, "Only called for spatial dimension elements."
390 assert (
391 self._skypix_overlap_tables is not None
392 ), "Spatial dimension elements always have skypix overlap tables."
393 if skypix != self._element.universe.commonSkyPix:
394 return None
395 table = self._skypix_overlap_tables.overlaps
396 payload = sql.Payload[LogicalColumn](table)
397 payload.columns_available[
398 DimensionKeyColumnTag(skypix.name)
399 ] = payload.from_clause.columns.skypix_index
400 for dimension_name in self.element.graph.required.names:
401 payload.columns_available[DimensionKeyColumnTag(dimension_name)] = payload.from_clause.columns[
402 dimension_name
403 ]
404 payload.where.append(table.columns.skypix_system == skypix.system.name)
405 payload.where.append(table.columns.skypix_level == skypix.level)
406 leaf = context.sql_engine.make_leaf(
407 payload.columns_available.keys(),
408 name=f"{self.element.name}_{skypix.name}_overlap",
409 payload=payload,
410 )
411 return leaf
@dataclasses.dataclass
class _SkyPixOverlapTables:
    """Pair of tables backing the materialized skypix overlaps of a single
    database dimension element, for use by `TableDimensionRecordStorage`.

    Instances are meant to be obtained from `initialize`; the constructor
    generated by ``dataclass`` should not be invoked directly.

    Notes
    -----
    Together with the related methods of `TableDimensionRecordStorage`, this
    class could in principle handle overlaps between a database dimension
    element and any skypix dimension.  At present it is only used for the
    special ``commonSkyPix`` dimension, because that is all the query system
    uses.  Eventually users are expected to be required to materialize more
    relationships explicitly.

    Possible future improvements include:

    - letting finer-grained skypix dimensions supply overlap rows for coarser
      ones, by dividing indices by powers of 4 (possibly with ``SELECT
      DISTINCT`` in the subquery to drop duplicates);

    - letting finer-grained database elements (e.g. patch) supply overlap
      rows for coarser ones (e.g. tract), by ignoring irrelevant columns
      (e.g. the patch IDs) in the subquery (again, possibly with ``SELECT
      DISTINCT``).

    None of that is worthwhile until the query system can work out how best
    to ask for overlap rows when an exact match is not available.
    """

    summary: sqlalchemy.schema.Table
    """Table recording which governor value / skypix combinations have
    materialized overlaps.
    """

    overlaps: sqlalchemy.schema.Table
    """Table holding the overlap rows themselves.
    """

    # Name templates for the two tables; formatted with ``element=<element>``.
    _SUMMARY_TABLE_NAME_SPEC = "{element.name}_skypix_overlap_summary"

    _OVERLAP_TABLE_NAME_SPEC = "{element.name}_skypix_overlap"

    @classmethod
    def initialize(
        cls,
        db: Database,
        element: DatabaseDimensionElement,
        *,
        context: StaticTablesContext | None,
    ) -> _SkyPixOverlapTables:
        """Build a new instance, creating the tables if necessary.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        element : `DatabaseDimensionElement`
            Dimension element whose overlaps are to be managed.
        context : `StaticTablesContext`, optional
            If not `None`, the object used to create any new tables.
            Otherwise ``db.ensureTableExists`` is used instead.

        Returns
        -------
        tables : `_SkyPixOverlapTables`
            New instance holding the summary and overlap tables.
        """
        make_table = db.ensureTableExists if context is None else context.addTable
        summary_table = make_table(
            cls._SUMMARY_TABLE_NAME_SPEC.format(element=element),
            cls._makeSummaryTableSpec(element),
        )
        overlap_table = make_table(
            cls._OVERLAP_TABLE_NAME_SPEC.format(element=element),
            cls._makeOverlapTableSpec(element),
        )
        return cls(summary=summary_table, overlaps=overlap_table)

    @classmethod
    def _makeSummaryTableSpec(cls, element: DatabaseDimensionElement) -> ddl.TableSpec:
        """Build the specification of the table recording which combinations
        of skypix dimension and governor value have materialized overlaps.

        Parameters
        ----------
        element : `DatabaseDimensionElement`
            Dimension element whose overlaps are to be managed.

        Returns
        -------
        tableSpec : `ddl.TableSpec`
            Table specification.
        """
        assert element.spatial is not None
        system_field = ddl.FieldSpec(
            name="skypix_system",
            dtype=sqlalchemy.String,
            length=16,
            nullable=False,
            primaryKey=True,
        )
        level_field = ddl.FieldSpec(
            name="skypix_level",
            dtype=sqlalchemy.SmallInteger,
            nullable=False,
            primaryKey=True,
        )
        spec = ddl.TableSpec(fields=[system_field, level_field])
        # The governor dimension column completes the primary key.
        addDimensionForeignKey(spec, element.spatial.governor, primaryKey=True)
        return spec

    @classmethod
    def _makeOverlapTableSpec(cls, element: DatabaseDimensionElement) -> ddl.TableSpec:
        """Build the specification of the table holding materialized overlap
        rows.

        Parameters
        ----------
        element : `DatabaseDimensionElement`
            Dimension element whose overlaps are to be managed.

        Returns
        -------
        tableSpec : `ddl.TableSpec`
            Table specification.
        """
        assert element.spatial is not None
        system_field = ddl.FieldSpec(
            name="skypix_system",
            dtype=sqlalchemy.String,
            length=16,
            nullable=False,
            primaryKey=True,
        )
        level_field = ddl.FieldSpec(
            name="skypix_level",
            dtype=sqlalchemy.SmallInteger,
            nullable=False,
            primaryKey=True,
        )
        # Same fields as the primary key but in a different order, to help
        # queries that know skypix_index and want to find the other element.
        index_by_skypix = ddl.IndexSpec(
            "skypix_system",
            "skypix_level",
            "skypix_index",
            *element.graph.required.names,
        )
        # Foreign key to the summary table.  It ensures no overlaps are
        # materialized without that being recorded in the summary table,
        # although it cannot prevent the converse (a summary row with no
        # overlap rows); either would be a logic bug, but we want to be
        # defensive.  With ON DELETE CASCADE, "disabling" an overlap
        # materialization would just be a matter of deleting the summary row.
        # The governor dimension column itself is added further down, by
        # addDimensionForeignKey.
        summary_foreign_key = ddl.ForeignKeySpec(
            cls._SUMMARY_TABLE_NAME_SPEC.format(element=element),
            source=("skypix_system", "skypix_level", element.spatial.governor.name),
            target=("skypix_system", "skypix_level", element.spatial.governor.name),
            onDelete="CASCADE",
        )
        spec = ddl.TableSpec(
            # (more columns are added below)
            fields=[system_field, level_field],
            unique=set(),
            indexes={index_by_skypix},
            foreignKeys=[summary_foreign_key],
        )
        # Columns for the element whose overlaps are managed.  This always
        # includes the governor dimension, since it is a required dependency
        # of the element.
        for dimension in element.required:
            addDimensionForeignKey(spec, dimension, primaryKey=True)
        # The skypix index column goes last because the order in which the
        # primary key is defined matters (at least a bit): a non-summary
        # column like this one should come after the governor dimension
        # column.
        index_field = ddl.FieldSpec(
            name="skypix_index",
            dtype=sqlalchemy.BigInteger,
            nullable=False,
            primaryKey=True,
        )
        spec.fields.add(index_field)
        return spec