Coverage for python/lsst/daf/butler/registry/dimensions/table.py: 92%
186 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-10-27 09:43 +0000
« prev ^ index » next coverage.py v7.3.2, created at 2023-10-27 09:43 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
27from __future__ import annotations
29from ... import ddl
31__all__ = ["TableDimensionRecordStorage"]
33import dataclasses
34import logging
35from collections.abc import Mapping, Sequence, Set
36from typing import Any
38import sqlalchemy
39from lsst.daf.relation import Join, Relation, sql
41from ..._column_tags import DimensionKeyColumnTag
42from ..._column_type_info import LogicalColumn
43from ..._named import NamedKeyMapping
44from ..._timespan import TimespanDatabaseRepresentation
45from ...dimensions import (
46 DatabaseDimensionElement,
47 DataCoordinate,
48 DimensionElement,
49 DimensionRecord,
50 GovernorDimension,
51 SkyPixDimension,
52 addDimensionForeignKey,
53)
54from .. import queries
55from ..interfaces import (
56 Database,
57 DatabaseDimensionOverlapStorage,
58 DatabaseDimensionRecordStorage,
59 GovernorDimensionRecordStorage,
60 StaticTablesContext,
61)
# Module-level logger, named after this module.
_LOG = logging.getLogger(__name__)


MAX_FETCH_CHUNK = 1000
"""Maximum number of data IDs we fetch records at a time.

Barring something database-engine-specific, this sets the size of the actual
SQL query, not just the number of result rows, because the only way to query
for multiple data IDs in a single SELECT query via SQLAlchemy is to have an OR
term in the WHERE clause for each one.
"""
76class TableDimensionRecordStorage(DatabaseDimensionRecordStorage):
77 """A record storage implementation uses a regular database table.
79 Parameters
80 ----------
81 db : `Database`
82 Interface to the database engine and namespace that will hold these
83 dimension records.
84 element : `DatabaseDimensionElement`
85 The element whose records this storage will manage.
86 table : `sqlalchemy.schema.Table`
87 The logical table for the element.
88 skypix_overlap_tables : `_SkyPixOverlapTables`, optional
89 Object that manages the tables that hold materialized spatial overlap
90 joins to skypix dimensions. Should be `None` if (and only if)
91 ``element.spatial is None``.
92 """
94 def __init__(
95 self,
96 db: Database,
97 element: DatabaseDimensionElement,
98 *,
99 table: sqlalchemy.schema.Table,
100 skypix_overlap_tables: _SkyPixOverlapTables | None = None,
101 ):
102 self._db = db
103 self._table = table
104 self._element = element
105 self._fetchColumns: dict[str, sqlalchemy.sql.ColumnElement] = {
106 dimension.name: self._table.columns[name]
107 for dimension, name in zip(
108 self._element.dimensions, self._element.RecordClass.fields.dimensions.names, strict=True
109 )
110 }
111 self._skypix_overlap_tables = skypix_overlap_tables
112 self._otherOverlaps: dict[str, DatabaseDimensionOverlapStorage] = {}
114 @classmethod
115 def initialize(
116 cls,
117 db: Database,
118 element: DatabaseDimensionElement,
119 *,
120 context: StaticTablesContext | None = None,
121 config: Mapping[str, Any],
122 governors: NamedKeyMapping[GovernorDimension, GovernorDimensionRecordStorage],
123 view_target: DatabaseDimensionRecordStorage | None = None,
124 ) -> DatabaseDimensionRecordStorage:
125 # Docstring inherited from DatabaseDimensionRecordStorage.
126 assert view_target is None, f"Storage for {element} is not a view."
127 spec = element.RecordClass.fields.makeTableSpec(TimespanReprClass=db.getTimespanRepresentation())
128 if context is not None: 128 ↛ 131line 128 didn't jump to line 131, because the condition on line 128 was never false
129 table = context.addTable(element.name, spec)
130 else:
131 table = db.ensureTableExists(element.name, spec)
132 if element.spatial is not None:
133 governor = governors[element.spatial.governor]
134 skypix_overlap_tables = _SkyPixOverlapTables.initialize(db, element, context=context)
135 result = cls(db, element, table=table, skypix_overlap_tables=skypix_overlap_tables)
136 governor.registerInsertionListener(result._on_governor_insert)
137 return result
138 else:
139 return cls(db, element, table=table)
141 @property
142 def element(self) -> DatabaseDimensionElement:
143 # Docstring inherited from DimensionRecordStorage.element.
144 return self._element
146 def clearCaches(self) -> None:
147 # Docstring inherited from DimensionRecordStorage.clearCaches.
148 pass
150 def make_relation(self, context: queries.SqlQueryContext) -> Relation:
151 # Docstring inherited from DimensionRecordStorage.
152 payload = self._build_sql_payload(self._table, context.column_types)
153 return context.sql_engine.make_leaf(
154 payload.columns_available.keys(),
155 name=self.element.name,
156 payload=payload,
157 )
159 def fetch_one(self, data_id: DataCoordinate, context: queries.SqlQueryContext) -> DimensionRecord | None:
160 # Docstring inherited from DimensionRecordStorage.
161 from .. import queries
163 relation = self.join(context.make_initial_relation(), Join(), context).with_rows_satisfying(
164 context.make_data_coordinate_predicate(data_id, full=False)
165 )[0:1]
166 rows = list(context.fetch_iterable(relation))
167 if not rows:
168 return None
169 reader = queries.DimensionRecordReader(self._element)
170 return reader.read(rows[0])
172 def insert(self, *records: DimensionRecord, replace: bool = False, skip_existing: bool = False) -> None:
173 # Docstring inherited from DimensionRecordStorage.insert.
174 elementRows = [record.toDict() for record in records]
175 if self.element.temporal is not None:
176 TimespanReprClass = self._db.getTimespanRepresentation()
177 for row in elementRows:
178 timespan = row.pop(TimespanDatabaseRepresentation.NAME)
179 TimespanReprClass.update(timespan, result=row)
180 with self._db.transaction():
181 if replace:
182 self._db.replace(self._table, *elementRows)
183 elif skip_existing:
184 self._db.ensure(self._table, *elementRows, primary_key_only=True)
185 else:
186 self._db.insert(self._table, *elementRows)
187 if self._skypix_overlap_tables is not None:
188 self._insert_skypix_overlaps(records, replace=replace, skip_existing=skip_existing)
190 def sync(self, record: DimensionRecord, update: bool = False) -> bool | dict[str, Any]:
191 # Docstring inherited from DimensionRecordStorage.sync.
192 compared = record.toDict()
193 keys = {}
194 for name in record.fields.required.names:
195 keys[name] = compared.pop(name)
196 if self.element.temporal is not None:
197 TimespanReprClass = self._db.getTimespanRepresentation()
198 timespan = compared.pop(TimespanDatabaseRepresentation.NAME)
199 TimespanReprClass.update(timespan, result=compared)
200 with self._db.transaction():
201 _, inserted_or_updated = self._db.sync(
202 self._table,
203 keys=keys,
204 compared=compared,
205 update=update,
206 )
207 if inserted_or_updated and self._skypix_overlap_tables is not None:
208 if inserted_or_updated is True:
209 # Inserted a new row, so we just need to insert new overlap
210 # rows.
211 self._insert_skypix_overlaps([record])
212 elif "region" in inserted_or_updated: 212 ↛ 200line 212 didn't jump to line 200
213 # Updated the region, so we need to delete old overlap rows
214 # and insert new ones.
215 self._insert_skypix_overlaps([record], replace=True)
216 # We updated something other than a region.
217 return inserted_or_updated
219 def digestTables(self) -> list[sqlalchemy.schema.Table]:
220 # Docstring inherited from DimensionRecordStorage.digestTables.
221 result = [self._table]
222 if self._skypix_overlap_tables is not None:
223 result.append(self._skypix_overlap_tables.summary)
224 result.append(self._skypix_overlap_tables.overlaps)
225 return result
227 def connect(self, overlaps: DatabaseDimensionOverlapStorage) -> None:
228 # Docstring inherited from DatabaseDimensionRecordStorage.
229 (other,) = set(overlaps.elements) - {self.element}
230 self._otherOverlaps[other.name] = overlaps
232 def make_spatial_join_relation(
233 self,
234 other: DimensionElement,
235 context: queries.SqlQueryContext,
236 governor_constraints: Mapping[str, Set[str]],
237 ) -> Relation | None:
238 # Docstring inherited from DatabaseDimensionRecordStorage.
239 match other:
240 case SkyPixDimension() as skypix:
241 return self._make_skypix_join_relation(skypix, context)
242 case DatabaseDimensionElement() as other: 242 ↛ 244line 242 didn't jump to line 244, because the pattern on line 242 always matched
243 return self._otherOverlaps[other.name].make_relation(context, governor_constraints)
244 case _:
245 raise TypeError(f"Unexpected dimension element type for spatial join: {other}.")
247 def _on_governor_insert(self, record: DimensionRecord) -> None:
248 """`GovernorDimensionRecordStorage.registerInsertionListener`
249 callback for this element.
251 Parameters
252 ----------
253 record : `DimensionRecord`
254 Record for this element's governor dimension.
255 """
256 # We need to enable overlaps between this new governor dimension value
257 # and the common skypix dimension to record that we materialize
258 # overlaps for that combination. Foreign keys guarantee that there
259 # can't be any rows of this storage object's own element with that
260 # governor value yet, so we know there's nothing to insert into the
261 # overlaps table yet.
262 skypix = self.element.universe.commonSkyPix
263 assert self._element.spatial is not None, "Only called for spatial dimension elements."
264 assert (
265 self._skypix_overlap_tables is not None
266 ), "Spatial dimension elements always have skypix overlap tables."
267 governor = self._element.spatial.governor
268 self._db.sync(
269 self._skypix_overlap_tables.summary,
270 keys={
271 "skypix_system": skypix.system.name,
272 "skypix_level": skypix.level,
273 governor.name: record.dataId[governor.name],
274 },
275 )
277 def _insert_skypix_overlaps(
278 self, records: Sequence[DimensionRecord], replace: bool = False, skip_existing: bool = False
279 ) -> None:
280 """Compute and insert overlap rows between this dimesion element and
281 the common skypix system.
283 Parameters
284 ----------
285 records : `~collections.abc.Sequence` [ `DimensionRecord` ]
286 Records for ``self.element`` that are being inserted.
287 replace : `bool`, optional
288 If `True`, the given records are being inserted in a mode that may
289 replace existing records, and hence overlap rows may need to be
290 replaced as well.
291 skip_existing : `bool`, optional
292 If `True`, the given records are being inserted in a mode that
293 ignored existing records with the same data ID, and hence overlap
294 rows need to be inserted this way as well.
295 """
296 assert self._element.spatial is not None, "Only called for spatial dimension elements."
297 assert (
298 self._skypix_overlap_tables is not None
299 ), "Spatial dimension elements always have skypix overlap tables."
300 # At present, only overlaps with the "commonSkyPix" system can be
301 # materialized, so we just compute and insert overlaps with those.
302 #
303 # To guard against this code being used with a data repository in which
304 # newer code has enabled other overlaps, we check afterwards that the
305 # summary table only contains commonSkyPix for all of these governor
306 # dimensions. In the future, we'll have to think about whether we need
307 # some table locking to guarantee consistency for those other overlaps
308 # if the summary table is updated at the same time as records are
309 # being inserted. This should happen within the same transaction
310 # (handled by the caller) so that previous inserts get rolled back.
311 skypix = self._element.universe.commonSkyPix
312 if replace:
313 # Since any of the new records might have replaced existing ones
314 # that already have overlap records, and we don't know which, we
315 # have no choice but to delete all overlaps for these records and
316 # recompute them.
317 # We include the skypix_system and skypix_level column values
318 # explicitly instead of just letting the query search for all
319 # of those related to the given records, because they are the
320 # first columns in the primary key, and hence searching with
321 # them will be way faster (and we don't want to add a new index
322 # just for this operation).
323 to_delete: list[dict[str, Any]] = [
324 {"skypix_system": skypix.system.name, "skypix_level": skypix.level, **record.dataId.byName()}
325 for record in records
326 ]
327 _LOG.debug("Deleting old common skypix overlaps for %s.", self.element.name)
328 self._db.delete(
329 self._skypix_overlap_tables.overlaps,
330 ["skypix_system", "skypix_level"] + list(self.element.graph.required.names),
331 *to_delete,
332 )
333 _LOG.debug("Precomputing common skypix overlaps for %s.", self.element.name)
334 overlap_records: list[dict[str, Any]] = []
335 for record in records:
336 if record.region is None:
337 continue
338 base_overlap_record = record.dataId.byName()
339 base_overlap_record["skypix_system"] = skypix.system.name
340 base_overlap_record["skypix_level"] = skypix.level
341 for begin, end in skypix.pixelization.envelope(record.region):
342 for index in range(begin, end):
343 overlap_records.append({"skypix_index": index, **base_overlap_record})
344 _LOG.debug("Inserting %d new skypix overlap rows for %s.", len(overlap_records), self.element.name)
345 if skip_existing:
346 self._db.ensure(self._skypix_overlap_tables.overlaps, *overlap_records, primary_key_only=True)
347 else:
348 self._db.insert(self._skypix_overlap_tables.overlaps, *overlap_records)
349 # Finally we check for non-commonSkyPix values in the summary table, as
350 # noted above.
351 summary = self._skypix_overlap_tables.summary
352 check_sql = (
353 sqlalchemy.sql.select(summary.columns.skypix_system, summary.columns.skypix_level)
354 .select_from(summary)
355 .where(
356 sqlalchemy.sql.not_(
357 sqlalchemy.sql.and_(
358 summary.columns.skypix_system == skypix.system.name,
359 summary.columns.skypix_level == skypix.level,
360 )
361 )
362 )
363 )
364 with self._db.query(check_sql) as sql_result:
365 bad_summary_rows = sql_result.fetchall()
366 if bad_summary_rows: 366 ↛ 367line 366 didn't jump to line 367, because the condition on line 366 was never true
367 bad_skypix_names = [f"{row.skypix_system}{row.skypix.level}" for row in bad_summary_rows]
368 raise RuntimeError(
369 f"Data repository has overlaps between {self._element} and {bad_skypix_names} that "
370 "are not supported by this version of daf_butler. Please use a newer version."
371 )
373 def _make_skypix_join_relation(
374 self,
375 skypix: SkyPixDimension,
376 context: queries.SqlQueryContext,
377 ) -> Relation | None:
378 """Construct a subquery expression containing overlaps between the
379 given skypix dimension and governor values.
381 Parameters
382 ----------
383 skypix : `SkyPixDimension`
384 The skypix dimension (system and level) for which overlaps should
385 be materialized.
386 context : `.queries.SqlQueryContext`
387 Object that manages relation engines and database-side state
388 (e.g. temporary tables) for the query.
390 Returns
391 -------
392 relation : `sql.Relation` or `None`
393 Join relation, or `None` if overlaps are not materialized for this
394 combination of dimensions.
395 """
396 assert self._element.spatial is not None, "Only called for spatial dimension elements."
397 assert (
398 self._skypix_overlap_tables is not None
399 ), "Spatial dimension elements always have skypix overlap tables."
400 if skypix != self._element.universe.commonSkyPix:
401 return None
402 table = self._skypix_overlap_tables.overlaps
403 payload = sql.Payload[LogicalColumn](table)
404 payload.columns_available[
405 DimensionKeyColumnTag(skypix.name)
406 ] = payload.from_clause.columns.skypix_index
407 for dimension_name in self.element.graph.required.names:
408 payload.columns_available[DimensionKeyColumnTag(dimension_name)] = payload.from_clause.columns[
409 dimension_name
410 ]
411 payload.where.append(table.columns.skypix_system == skypix.system.name)
412 payload.where.append(table.columns.skypix_level == skypix.level)
413 leaf = context.sql_engine.make_leaf(
414 payload.columns_available.keys(),
415 name=f"{self.element.name}_{skypix.name}_overlap",
416 payload=payload,
417 )
418 return leaf
@dataclasses.dataclass
class _SkyPixOverlapTables:
    """Helper for `TableDimensionRecordStorage` that owns the tables holding
    materialized overlaps with skypix dimensions.

    Use `initialize` to create instances; the constructor generated by
    `dataclasses.dataclass` is not meant to be called directly.

    Notes
    -----
    In principle this class (together with the related methods in
    TableDimensionRecordStorage) can manage overlaps between a database
    dimension element and any skypix dimension.  At present it is only used
    for relationships with the special ``commonSkyPix`` dimension, because
    that's all the query system uses.  Eventually, we expect to require users
    to explicitly materialize more relationships.

    Other possible future improvements include:

    - allowing finer-grained skypix dimensions to provide overlap rows for
      coarser ones, by dividing indices by powers of 4 (and possibly doing
      ``SELECT DISTINCT`` in the subquery to remove duplicates);

    - allowing finer-grained database elements (e.g. patch) to provide overlap
      rows for coarser ones (e.g. tract), by ignoring irrelevant columns (e.g.
      the patch IDs) in the subquery (again, possible with ``SELECT
      DISTINCT``).

    But there's no point to doing any of that until the query system can figure
    out how best to ask for overlap rows when an exact match isn't available.
    """

    summary: sqlalchemy.schema.Table
    """Table that records which governor value / skypix combinations have
    materialized overlaps.
    """

    overlaps: sqlalchemy.schema.Table
    """Table that actually holds overlap rows.
    """

    # Name templates for the two tables; unannotated, so they are plain class
    # attributes rather than dataclass fields.
    _SUMMARY_TABLE_NAME_SPEC = "{element.name}_skypix_overlap_summary"

    _OVERLAP_TABLE_NAME_SPEC = "{element.name}_skypix_overlap"

    @classmethod
    def initialize(
        cls,
        db: Database,
        element: DatabaseDimensionElement,
        *,
        context: StaticTablesContext | None,
    ) -> _SkyPixOverlapTables:
        """Construct a new instance, creating tables as needed.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        element : `DatabaseDimensionElement`
            Dimension element whose overlaps are to be managed.
        context : `StaticTablesContext`, optional
            If provided, an object to use to create any new tables.  If not
            provided, ``db.ensureTableExists`` should be used instead.
        """
        make_table = db.ensureTableExists if context is None else context.addTable
        summary_name = cls._SUMMARY_TABLE_NAME_SPEC.format(element=element)
        overlaps_name = cls._OVERLAP_TABLE_NAME_SPEC.format(element=element)
        # The summary table is created first; the overlap table's spec holds
        # a foreign key that points at it.
        summary_table = make_table(summary_name, cls._makeSummaryTableSpec(element))
        overlaps_table = make_table(overlaps_name, cls._makeOverlapTableSpec(element))
        return cls(summary=summary_table, overlaps=overlaps_table)

    @staticmethod
    def _makeSkyPixKeyFieldSpecs() -> list[ddl.FieldSpec]:
        """Return new field specs for the ``skypix_system`` and
        ``skypix_level`` primary key columns shared by both tables.
        """
        system_field = ddl.FieldSpec(
            name="skypix_system",
            dtype=sqlalchemy.String,
            length=16,
            nullable=False,
            primaryKey=True,
        )
        level_field = ddl.FieldSpec(
            name="skypix_level",
            dtype=sqlalchemy.SmallInteger,
            nullable=False,
            primaryKey=True,
        )
        return [system_field, level_field]

    @classmethod
    def _makeSummaryTableSpec(cls, element: DatabaseDimensionElement) -> ddl.TableSpec:
        """Build the specification of the table recording which combinations
        of skypix dimension and governor value have materialized overlaps.

        Parameters
        ----------
        element : `DatabaseDimensionElement`
            Dimension element whose overlaps are to be managed.

        Returns
        -------
        tableSpec : `ddl.TableSpec`
            Table specification.
        """
        assert element.spatial is not None
        summary_spec = ddl.TableSpec(fields=cls._makeSkyPixKeyFieldSpecs())
        addDimensionForeignKey(summary_spec, element.spatial.governor, primaryKey=True)
        return summary_spec

    @classmethod
    def _makeOverlapTableSpec(cls, element: DatabaseDimensionElement) -> ddl.TableSpec:
        """Build the specification of the table holding materialized overlap
        rows.

        Parameters
        ----------
        element : `DatabaseDimensionElement`
            Dimension element whose overlaps are to be managed.

        Returns
        -------
        tableSpec : `ddl.TableSpec`
            Table specification.
        """
        assert element.spatial is not None
        # This index has the same fields as the PK, in a different order, to
        # facilitate queries that know skypix_index and want to find the other
        # element.
        reverse_index = ddl.IndexSpec(
            "skypix_system",
            "skypix_level",
            "skypix_index",
            *element.graph.required.names,
        )
        # Foreign key to summary table.  This makes sure we don't materialize
        # any overlaps without remembering that we've done so in the summary
        # table, though it can't prevent the converse of adding a summary row
        # without adding overlap row (either of those is a logic bug, of
        # course, but we want to be defensive about those).  Using ON DELETE
        # CASCADE, it'd be very easy to implement "disabling" an overlap
        # materialization, because we can just delete the summary row.
        # Note that the governor dimension column is added below, in the call
        # to addDimensionForeignKey.
        summary_key_columns = ("skypix_system", "skypix_level", element.spatial.governor.name)
        summary_foreign_key = ddl.ForeignKeySpec(
            cls._SUMMARY_TABLE_NAME_SPEC.format(element=element),
            source=summary_key_columns,
            target=summary_key_columns,
            onDelete="CASCADE",
        )
        overlap_spec = ddl.TableSpec(
            # (more columns added below)
            fields=cls._makeSkyPixKeyFieldSpecs(),
            unique=set(),
            indexes={reverse_index},
            foreignKeys=[summary_foreign_key],
        )
        # Add fields for the standard element this class manages overlaps for.
        # This is guaranteed to add a column for the governor dimension,
        # because that's a required dependency of element.
        for dimension in element.required:
            addDimensionForeignKey(overlap_spec, dimension, primaryKey=True)
        # Add field for the actual skypix index.  We do this later because I
        # think we care (at least a bit) about the order in which the primary
        # key is defined, in that we want a non-summary column like this one
        # to appear after the governor dimension column.
        index_field = ddl.FieldSpec(
            name="skypix_index",
            dtype=sqlalchemy.BigInteger,
            nullable=False,
            primaryKey=True,
        )
        overlap_spec.fields.add(index_field)
        return overlap_spec