Coverage for python/lsst/daf/butler/registry/dimensions/table.py: 92%
181 statements
« prev ^ index » next coverage.py v7.3.1, created at 2023-10-02 07:59 +0000
« prev ^ index » next coverage.py v7.3.1, created at 2023-10-02 07:59 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
27from __future__ import annotations
29__all__ = ["TableDimensionRecordStorage"]
31import dataclasses
32import logging
33from collections.abc import Mapping, Sequence, Set
34from typing import Any
36import sqlalchemy
37from lsst.daf.relation import Join, Relation, sql
39from ...core import (
40 DatabaseDimensionElement,
41 DataCoordinate,
42 DimensionElement,
43 DimensionKeyColumnTag,
44 DimensionRecord,
45 GovernorDimension,
46 LogicalColumn,
47 NamedKeyMapping,
48 SkyPixDimension,
49 TimespanDatabaseRepresentation,
50 addDimensionForeignKey,
51 ddl,
52)
53from .. import queries
54from ..interfaces import (
55 Database,
56 DatabaseDimensionOverlapStorage,
57 DatabaseDimensionRecordStorage,
58 GovernorDimensionRecordStorage,
59 StaticTablesContext,
60)
# Module-level logger, named after this module via ``__name__``.
_LOG = logging.getLogger(__name__)


# Upper bound on how many data IDs are looked up per fetch; the string that
# follows documents why this also bounds the size of the generated SQL.
MAX_FETCH_CHUNK = 1000
"""Maximum number of data IDs we fetch records at a time.

Barring something database-engine-specific, this sets the size of the actual
SQL query, not just the number of result rows, because the only way to query
for multiple data IDs in a single SELECT query via SQLAlchemy is to have an OR
term in the WHERE clause for each one.
"""
75class TableDimensionRecordStorage(DatabaseDimensionRecordStorage):
76 """A record storage implementation uses a regular database table.
78 Parameters
79 ----------
80 db : `Database`
81 Interface to the database engine and namespace that will hold these
82 dimension records.
83 element : `DatabaseDimensionElement`
84 The element whose records this storage will manage.
85 table : `sqlalchemy.schema.Table`
86 The logical table for the element.
87 skypix_overlap_tables : `_SkyPixOverlapTables`, optional
88 Object that manages the tables that hold materialized spatial overlap
89 joins to skypix dimensions. Should be `None` if (and only if)
90 ``element.spatial is None``.
91 """
93 def __init__(
94 self,
95 db: Database,
96 element: DatabaseDimensionElement,
97 *,
98 table: sqlalchemy.schema.Table,
99 skypix_overlap_tables: _SkyPixOverlapTables | None = None,
100 ):
101 self._db = db
102 self._table = table
103 self._element = element
104 self._fetchColumns: dict[str, sqlalchemy.sql.ColumnElement] = {
105 dimension.name: self._table.columns[name]
106 for dimension, name in zip(
107 self._element.dimensions, self._element.RecordClass.fields.dimensions.names, strict=True
108 )
109 }
110 self._skypix_overlap_tables = skypix_overlap_tables
111 self._otherOverlaps: dict[str, DatabaseDimensionOverlapStorage] = {}
113 @classmethod
114 def initialize(
115 cls,
116 db: Database,
117 element: DatabaseDimensionElement,
118 *,
119 context: StaticTablesContext | None = None,
120 config: Mapping[str, Any],
121 governors: NamedKeyMapping[GovernorDimension, GovernorDimensionRecordStorage],
122 view_target: DatabaseDimensionRecordStorage | None = None,
123 ) -> DatabaseDimensionRecordStorage:
124 # Docstring inherited from DatabaseDimensionRecordStorage.
125 assert view_target is None, f"Storage for {element} is not a view."
126 spec = element.RecordClass.fields.makeTableSpec(TimespanReprClass=db.getTimespanRepresentation())
127 if context is not None: 127 ↛ 130line 127 didn't jump to line 130, because the condition on line 127 was never false
128 table = context.addTable(element.name, spec)
129 else:
130 table = db.ensureTableExists(element.name, spec)
131 if element.spatial is not None:
132 governor = governors[element.spatial.governor]
133 skypix_overlap_tables = _SkyPixOverlapTables.initialize(db, element, context=context)
134 result = cls(db, element, table=table, skypix_overlap_tables=skypix_overlap_tables)
135 governor.registerInsertionListener(result._on_governor_insert)
136 return result
137 else:
138 return cls(db, element, table=table)
140 @property
141 def element(self) -> DatabaseDimensionElement:
142 # Docstring inherited from DimensionRecordStorage.element.
143 return self._element
145 def clearCaches(self) -> None:
146 # Docstring inherited from DimensionRecordStorage.clearCaches.
147 pass
149 def make_relation(self, context: queries.SqlQueryContext) -> Relation:
150 # Docstring inherited from DimensionRecordStorage.
151 payload = self._build_sql_payload(self._table, context.column_types)
152 return context.sql_engine.make_leaf(
153 payload.columns_available.keys(),
154 name=self.element.name,
155 payload=payload,
156 )
158 def fetch_one(self, data_id: DataCoordinate, context: queries.SqlQueryContext) -> DimensionRecord | None:
159 # Docstring inherited from DimensionRecordStorage.
160 from .. import queries
162 relation = self.join(context.make_initial_relation(), Join(), context).with_rows_satisfying(
163 context.make_data_coordinate_predicate(data_id, full=False)
164 )[0:1]
165 rows = list(context.fetch_iterable(relation))
166 if not rows:
167 return None
168 reader = queries.DimensionRecordReader(self._element)
169 return reader.read(rows[0])
171 def insert(self, *records: DimensionRecord, replace: bool = False, skip_existing: bool = False) -> None:
172 # Docstring inherited from DimensionRecordStorage.insert.
173 elementRows = [record.toDict() for record in records]
174 if self.element.temporal is not None:
175 TimespanReprClass = self._db.getTimespanRepresentation()
176 for row in elementRows:
177 timespan = row.pop(TimespanDatabaseRepresentation.NAME)
178 TimespanReprClass.update(timespan, result=row)
179 with self._db.transaction():
180 if replace:
181 self._db.replace(self._table, *elementRows)
182 elif skip_existing:
183 self._db.ensure(self._table, *elementRows, primary_key_only=True)
184 else:
185 self._db.insert(self._table, *elementRows)
186 if self._skypix_overlap_tables is not None:
187 self._insert_skypix_overlaps(records, replace=replace, skip_existing=skip_existing)
189 def sync(self, record: DimensionRecord, update: bool = False) -> bool | dict[str, Any]:
190 # Docstring inherited from DimensionRecordStorage.sync.
191 compared = record.toDict()
192 keys = {}
193 for name in record.fields.required.names:
194 keys[name] = compared.pop(name)
195 if self.element.temporal is not None:
196 TimespanReprClass = self._db.getTimespanRepresentation()
197 timespan = compared.pop(TimespanDatabaseRepresentation.NAME)
198 TimespanReprClass.update(timespan, result=compared)
199 with self._db.transaction():
200 _, inserted_or_updated = self._db.sync(
201 self._table,
202 keys=keys,
203 compared=compared,
204 update=update,
205 )
206 if inserted_or_updated and self._skypix_overlap_tables is not None:
207 if inserted_or_updated is True:
208 # Inserted a new row, so we just need to insert new overlap
209 # rows.
210 self._insert_skypix_overlaps([record])
211 elif "region" in inserted_or_updated: 211 ↛ 199line 211 didn't jump to line 199
212 # Updated the region, so we need to delete old overlap rows
213 # and insert new ones.
214 self._insert_skypix_overlaps([record], replace=True)
215 # We updated something other than a region.
216 return inserted_or_updated
218 def digestTables(self) -> list[sqlalchemy.schema.Table]:
219 # Docstring inherited from DimensionRecordStorage.digestTables.
220 result = [self._table]
221 if self._skypix_overlap_tables is not None:
222 result.append(self._skypix_overlap_tables.summary)
223 result.append(self._skypix_overlap_tables.overlaps)
224 return result
226 def connect(self, overlaps: DatabaseDimensionOverlapStorage) -> None:
227 # Docstring inherited from DatabaseDimensionRecordStorage.
228 (other,) = set(overlaps.elements) - {self.element}
229 self._otherOverlaps[other.name] = overlaps
231 def make_spatial_join_relation(
232 self,
233 other: DimensionElement,
234 context: queries.SqlQueryContext,
235 governor_constraints: Mapping[str, Set[str]],
236 ) -> Relation | None:
237 # Docstring inherited from DatabaseDimensionRecordStorage.
238 match other:
239 case SkyPixDimension() as skypix:
240 return self._make_skypix_join_relation(skypix, context)
241 case DatabaseDimensionElement() as other: 241 ↛ 243line 241 didn't jump to line 243, because the pattern on line 241 always matched
242 return self._otherOverlaps[other.name].make_relation(context, governor_constraints)
243 case _:
244 raise TypeError(f"Unexpected dimension element type for spatial join: {other}.")
246 def _on_governor_insert(self, record: DimensionRecord) -> None:
247 """`GovernorDimensionRecordStorage.registerInsertionListener`
248 callback for this element.
250 Parameters
251 ----------
252 record : `DimensionRecord`
253 Record for this element's governor dimension.
254 """
255 # We need to enable overlaps between this new governor dimension value
256 # and the common skypix dimension to record that we materialize
257 # overlaps for that combination. Foreign keys guarantee that there
258 # can't be any rows of this storage object's own element with that
259 # governor value yet, so we know there's nothing to insert into the
260 # overlaps table yet.
261 skypix = self.element.universe.commonSkyPix
262 assert self._element.spatial is not None, "Only called for spatial dimension elements."
263 assert (
264 self._skypix_overlap_tables is not None
265 ), "Spatial dimension elements always have skypix overlap tables."
266 governor = self._element.spatial.governor
267 self._db.sync(
268 self._skypix_overlap_tables.summary,
269 keys={
270 "skypix_system": skypix.system.name,
271 "skypix_level": skypix.level,
272 governor.name: record.dataId[governor.name],
273 },
274 )
276 def _insert_skypix_overlaps(
277 self, records: Sequence[DimensionRecord], replace: bool = False, skip_existing: bool = False
278 ) -> None:
279 """Compute and insert overlap rows between this dimesion element and
280 the common skypix system.
282 Parameters
283 ----------
284 records : `~collections.abc.Sequence` [ `DimensionRecord` ]
285 Records for ``self.element`` that are being inserted.
286 replace : `bool`, optional
287 If `True`, the given records are being inserted in a mode that may
288 replace existing records, and hence overlap rows may need to be
289 replaced as well.
290 skip_existing : `bool`, optional
291 If `True`, the given records are being inserted in a mode that
292 ignored existing records with the same data ID, and hence overlap
293 rows need to be inserted this way as well.
294 """
295 assert self._element.spatial is not None, "Only called for spatial dimension elements."
296 assert (
297 self._skypix_overlap_tables is not None
298 ), "Spatial dimension elements always have skypix overlap tables."
299 # At present, only overlaps with the "commonSkyPix" system can be
300 # materialized, so we just compute and insert overlaps with those.
301 #
302 # To guard against this code being used with a data repository in which
303 # newer code has enabled other overlaps, we check afterwards that the
304 # summary table only contains commonSkyPix for all of these governor
305 # dimensions. In the future, we'll have to think about whether we need
306 # some table locking to guarantee consistency for those other overlaps
307 # if the summary table is updated at the same time as records are
308 # being inserted. This should happen within the same transaction
309 # (handled by the caller) so that previous inserts get rolled back.
310 skypix = self._element.universe.commonSkyPix
311 if replace:
312 # Since any of the new records might have replaced existing ones
313 # that already have overlap records, and we don't know which, we
314 # have no choice but to delete all overlaps for these records and
315 # recompute them.
316 # We include the skypix_system and skypix_level column values
317 # explicitly instead of just letting the query search for all
318 # of those related to the given records, because they are the
319 # first columns in the primary key, and hence searching with
320 # them will be way faster (and we don't want to add a new index
321 # just for this operation).
322 to_delete: list[dict[str, Any]] = [
323 {"skypix_system": skypix.system.name, "skypix_level": skypix.level, **record.dataId.byName()}
324 for record in records
325 ]
326 _LOG.debug("Deleting old common skypix overlaps for %s.", self.element.name)
327 self._db.delete(
328 self._skypix_overlap_tables.overlaps,
329 ["skypix_system", "skypix_level"] + list(self.element.graph.required.names),
330 *to_delete,
331 )
332 _LOG.debug("Precomputing common skypix overlaps for %s.", self.element.name)
333 overlap_records: list[dict[str, Any]] = []
334 for record in records:
335 if record.region is None:
336 continue
337 base_overlap_record = record.dataId.byName()
338 base_overlap_record["skypix_system"] = skypix.system.name
339 base_overlap_record["skypix_level"] = skypix.level
340 for begin, end in skypix.pixelization.envelope(record.region):
341 for index in range(begin, end):
342 overlap_records.append({"skypix_index": index, **base_overlap_record})
343 _LOG.debug("Inserting %d new skypix overlap rows for %s.", len(overlap_records), self.element.name)
344 if skip_existing:
345 self._db.ensure(self._skypix_overlap_tables.overlaps, *overlap_records, primary_key_only=True)
346 else:
347 self._db.insert(self._skypix_overlap_tables.overlaps, *overlap_records)
348 # Finally we check for non-commonSkyPix values in the summary table, as
349 # noted above.
350 summary = self._skypix_overlap_tables.summary
351 check_sql = (
352 sqlalchemy.sql.select(summary.columns.skypix_system, summary.columns.skypix_level)
353 .select_from(summary)
354 .where(
355 sqlalchemy.sql.not_(
356 sqlalchemy.sql.and_(
357 summary.columns.skypix_system == skypix.system.name,
358 summary.columns.skypix_level == skypix.level,
359 )
360 )
361 )
362 )
363 with self._db.query(check_sql) as sql_result:
364 bad_summary_rows = sql_result.fetchall()
365 if bad_summary_rows: 365 ↛ 366line 365 didn't jump to line 366, because the condition on line 365 was never true
366 bad_skypix_names = [f"{row.skypix_system}{row.skypix.level}" for row in bad_summary_rows]
367 raise RuntimeError(
368 f"Data repository has overlaps between {self._element} and {bad_skypix_names} that "
369 "are not supported by this version of daf_butler. Please use a newer version."
370 )
372 def _make_skypix_join_relation(
373 self,
374 skypix: SkyPixDimension,
375 context: queries.SqlQueryContext,
376 ) -> Relation | None:
377 """Construct a subquery expression containing overlaps between the
378 given skypix dimension and governor values.
380 Parameters
381 ----------
382 skypix : `SkyPixDimension`
383 The skypix dimension (system and level) for which overlaps should
384 be materialized.
385 context : `.queries.SqlQueryContext`
386 Object that manages relation engines and database-side state
387 (e.g. temporary tables) for the query.
389 Returns
390 -------
391 relation : `sql.Relation` or `None`
392 Join relation, or `None` if overlaps are not materialized for this
393 combination of dimensions.
394 """
395 assert self._element.spatial is not None, "Only called for spatial dimension elements."
396 assert (
397 self._skypix_overlap_tables is not None
398 ), "Spatial dimension elements always have skypix overlap tables."
399 if skypix != self._element.universe.commonSkyPix:
400 return None
401 table = self._skypix_overlap_tables.overlaps
402 payload = sql.Payload[LogicalColumn](table)
403 payload.columns_available[
404 DimensionKeyColumnTag(skypix.name)
405 ] = payload.from_clause.columns.skypix_index
406 for dimension_name in self.element.graph.required.names:
407 payload.columns_available[DimensionKeyColumnTag(dimension_name)] = payload.from_clause.columns[
408 dimension_name
409 ]
410 payload.where.append(table.columns.skypix_system == skypix.system.name)
411 payload.where.append(table.columns.skypix_level == skypix.level)
412 leaf = context.sql_engine.make_leaf(
413 payload.columns_available.keys(),
414 name=f"{self.element.name}_{skypix.name}_overlap",
415 payload=payload,
416 )
417 return leaf
@dataclasses.dataclass
class _SkyPixOverlapTables:
    """Holder for the pair of tables that back materialized skypix overlaps
    for `TableDimensionRecordStorage`.

    Use `initialize` to obtain instances; the dataclass-generated constructor
    is not meant to be called directly.

    Notes
    -----
    In principle this class (together with the related methods in
    TableDimensionRecordStorage) could manage overlaps between a database
    dimension element and any skypix dimension. At present it is only used
    for the special ``commonSkyPix`` dimension, because that is all the query
    system uses. Eventually, we expect to require users to explicitly
    materialize more relationships.

    Other possible future improvements include:

    - letting finer-grained skypix dimensions supply overlap rows for coarser
      ones, by dividing indices by powers of 4 (and possibly doing
      ``SELECT DISTINCT`` in the subquery to remove duplicates);

    - letting finer-grained database elements (e.g. patch) supply overlap
      rows for coarser ones (e.g. tract), by ignoring irrelevant columns
      (e.g. the patch IDs) in the subquery (again, possibly with ``SELECT
      DISTINCT``).

    None of that is worthwhile until the query system can work out how best
    to ask for overlap rows when an exact match isn't available.
    """

    summary: sqlalchemy.schema.Table
    """Table recording which governor value / skypix combinations have
    materialized overlaps.
    """

    overlaps: sqlalchemy.schema.Table
    """Table holding the overlap rows themselves.
    """

    # Name templates for the two tables; formatted with ``element=<element>``.
    _SUMMARY_TABLE_NAME_SPEC = "{element.name}_skypix_overlap_summary"

    _OVERLAP_TABLE_NAME_SPEC = "{element.name}_skypix_overlap"

    @classmethod
    def initialize(
        cls,
        db: Database,
        element: DatabaseDimensionElement,
        *,
        context: StaticTablesContext | None,
    ) -> _SkyPixOverlapTables:
        """Build an instance, creating the backing tables as needed.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        element : `DatabaseDimensionElement`
            Dimension element whose overlaps are to be managed.
        context : `StaticTablesContext`, optional
            If provided, an object to use to create any new tables. If not
            provided, ``db.ensureTableExists`` is used instead.
        """
        # Both factories are called as ``factory(name, spec)``.
        make_table = db.ensureTableExists if context is None else context.addTable
        summary_table = make_table(
            cls._SUMMARY_TABLE_NAME_SPEC.format(element=element),
            cls._makeSummaryTableSpec(element),
        )
        overlap_table = make_table(
            cls._OVERLAP_TABLE_NAME_SPEC.format(element=element),
            cls._makeOverlapTableSpec(element),
        )
        return cls(summary=summary_table, overlaps=overlap_table)

    @staticmethod
    def _skypix_key_fields() -> list[ddl.FieldSpec]:
        """Return fresh specs for the ``skypix_system`` and ``skypix_level``
        primary-key columns shared by the summary and overlap tables.
        """
        system_field = ddl.FieldSpec(
            name="skypix_system",
            dtype=sqlalchemy.String,
            length=16,
            nullable=False,
            primaryKey=True,
        )
        level_field = ddl.FieldSpec(
            name="skypix_level",
            dtype=sqlalchemy.SmallInteger,
            nullable=False,
            primaryKey=True,
        )
        return [system_field, level_field]

    @classmethod
    def _makeSummaryTableSpec(cls, element: DatabaseDimensionElement) -> ddl.TableSpec:
        """Build the specification for the table that records which
        combinations of skypix dimension and governor value have materialized
        overlaps.

        Parameters
        ----------
        element : `DatabaseDimensionElement`
            Dimension element whose overlaps are to be managed.

        Returns
        -------
        tableSpec : `ddl.TableSpec`
            Table specification.
        """
        assert element.spatial is not None
        summary_spec = ddl.TableSpec(fields=cls._skypix_key_fields())
        # The governor dimension column completes the primary key.
        addDimensionForeignKey(summary_spec, element.spatial.governor, primaryKey=True)
        return summary_spec

    @classmethod
    def _makeOverlapTableSpec(cls, element: DatabaseDimensionElement) -> ddl.TableSpec:
        """Build the specification for the table that holds materialized
        overlap rows.

        Parameters
        ----------
        element : `DatabaseDimensionElement`
            Dimension element whose overlaps are to be managed.

        Returns
        -------
        tableSpec : `ddl.TableSpec`
            Table specification.
        """
        assert element.spatial is not None
        # Columns identifying a summary-table row; used for both ends of the
        # foreign key below. The governor dimension column itself is added
        # later, by the addDimensionForeignKey loop.
        summary_key = ("skypix_system", "skypix_level", element.spatial.governor.name)
        # This index has the same fields as the PK, in a different order, to
        # facilitate queries that know skypix_index and want to find the other
        # element.
        index_by_skypix = ddl.IndexSpec(
            "skypix_system",
            "skypix_level",
            "skypix_index",
            *element.graph.required.names,
        )
        # Foreign key to summary table. This makes sure we don't materialize
        # any overlaps without remembering that we've done so in the summary
        # table, though it can't prevent the converse of adding a summary row
        # without adding overlap row (either of those is a logic bug, of
        # course, but we want to be defensive about those). Using ON DELETE
        # CASCADE, it'd be very easy to implement "disabling" an overlap
        # materialization, because we can just delete the summary row.
        summary_fk = ddl.ForeignKeySpec(
            cls._SUMMARY_TABLE_NAME_SPEC.format(element=element),
            source=summary_key,
            target=summary_key,
            onDelete="CASCADE",
        )
        overlap_spec = ddl.TableSpec(
            # (more columns added below)
            fields=cls._skypix_key_fields(),
            unique=set(),
            indexes={index_by_skypix},
            foreignKeys=[summary_fk],
        )
        # Add fields for the standard element this class manages overlaps for.
        # This is guaranteed to add a column for the governor dimension,
        # because that's a required dependency of element.
        for required_dimension in element.required:
            addDimensionForeignKey(overlap_spec, required_dimension, primaryKey=True)
        # Add field for the actual skypix index last: the order in which the
        # primary key is defined matters (at least a bit), and a non-summary
        # column like this one should appear after the governor dimension
        # column.
        index_field = ddl.FieldSpec(
            name="skypix_index",
            dtype=sqlalchemy.BigInteger,
            nullable=False,
            primaryKey=True,
        )
        overlap_spec.fields.add(index_field)
        return overlap_spec