Coverage for python/lsst/daf/butler/registry/dimensions/static.py: 97%
334 statements
coverage.py v7.4.1, created at 2024-02-01 11:19 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
27from __future__ import annotations
29import dataclasses
30import itertools
31import logging
32from collections import defaultdict
33from collections.abc import Mapping, Sequence, Set
34from typing import TYPE_CHECKING, Any
36import sqlalchemy
37from lsst.daf.relation import Calculation, ColumnExpression, Join, Relation, sql
39from ... import ddl
40from ..._column_tags import DimensionKeyColumnTag, DimensionRecordColumnTag
41from ..._column_type_info import LogicalColumn
42from ..._named import NamedKeyDict
43from ...dimensions import (
44 DatabaseTopologicalFamily,
45 DataCoordinate,
46 Dimension,
47 DimensionElement,
48 DimensionGroup,
49 DimensionRecord,
50 DimensionRecordSet,
51 DimensionUniverse,
52 SkyPixDimension,
53 addDimensionForeignKey,
54)
55from ...dimensions.record_cache import DimensionRecordCache
56from .._exceptions import MissingSpatialOverlapError
57from ..interfaces import Database, DimensionRecordStorageManager, StaticTablesContext, VersionTuple
59if TYPE_CHECKING:
60 from .. import queries
63# This has to be updated on every schema change
64_VERSION = VersionTuple(6, 0, 2)
66_LOG = logging.getLogger(__name__)
69class StaticDimensionRecordStorageManager(DimensionRecordStorageManager):
70 """An implementation of `DimensionRecordStorageManager` for single-layer
71 `Registry` and the base layers of multi-layer `Registry`.
73 This manager creates `DimensionRecordStorage` instances for all elements
74 in the `DimensionUniverse` in its own `initialize` method, as part of
75 static table creation, so it never needs to manage any dynamic registry
76 tables.
78 Parameters
79 ----------
80 db : `Database`
81 Interface to the underlying database engine and namespace.
82 tables : `dict` [ `str`, `sqlalchemy.Table` ]
83 Mapping from dimension element name to SQL table, for all elements that
84 have `DimensionElement.has_own_table` `True`.
85 overlap_tables : `dict` [ `str`, `tuple` [ `sqlalchemy.Table`, \
86 `sqlalchemy.Table` ] ]
87 Mapping from dimension element name to SQL table holding overlaps
88 between the common skypix dimension and that element, for all elements
89 that have `DimensionElement.has_own_table` `True` and
90 `DimensionElement.spatial` not `None`.
91 dimension_group_storage : `_DimensionGroupStorage`
92 Object that manages saved `DimensionGroup` definitions.
93 universe : `DimensionUniverse`
94 All known dimensions.
95 registry_schema_version : `VersionTuple` or `None`, optional
96 Version of registry schema.
97 """
99 def __init__(
100 self,
101 db: Database,
102 *,
103 tables: dict[str, sqlalchemy.Table],
104 overlap_tables: dict[str, tuple[sqlalchemy.Table, sqlalchemy.Table]],
105 dimension_group_storage: _DimensionGroupStorage,
106 universe: DimensionUniverse,
107 registry_schema_version: VersionTuple | None = None,
108 ):
109 super().__init__(universe=universe, registry_schema_version=registry_schema_version)
110 self._db = db
111 self._tables = tables
112 self._overlap_tables = overlap_tables
113 self._dimension_group_storage = dimension_group_storage
115 def clone(self, db: Database) -> StaticDimensionRecordStorageManager:
116 return StaticDimensionRecordStorageManager(
117 db,
118 tables=self._tables,
119 overlap_tables=self._overlap_tables,
120 dimension_group_storage=self._dimension_group_storage.clone(db),
121 universe=self.universe,
122 registry_schema_version=self._registry_schema_version,
123 )
125 @classmethod
126 def initialize(
127 cls,
128 db: Database,
129 context: StaticTablesContext,
130 *,
131 universe: DimensionUniverse,
132 registry_schema_version: VersionTuple | None = None,
133 ) -> DimensionRecordStorageManager:
134 # Docstring inherited from DimensionRecordStorageManager.
135 tables: dict[str, sqlalchemy.Table] = {}
136 # Define tables for governor dimensions, which are never spatial or
137 # temporal and always have tables.
138 for dimension in universe.governor_dimensions:
139 spec = dimension.RecordClass.fields.makeTableSpec(
140 TimespanReprClass=db.getTimespanRepresentation()
141 )
142 tables[dimension.name] = context.addTable(dimension.name, spec)
143 # Define tables for database dimension elements, which may or may not
144 # have their own tables and may be spatial or temporal.
145 spatial = NamedKeyDict[DatabaseTopologicalFamily, list[DimensionElement]]()
146 overlap_tables: dict[str, tuple[sqlalchemy.Table, sqlalchemy.Table]] = {}
147 for element in universe.database_elements:
148 if not element.has_own_table:
149 continue
150 spec = element.RecordClass.fields.makeTableSpec(TimespanReprClass=db.getTimespanRepresentation())
151 tables[element.name] = context.addTable(element.name, spec)
152 if element.spatial is not None:
153 spatial.setdefault(element.spatial, []).append(element)
154 overlap_tables[element.name] = cls._make_skypix_overlap_tables(context, element)
155 # Add some tables for materialized overlaps between database
156 # dimensions. We've never used these and no longer plan to, but we
157 # have to keep creating them to keep schema versioning consistent.
158 cls._make_legacy_overlap_tables(context, spatial)
159 # Create tables that store DimensionGroup definitions.
160 dimension_group_storage = _DimensionGroupStorage.initialize(db, context, universe=universe)
161 return cls(
162 db=db,
163 tables=tables,
164 overlap_tables=overlap_tables,
165 universe=universe,
166 dimension_group_storage=dimension_group_storage,
167 registry_schema_version=registry_schema_version,
168 )
170 def fetch_cache_dict(self) -> dict[str, DimensionRecordSet]:
171 # Docstring inherited.
172 result: dict[str, DimensionRecordSet] = {}
173 with self._db.transaction():
174 for element in self.universe.elements:
175 if not element.is_cached:
176 continue
177 assert not element.temporal, (
178 "Cached dimension elements should not be spatial or temporal, as that "
179 "suggests a large number of records."
180 )
181 if element.implied_union_target is not None:
182 assert isinstance(element, Dimension), "Only dimensions can be implied dependencies."
183 table = self._tables[element.implied_union_target.name]
184 sql = sqlalchemy.select(
185 table.columns[element.name].label(element.primary_key.name)
186 ).distinct()
187 else:
188 table = self._tables[element.name]
189 sql = table.select()
190 with self._db.query(sql) as results:
191 result[element.name] = DimensionRecordSet(
192 element=element,
193 records=[element.RecordClass(**row) for row in results.mappings()],
194 )
195 return result
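# Illustrative sketch of the implied-union query built in the branch above: a
# dimension whose values are the union of a column in another element's table
# (for example a band-like dimension implied by a physical_filter-like table) is
# cached with a SELECT DISTINCT over that column, relabeled to the dimension's
# primary key name. Table and column names here are assumptions for illustration.
import sqlalchemy

_md = sqlalchemy.MetaData()
_physical_filter = sqlalchemy.Table(
    "physical_filter",
    _md,
    sqlalchemy.Column("name", sqlalchemy.String(32), primary_key=True),
    sqlalchemy.Column("band", sqlalchemy.String(32)),
)
# Equivalent of: SELECT DISTINCT band AS name FROM physical_filter
_band_sql = sqlalchemy.select(_physical_filter.columns["band"].label("name")).distinct()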
197 def insert(
198 self,
199 element: DimensionElement,
200 *records: DimensionRecord,
201 replace: bool = False,
202 skip_existing: bool = False,
203 ) -> None:
204 # Docstring inherited.
205 if not element.has_own_table:
206 raise TypeError(f"Cannot insert {element.name} records.")
207 db_rows = self._make_record_db_rows(element, records, replace=replace)
208 table = self._tables[element.name]
209 with self._db.transaction():
210 if replace:
211 self._db.replace(table, *db_rows.main_rows)
212 elif skip_existing:
213 self._db.ensure(table, *db_rows.main_rows, primary_key_only=True)
214 else:
215 self._db.insert(table, *db_rows.main_rows)
216 self._insert_overlaps(
217 element, db_rows.overlap_insert_rows, db_rows.overlap_delete_rows, skip_existing=skip_existing
218 )
219 for related_element_name, summary_rows in db_rows.overlap_summary_rows.items():
220 self._db.ensure(self._overlap_tables[related_element_name][0], *summary_rows)
222 def sync(self, record: DimensionRecord, update: bool = False) -> bool | dict[str, Any]:
223 # Docstring inherited.
224 if not record.definition.has_own_table: 224 ↛ 225 (line 224 didn't jump to line 225 because the condition on line 224 was never true)
225 raise TypeError(f"Cannot sync {record.definition.name} records.")
226 # We might not need the overlap rows at all; we won't know until we try
227 # to insert the main row. But we figure it's better to spend the time
228 # to compute them in advance always *outside* the database transaction
229 # than to compute them only as-needed inside the database transaction,
230 # since in-transaction time is especially precious.
231 db_rows = self._make_record_db_rows(record.definition, [record], replace=True)
232 (compared,) = db_rows.main_rows
233 keys = {}
234 for name in record.fields.required.names:
235 keys[name] = compared.pop(name)
236 with self._db.transaction():
237 _, inserted_or_updated = self._db.sync(
238 self._tables[record.definition.name],
239 keys=keys,
240 compared=compared,
241 update=update,
242 )
243 if inserted_or_updated:
244 if inserted_or_updated is True:
245 # Inserted a new row, so we just need to insert new
246 # overlap rows (if there are any).
247 self._insert_overlaps(
248 record.definition, db_rows.overlap_insert_rows, overlap_delete_rows=[]
249 )
250 elif "region" in inserted_or_updated: 250 ↛ 256line 250 didn't jump to line 256, because the condition on line 250 was never false
251 # Updated the region, so we need to delete old overlap
252 # rows and insert new ones.
253 self._insert_overlaps(
254 record.definition, db_rows.overlap_insert_rows, db_rows.overlap_delete_rows
255 )
256 for related_element_name, summary_rows in db_rows.overlap_summary_rows.items():
257 self._db.ensure(self._overlap_tables[related_element_name][0], *summary_rows)
258 return inserted_or_updated
260 def fetch_one(
261 self,
262 element_name: str,
263 data_id: DataCoordinate,
264 cache: DimensionRecordCache,
265 ) -> DimensionRecord | None:
266 # Docstring inherited.
267 element = self.universe[element_name]
268 if element_name in cache:
269 try:
270 return cache[element_name].find(data_id)
271 except LookupError:
272 return None
273 if element.implied_union_target is not None: 273 ↛ 274 (line 273 didn't jump to line 274 because the condition on line 273 was never true)
274 assert isinstance(element, Dimension), "Only dimensions can be implied dependencies."
275 table = self._tables[element.implied_union_target.name]
276 sql = sqlalchemy.select(table.columns[element.name].label(element.primary_key.name)).where(
277 table.columns[element_name] == data_id[element_name]
278 )
279 elif isinstance(element, SkyPixDimension):
280 id = data_id[element_name]
281 return element.RecordClass(id=id, region=element.pixelization.pixel(id))
282 else:
283 table = self._tables[element.name]
284 sql = table.select().where(
285 *[
286 table.columns[column_name] == data_id[dimension_name]
287 for column_name, dimension_name in zip(
288 element.schema.required.names, element.required.names
289 )
290 ]
291 )
292 with self._db.query(sql) as results:
293 row = results.fetchone()
294 if row is None:
295 return None
296 mapping: Mapping
297 if element.temporal is not None:
298 mapping = dict(**row._mapping)
299 timespan = self._db.getTimespanRepresentation().extract(mapping)
300 for name in self._db.getTimespanRepresentation().getFieldNames():
301 del mapping[name]
302 mapping["timespan"] = timespan
303 else:
304 mapping = row._mapping
305 return element.RecordClass(**mapping)
307 def save_dimension_group(self, graph: DimensionGroup) -> int:
308 # Docstring inherited from DimensionRecordStorageManager.
309 return self._dimension_group_storage.save(graph)
311 def load_dimension_group(self, key: int) -> DimensionGroup:
312 # Docstring inherited from DimensionRecordStorageManager.
313 return self._dimension_group_storage.load(key)
315 def join(
316 self,
317 element_name: str,
318 target: Relation,
319 join: Join,
320 context: queries.SqlQueryContext,
321 ) -> Relation:
322 # Docstring inherited.
323 element = self.universe[element_name]
324 # We use Join.partial(...).apply(...) instead of Join.apply(..., ...)
325 # for the "backtracking" insertion capabilities of the former; more
326 # specifically, if `target` is a tree that starts with SQL relations
327 # and ends with iteration-engine operations (e.g. region-overlap
328 # postprocessing), this will try to perform the join upstream in the
329 # SQL engine before the transfer to iteration.
330 if element.has_own_table:
331 return join.partial(self._make_relation(element, context)).apply(target)
332 elif element.implied_union_target is not None:
333 columns = DimensionKeyColumnTag(element.name)
334 return join.partial(
335 self._make_relation(element.implied_union_target, context)
336 .with_only_columns(
337 {columns},
338 preferred_engine=context.preferred_engine,
339 require_preferred_engine=True,
340 )
341 .without_duplicates()
342 ).apply(target)
343 elif isinstance(element, SkyPixDimension):
344 assert join.predicate.as_trivial(), "Expected trivial join predicate for skypix relation."
345 id_column = DimensionKeyColumnTag(element.name)
346 assert id_column in target.columns, "Guaranteed by QueryBuilder.make_dimension_target."
347 function_name = f"{element.name}_region"
348 context.iteration_engine.functions[function_name] = element.pixelization.pixel
349 calculation = Calculation(
350 tag=DimensionRecordColumnTag(element.name, "region"),
351 expression=ColumnExpression.function(function_name, ColumnExpression.reference(id_column)),
352 )
353 return calculation.apply(
354 target, preferred_engine=context.iteration_engine, transfer=True, backtrack=True
355 )
356 else:
357 raise AssertionError(f"Unexpected definition of {element_name!r}.")
359 def make_spatial_join_relation(
360 self,
361 element1: str,
362 element2: str,
363 context: queries.SqlQueryContext,
364 existing_relationships: Set[frozenset[str]] = frozenset(),
365 ) -> tuple[Relation, bool]:
366 # Docstring inherited.
367 overlap_relationship = frozenset(
368 self.universe[element1].dimensions.names | self.universe[element2].dimensions.names
369 )
370 if overlap_relationship in existing_relationships: 370 ↛ 371 (line 370 didn't jump to line 371 because the condition on line 370 was never true)
371 return context.preferred_engine.make_join_identity_relation(), False
372 overlaps: Relation | None = None
373 needs_refinement: bool = False
374 if element1 == self.universe.commonSkyPix.name:
375 (element1, element2) = (element2, element1)
377 if element1 in self._overlap_tables:
378 if element2 in self._overlap_tables:
379 # Use commonSkyPix as an intermediary with post-query
380 # refinement.
381 have_overlap1_already = (
382 frozenset(self.universe[element1].dimensions.names | {self.universe.commonSkyPix.name})
383 in existing_relationships
384 )
385 have_overlap2_already = (
386 frozenset(self.universe[element2].dimensions.names | {self.universe.commonSkyPix.name})
387 in existing_relationships
388 )
389 overlap1 = context.preferred_engine.make_join_identity_relation()
390 overlap2 = context.preferred_engine.make_join_identity_relation()
391 if not have_overlap1_already:
392 overlap1 = self._make_common_skypix_join_relation(self.universe[element1], context)
393 if not have_overlap2_already:
394 overlap2 = self._make_common_skypix_join_relation(self.universe[element2], context)
395 overlaps = overlap1.join(overlap2)
396 if not have_overlap1_already and not have_overlap2_already:
397 # Drop the common skypix ID column from the overlap
398 # relation we return, since we don't want that column
399 # to be mistakenly equated with any other appearance of
400 # that column; that would mangle queries like
401 # "join visit to tract and tract to healpix10" by
402 # incorrectly requiring that all visits and healpix10
403 # pixels share common skypix pixels, not just tracts.
404 columns = set(overlaps.columns)
405 columns.remove(DimensionKeyColumnTag(self.universe.commonSkyPix.name))
406 overlaps = overlaps.with_only_columns(columns)
407 needs_refinement = True
408 elif element2 == self.universe.commonSkyPix.name: 408 ↛ 410 (line 408 didn't jump to line 410 because the condition on line 408 was never false)
409 overlaps = self._make_common_skypix_join_relation(self.universe[element1], context)
410 if overlaps is None:
411 # In the future, there's a lot more we could try here:
412 #
413 # - for skypix dimensions, looking for materialized overlaps at
414 # smaller spatial scales (higher levels) and using bit-shifting;
415 #
416 # - for non-skypix dimensions, looking for materialized overlaps
417 # for finer-grained members of the same family, and then
418 # doing SELECT DISTINCT (or even tolerating duplicates) on the
419 # columns we care about (e.g. use patch overlaps to satisfy a
420 # request for tract overlaps).
421 #
422 # It's not obvious that's better than just telling the user to
423 # materialize more overlaps, though.
424 raise MissingSpatialOverlapError(
425 f"No materialized overlaps for spatial join between {element1!r} and {element2!r}."
426 )
427 return overlaps, needs_refinement
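# Rough sqlalchemy sketch of the commonSkyPix-mediated join described above. Two
# per-element overlap tables (hypothetical and simplified to their key columns)
# are joined on skypix_index, and the skypix column is dropped from the output so
# it cannot be equated with other uses of that dimension; the resulting candidate
# pairs still need iteration-engine refinement against the real regions.
import sqlalchemy

_md = sqlalchemy.MetaData()
_visit_ovl = sqlalchemy.Table(
    "visit_skypix_overlap",
    _md,
    sqlalchemy.Column("skypix_index", sqlalchemy.BigInteger),
    sqlalchemy.Column("visit", sqlalchemy.BigInteger),
)
_tract_ovl = sqlalchemy.Table(
    "tract_skypix_overlap",
    _md,
    sqlalchemy.Column("skypix_index", sqlalchemy.BigInteger),
    sqlalchemy.Column("tract", sqlalchemy.BigInteger),
)
_candidates = (
    sqlalchemy.select(_visit_ovl.columns["visit"], _tract_ovl.columns["tract"])
    .select_from(
        _visit_ovl.join(
            _tract_ovl, _visit_ovl.columns["skypix_index"] == _tract_ovl.columns["skypix_index"]
        )
    )
    .distinct()
)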
429 def _make_relation(
430 self,
431 element: DimensionElement,
432 context: queries.SqlQueryContext,
433 ) -> Relation:
434 table = self._tables[element.name]
435 payload = sql.Payload[LogicalColumn](table)
436 for tag, field_name in element.RecordClass.fields.columns.items():
437 if field_name == "timespan":
438 payload.columns_available[tag] = self._db.getTimespanRepresentation().from_columns(
439 table.columns, name=field_name
440 )
441 else:
442 payload.columns_available[tag] = table.columns[field_name]
443 return context.sql_engine.make_leaf(
444 payload.columns_available.keys(),
445 name=element.name,
446 payload=payload,
447 )
449 def _make_common_skypix_join_relation(
450 self,
451 element: DimensionElement,
452 context: queries.SqlQueryContext,
453 ) -> Relation:
454 """Construct a subquery expression containing overlaps between the
455 common skypix dimension and the given dimension element.
457 Parameters
458 ----------
459 element : `DimensionElement`
460 Spatial dimension element whose overlaps with the common skypix
461 system are represented by the returned relation.
462 context : `.queries.SqlQueryContext`
463 Object that manages relation engines and database-side state
464 (e.g. temporary tables) for the query.
466 Returns
467 -------
468 relation : `Relation`
469 Join relation.
470 """
471 assert element.spatial is not None, "Only called for spatial dimension elements."
472 assert element.has_own_table, "Only called for dimension elements with their own tables."
473 _, table = self._overlap_tables[element.name]
474 payload = sql.Payload[LogicalColumn](table)
475 payload.columns_available[DimensionKeyColumnTag(self.universe.commonSkyPix.name)] = (
476 payload.from_clause.columns.skypix_index
477 )
478 for dimension_name in element.graph.required.names:
479 payload.columns_available[DimensionKeyColumnTag(dimension_name)] = payload.from_clause.columns[
480 dimension_name
481 ]
482 payload.where.append(table.columns.skypix_system == self.universe.commonSkyPix.system.name)
483 payload.where.append(table.columns.skypix_level == self.universe.commonSkyPix.level)
484 leaf = context.sql_engine.make_leaf(
485 payload.columns_available.keys(),
486 name=f"{element.name}_{self.universe.commonSkyPix.name}_overlap",
487 payload=payload,
488 )
489 return leaf
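# Compact sqlalchemy sketch of the leaf relation built above: the overlap table for
# a hypothetical element is filtered to the common skypix system and level, exposing
# skypix_index alongside the element's required dimension columns. The table layout,
# system name, and level are illustrative assumptions.
import sqlalchemy

_md = sqlalchemy.MetaData()
_ovl = sqlalchemy.Table(
    "tract_skypix_overlap",
    _md,
    sqlalchemy.Column("skypix_system", sqlalchemy.String(16)),
    sqlalchemy.Column("skypix_level", sqlalchemy.SmallInteger),
    sqlalchemy.Column("skypix_index", sqlalchemy.BigInteger),
    sqlalchemy.Column("skymap", sqlalchemy.String(64)),
    sqlalchemy.Column("tract", sqlalchemy.Integer),
)
_leaf_sql = sqlalchemy.select(
    _ovl.columns["skypix_index"], _ovl.columns["skymap"], _ovl.columns["tract"]
).where(
    _ovl.columns["skypix_system"] == "htm",
    _ovl.columns["skypix_level"] == 7,
)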
491 @classmethod
492 def currentVersions(cls) -> list[VersionTuple]:
493 # Docstring inherited from VersionedExtension.
494 return [_VERSION]
496 @classmethod
497 def _make_skypix_overlap_tables(
498 cls, context: StaticTablesContext, element: DimensionElement
499 ) -> tuple[sqlalchemy.Table, sqlalchemy.Table]:
500 assert element.governor is not None
501 summary_spec = ddl.TableSpec(
502 fields=[
503 ddl.FieldSpec(
504 name="skypix_system",
505 dtype=sqlalchemy.String,
506 length=16,
507 nullable=False,
508 primaryKey=True,
509 ),
510 ddl.FieldSpec(
511 name="skypix_level",
512 dtype=sqlalchemy.SmallInteger,
513 nullable=False,
514 primaryKey=True,
515 ),
516 ]
517 )
518 addDimensionForeignKey(summary_spec, element.governor, primaryKey=True)
519 overlap_spec = ddl.TableSpec(
520 fields=[
521 ddl.FieldSpec(
522 name="skypix_system",
523 dtype=sqlalchemy.String,
524 length=16,
525 nullable=False,
526 primaryKey=True,
527 ),
528 ddl.FieldSpec(
529 name="skypix_level",
530 dtype=sqlalchemy.SmallInteger,
531 nullable=False,
532 primaryKey=True,
533 ),
534 # (more columns added below)
535 ],
536 unique=set(),
537 indexes={
538 # This index has the same fields as the PK, in a different
539 # order, to facilitate queries that know skypix_index and want
540 # to find the other element.
541 ddl.IndexSpec(
542 "skypix_system",
543 "skypix_level",
544 "skypix_index",
545 *element.graph.required.names,
546 ),
547 },
548 foreignKeys=[
549 # Foreign key to summary table. This makes sure we don't
550 # materialize any overlaps without remembering that we've done
551 # so in the summary table, though it can't prevent the converse
552 # of adding a summary row without adding overlap rows (either of
553 # those is a logic bug, of course, but we want to be defensive
554 # about those). Using ON DELETE CASCADE, it'd be very easy to
555 # implement "disabling" an overlap materialization, because we
556 # can just delete the summary row.
557 # Note that the governor dimension column is added below, in
558 # the call to addDimensionForeignKey.
559 ddl.ForeignKeySpec(
560 f"{element.name}_skypix_overlap_summary",
561 source=("skypix_system", "skypix_level", element.governor.name),
562 target=("skypix_system", "skypix_level", element.governor.name),
563 onDelete="CASCADE",
564 ),
565 ],
566 )
567 # Add fields for the standard element this class manages overlaps for.
568 # This is guaranteed to add a column for the governor dimension,
569 # because that's a required dependency of element.
570 for dimension in element.required:
571 addDimensionForeignKey(overlap_spec, dimension, primaryKey=True)
572 # Add field for the actual skypix index. We do this later because I
573 # think we care (at least a bit) about the order in which the primary
574 # key is defined, in that we want a non-summary column like this one
575 # to appear after the governor dimension column.
576 overlap_spec.fields.add(
577 ddl.FieldSpec(
578 name="skypix_index",
579 dtype=sqlalchemy.BigInteger,
580 nullable=False,
581 primaryKey=True,
582 )
583 )
584 return (
585 context.addTable(f"{element.name}_skypix_overlap_summary", summary_spec),
586 context.addTable(f"{element.name}_skypix_overlap", overlap_spec),
587 )
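# Approximate plain-sqlalchemy rendering of the two table specs constructed above,
# assuming a hypothetical element whose governor dimension is "instrument" and whose
# other required dimension is "visit"; the real specs are built via ddl.TableSpec and
# addDimensionForeignKey, so column types and names here are illustrative only.
import sqlalchemy

_md = sqlalchemy.MetaData()
_summary = sqlalchemy.Table(
    "visit_skypix_overlap_summary",
    _md,
    sqlalchemy.Column("skypix_system", sqlalchemy.String(16), primary_key=True),
    sqlalchemy.Column("skypix_level", sqlalchemy.SmallInteger, primary_key=True),
    sqlalchemy.Column("instrument", sqlalchemy.String(32), primary_key=True),
)
_overlap = sqlalchemy.Table(
    "visit_skypix_overlap",
    _md,
    sqlalchemy.Column("skypix_system", sqlalchemy.String(16), primary_key=True),
    sqlalchemy.Column("skypix_level", sqlalchemy.SmallInteger, primary_key=True),
    sqlalchemy.Column("instrument", sqlalchemy.String(32), primary_key=True),
    sqlalchemy.Column("visit", sqlalchemy.BigInteger, primary_key=True),
    sqlalchemy.Column("skypix_index", sqlalchemy.BigInteger, primary_key=True),
    # Foreign key to the summary table, mirroring the ON DELETE CASCADE spec above.
    sqlalchemy.ForeignKeyConstraint(
        ["skypix_system", "skypix_level", "instrument"],
        [
            "visit_skypix_overlap_summary.skypix_system",
            "visit_skypix_overlap_summary.skypix_level",
            "visit_skypix_overlap_summary.instrument",
        ],
        ondelete="CASCADE",
    ),
    # Same columns as the primary key, reordered so skypix_index precedes the
    # dimension columns, for lookups that start from a skypix index.
    sqlalchemy.Index(
        "visit_skypix_overlap_idx",
        "skypix_system",
        "skypix_level",
        "skypix_index",
        "instrument",
        "visit",
    ),
)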
589 @classmethod
590 def _make_legacy_overlap_tables(
591 cls,
592 context: StaticTablesContext,
593 spatial: NamedKeyDict[DatabaseTopologicalFamily, list[DimensionElement]],
594 ) -> None:
595 for (_, elements1), (_, elements2) in itertools.combinations(spatial.items(), 2):
596 for element1, element2 in itertools.product(elements1, elements2):
597 if element1 > element2: 597 ↛ 598 (line 597 didn't jump to line 598 because the condition on line 597 was never true)
598 (element2, element1) = (element1, element2)
599 assert element1.spatial is not None and element2.spatial is not None
600 assert element1.governor != element2.governor
601 assert element1.governor is not None and element2.governor is not None
602 summary_spec = ddl.TableSpec(fields=[])
603 addDimensionForeignKey(summary_spec, element1.governor, primaryKey=True)
604 addDimensionForeignKey(summary_spec, element2.governor, primaryKey=True)
605 context.addTable(f"{element1.name}_{element2.name}_overlap_summary", summary_spec)
606 overlap_spec = ddl.TableSpec(fields=[])
607 addDimensionForeignKey(overlap_spec, element1.governor, primaryKey=True)
608 addDimensionForeignKey(overlap_spec, element2.governor, primaryKey=True)
609 for dimension in element1.required:
610 if dimension != element1.governor:
611 addDimensionForeignKey(overlap_spec, dimension, primaryKey=True)
612 for dimension in element2.required:
613 if dimension != element2.governor:
614 addDimensionForeignKey(overlap_spec, dimension, primaryKey=True)
615 context.addTable(f"{element1.name}_{element2.name}_overlap", overlap_spec)
617 def _make_record_db_rows(
618 self, element: DimensionElement, records: Sequence[DimensionRecord], replace: bool
619 ) -> _DimensionRecordDatabaseRows:
620 result = _DimensionRecordDatabaseRows()
621 result.main_rows = [record.toDict() for record in records]
622 if element.temporal is not None:
623 TimespanReprClass = self._db.getTimespanRepresentation()
624 for row in result.main_rows:
625 timespan = row.pop("timespan")
626 TimespanReprClass.update(timespan, result=row)
627 if element.spatial is not None:
628 result.overlap_insert_rows = self._compute_common_skypix_overlap_inserts(element, records)
629 if replace:
630 result.overlap_delete_rows = self._compute_common_skypix_overlap_deletes(records)
631 if element in self.universe.governor_dimensions:
632 for related_element_name in self._overlap_tables.keys():
633 if self.universe[related_element_name].governor == element:
634 result.overlap_summary_rows[related_element_name] = [
635 {
636 "skypix_system": self.universe.commonSkyPix.system.name,
637 "skypix_level": self.universe.commonSkyPix.level,
638 element.name: record.dataId[element.name],
639 }
640 for record in records
641 ]
642 return result
644 def _compute_common_skypix_overlap_deletes(
645 self, records: Sequence[DimensionRecord]
646 ) -> list[dict[str, Any]]:
647 return [
648 {
649 "skypix_system": self.universe.commonSkyPix.system.name,
650 "skypix_level": self.universe.commonSkyPix.level,
651 **record.dataId.required,
652 }
653 for record in records
654 ]
656 def _compute_common_skypix_overlap_inserts(
657 self,
658 element: DimensionElement,
659 records: Sequence[DimensionRecord],
660 ) -> list[dict[str, Any]]:
661 _LOG.debug("Precomputing common skypix overlaps for %s.", element.name)
662 overlap_records: list[dict[str, Any]] = []
663 for record in records:
664 if record.region is None:
665 continue
666 base_overlap_record = dict(record.dataId.required)
667 base_overlap_record["skypix_system"] = self.universe.commonSkyPix.system.name
668 base_overlap_record["skypix_level"] = self.universe.commonSkyPix.level
669 for begin, end in self.universe.commonSkyPix.pixelization.envelope(record.region):
670 for index in range(begin, end):
671 overlap_records.append({"skypix_index": index, **base_overlap_record})
672 return overlap_records
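# Pure-Python sketch of the range expansion above: pixelization.envelope(region) is
# assumed to yield half-open (begin, end) index ranges, and every index in every
# range becomes one overlap row. The data ID fields and ranges below are made up.
_base = {"skypix_system": "htm", "skypix_level": 7, "skymap": "example", "tract": 42}
_ranges = [(100, 103), (230, 231)]  # stand-in for envelope() output
_rows = [
    {"skypix_index": index, **_base}
    for begin, end in _ranges
    for index in range(begin, end)
]
# _rows holds four dicts, for skypix_index 100, 101, 102, and 230.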
674 def _insert_overlaps(
675 self,
676 element: DimensionElement,
677 overlap_insert_rows: list[dict[str, Any]],
678 overlap_delete_rows: list[dict[str, Any]],
679 skip_existing: bool = False,
680 ) -> None:
681 if overlap_delete_rows:
682 # Since any of the new records might have replaced existing ones
683 # that already have overlap records, and we don't know which, we
684 # have no choice but to delete all overlaps for these records and
685 # recompute them. We include the skypix_system and skypix_level
686 # column values explicitly instead of just letting the query search
687 # for all of those related to the given records, because they are
688 # the first columns in the primary key, and hence searching with
689 # them will be way faster (and we don't want to add a new index
690 # just for this operation).
691 _LOG.debug("Deleting old common skypix overlaps for %s.", element.name)
692 self._db.delete(
693 self._overlap_tables[element.name][1],
694 ["skypix_system", "skypix_level"] + list(element.minimal_group.required),
695 *overlap_delete_rows,
696 )
697 if overlap_insert_rows:
698 _LOG.debug("Inserting %d new skypix overlap rows for %s.", len(overlap_insert_rows), element.name)
699 if skip_existing:
700 self._db.ensure(
701 self._overlap_tables[element.name][1], *overlap_insert_rows, primary_key_only=True
702 )
703 else:
704 self._db.insert(self._overlap_tables[element.name][1], *overlap_insert_rows)
705 # We have only ever put overlaps with the commonSkyPix system into
706 # this table, and *probably* only ever will. But the schema leaves
707 # open the possibility that we should be inserting overlaps for
708 # some other skypix system, as we once thought we'd support. In
709 # case that door opens again in the future, we need to check the
710 # "overlap summary" table to see if are any skypix systems other
711 # than the common skypix system and raise (rolling back the entire
712 # transaction) if there are.
713 summary_table = self._overlap_tables[element.name][0]
714 check_sql = (
715 sqlalchemy.sql.select(summary_table.columns.skypix_system, summary_table.columns.skypix_level)
716 .select_from(summary_table)
717 .where(
718 sqlalchemy.sql.not_(
719 sqlalchemy.sql.and_(
720 summary_table.columns.skypix_system == self.universe.commonSkyPix.system.name,
721 summary_table.columns.skypix_level == self.universe.commonSkyPix.level,
722 )
723 )
724 )
725 )
726 with self._db.query(check_sql) as sql_result:
727 bad_summary_rows = sql_result.fetchall()
728 if bad_summary_rows: 728 ↛ 729 (line 728 didn't jump to line 729 because the condition on line 728 was never true)
729 bad_skypix_names = [f"{row.skypix_system}{row.skypix_level}" for row in bad_summary_rows]
730 raise RuntimeError(
731 f"Data repository has overlaps between {element} and {bad_skypix_names} that "
732 "are not supported by this version of daf_butler. Please use a newer version."
733 )
736@dataclasses.dataclass
737class _DimensionRecordDatabaseRows:
738 """Rows to be inserted into the database whenever a DimensionRecord is
739 added.
740 """
742 main_rows: list[dict[str, Any]] = dataclasses.field(default_factory=list)
743 """Rows for the dimension element table itself."""
745 overlap_insert_rows: list[dict[str, Any]] = dataclasses.field(default_factory=list)
746 """Rows for overlaps with the common skypix dimension."""
748 overlap_delete_rows: list[dict[str, Any]] = dataclasses.field(default_factory=list)
749 """Rows for overlaps with the common skypix dimension that should be
750 deleted before inserting new ones.
751 """
753 overlap_summary_rows: dict[str, list[dict[str, Any]]] = dataclasses.field(default_factory=dict)
754 """Rows that record which overlaps between skypix dimensiosn and other
755 dimension elements are stored.
757 This is populated when inserting governor dimension rows, with keys being
758 the names of spatial dimension elements associated with that governor.
759 """
762class _DimensionGroupStorage:
763 """Helper object that manages saved DimensionGroup definitions.
765 Should generally be constructed by calling `initialize` instead of invoking
766 the constructor directly.
768 Parameters
769 ----------
770 db : `Database`
771 Interface to the underlying database engine and namespace.
772 idTable : `sqlalchemy.schema.Table`
773 Table that just holds unique IDs for dimension graphs.
774 definitionTable : `sqlalchemy.schema.Table`
775 Table that maps dimension names to the IDs of the dimension graphs to
776 which they belong.
777 universe : `DimensionUniverse`
778 All known dimensions.
779 """
781 def __init__(
782 self,
783 db: Database,
784 idTable: sqlalchemy.schema.Table,
785 definitionTable: sqlalchemy.schema.Table,
786 universe: DimensionUniverse,
787 ):
788 self._db = db
789 self._idTable = idTable
790 self._definitionTable = definitionTable
791 self._universe = universe
792 self._keysByGroup: dict[DimensionGroup, int] = {universe.empty.as_group(): 0}
793 self._groupsByKey: dict[int, DimensionGroup] = {0: universe.empty.as_group()}
795 def clone(self, db: Database) -> _DimensionGroupStorage:
796 """Make an independent copy of this manager instance bound to a new
797 `Database` instance.
799 Parameters
800 ----------
801 db : `Database`
802 New `Database` object to use when instantiating the manager.
804 Returns
805 -------
806 instance : `_DimensionGroupStorage`
807 New manager instance with the same configuration as this instance,
808 but bound to a new Database object.
809 """
810 return _DimensionGroupStorage(
811 db=db, idTable=self._idTable, definitionTable=self._definitionTable, universe=self._universe
812 )
814 @classmethod
815 def initialize(
816 cls,
817 db: Database,
818 context: StaticTablesContext,
819 *,
820 universe: DimensionUniverse,
821 ) -> _DimensionGroupStorage:
822 """Construct a new instance, including creating tables if necessary.
824 Parameters
825 ----------
826 db : `Database`
827 Interface to the underlying database engine and namespace.
828 context : `StaticTablesContext`
829 Context object obtained from `Database.declareStaticTables`; used
830 to declare any tables that should always be present.
831 universe : `DimensionUniverse`
832 All known dimensions.
834 Returns
835 -------
836 storage : `_DimensionGroupStorage`
837 New instance of this class.
838 """
839 # We need two tables just so we have one where the autoincrement key is
840 # the only primary key column, as is required by (at least) SQLite. In
841 # other databases, we might be able to use a Sequence directly.
842 idTable = context.addTable(
843 "dimension_graph_key",
844 ddl.TableSpec(
845 fields=[
846 ddl.FieldSpec(
847 name="id",
848 dtype=sqlalchemy.BigInteger,
849 autoincrement=True,
850 primaryKey=True,
851 ),
852 ],
853 ),
854 )
855 definitionTable = context.addTable(
856 "dimension_graph_definition",
857 ddl.TableSpec(
858 fields=[
859 ddl.FieldSpec(name="dimension_graph_id", dtype=sqlalchemy.BigInteger, primaryKey=True),
860 ddl.FieldSpec(name="dimension_name", dtype=sqlalchemy.Text, primaryKey=True),
861 ],
862 foreignKeys=[
863 ddl.ForeignKeySpec(
864 "dimension_graph_key",
865 source=("dimension_graph_id",),
866 target=("id",),
867 onDelete="CASCADE",
868 ),
869 ],
870 ),
871 )
872 return cls(db, idTable, definitionTable, universe=universe)
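# Self-contained sqlalchemy/SQLite sketch of the two-table pattern described in the
# comment above: one table whose sole primary key is the autoincrement id, plus a
# definition table keyed by (graph id, dimension name). Integer is used instead of
# BigInteger so SQLite's rowid autoincrement applies; the dimension names inserted
# at the end are arbitrary examples.
import sqlalchemy

_md = sqlalchemy.MetaData()
_key = sqlalchemy.Table(
    "dimension_graph_key",
    _md,
    sqlalchemy.Column("id", sqlalchemy.Integer, primary_key=True, autoincrement=True),
)
_definition = sqlalchemy.Table(
    "dimension_graph_definition",
    _md,
    sqlalchemy.Column(
        "dimension_graph_id",
        sqlalchemy.ForeignKey("dimension_graph_key.id", ondelete="CASCADE"),
        primary_key=True,
    ),
    sqlalchemy.Column("dimension_name", sqlalchemy.Text, primary_key=True),
)
_engine = sqlalchemy.create_engine("sqlite://")
_md.create_all(_engine)
with _engine.begin() as conn:
    new_key = conn.execute(sqlalchemy.insert(_key)).inserted_primary_key[0]
    conn.execute(
        sqlalchemy.insert(_definition),
        [{"dimension_graph_id": new_key, "dimension_name": name} for name in ("instrument", "visit")],
    )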
874 def refresh(self) -> None:
875 """Refresh the in-memory cache of saved DimensionGraph definitions.
877 This should be done automatically whenever needed, but it can also
878 be called explicitly.
879 """
880 dimensionNamesByKey: dict[int, set[str]] = defaultdict(set)
881 with self._db.query(self._definitionTable.select()) as sql_result:
882 sql_rows = sql_result.mappings().fetchall()
883 for row in sql_rows:
884 key = row[self._definitionTable.columns.dimension_graph_id]
885 dimensionNamesByKey[key].add(row[self._definitionTable.columns.dimension_name])
886 keysByGraph: dict[DimensionGroup, int] = {self._universe.empty.as_group(): 0}
887 graphsByKey: dict[int, DimensionGroup] = {0: self._universe.empty.as_group()}
888 for key, dimensionNames in dimensionNamesByKey.items():
889 graph = DimensionGroup(self._universe, names=dimensionNames)
890 keysByGraph[graph] = key
891 graphsByKey[key] = graph
892 self._groupsByKey = graphsByKey
893 self._keysByGroup = keysByGraph
895 def save(self, group: DimensionGroup) -> int:
896 """Save a `DimensionGraph` definition to the database, allowing it to
897 be retrieved later via the returned key.
899 Parameters
900 ----------
901 group : `DimensionGroup`
902 Set of dimensions to save.
904 Returns
905 -------
906 key : `int`
907 Integer used as the unique key for this `DimensionGroup` in the
908 database.
909 """
910 key = self._keysByGroup.get(group)
911 if key is not None:
912 return key
913 # Lock tables and then refresh to guard against races where some other
914 # process is trying to register the exact same dimension graph. This
915 # is probably not the most efficient way to do it, but it should be a
916 # rare operation, especially since the short-circuit above will usually
917 # work in long-lived data repositories.
918 with self._db.transaction(lock=[self._idTable, self._definitionTable]):
919 self.refresh()
920 key = self._keysByGroup.get(group)
921 if key is None:
922 (key,) = self._db.insert(self._idTable, {}, returnIds=True) # type: ignore
923 self._db.insert(
924 self._definitionTable,
925 *[{"dimension_graph_id": key, "dimension_name": name} for name in group.required],
926 )
927 self._keysByGroup[group] = key
928 self._groupsByKey[key] = group
929 return key
931 def load(self, key: int) -> DimensionGroup:
932 """Retrieve a `DimensionGraph` that was previously saved in the
933 database.
935 Parameters
936 ----------
937 key : `int`
938 Integer used as the unique key for this `DimensionGroup` in the
939 database.
941 Returns
942 -------
943 graph : `DimensionGroup`
944 Retrieved graph.
945 """
946 graph = self._groupsByKey.get(key)
947 if graph is None:
948 self.refresh()
949 graph = self._groupsByKey[key]
950 return graph