Coverage for python/lsst/daf/butler/registry/dimensions/static.py: 97%
326 statements
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-16 10:43 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
27from __future__ import annotations
29import itertools
30import logging
31from collections import defaultdict
32from collections.abc import Sequence, Set
33from typing import TYPE_CHECKING, Any
35import sqlalchemy
36from lsst.daf.relation import Calculation, ColumnExpression, Join, Relation, sql
38from ... import ddl
39from ..._column_tags import DimensionKeyColumnTag, DimensionRecordColumnTag
40from ..._column_type_info import LogicalColumn
41from ..._named import NamedKeyDict
42from ...dimensions import (
43 DatabaseTopologicalFamily,
44 DataCoordinate,
45 Dimension,
46 DimensionElement,
47 DimensionGroup,
48 DimensionRecord,
49 DimensionRecordSet,
50 DimensionUniverse,
51 SkyPixDimension,
52 addDimensionForeignKey,
53)
54from ...dimensions.record_cache import DimensionRecordCache
55from .._exceptions import MissingSpatialOverlapError
56from ..interfaces import Database, DimensionRecordStorageManager, StaticTablesContext, VersionTuple
if TYPE_CHECKING:
    # Imported only for type annotations; a runtime import would create a
    # circular dependency with the queries subpackage.
    from .. import queries

# This has to be updated on every schema change
_VERSION = VersionTuple(6, 0, 2)

# Module-level logger for this storage manager.
_LOG = logging.getLogger(__name__)
class StaticDimensionRecordStorageManager(DimensionRecordStorageManager):
    """An implementation of `DimensionRecordStorageManager` for single-layer
    `Registry` and the base layers of multi-layer `Registry`.

    This manager creates `DimensionRecordStorage` instances for all elements
    in the `DimensionUniverse` in its own `initialize` method, as part of
    static table creation, so it never needs to manage any dynamic registry
    tables.

    Parameters
    ----------
    db : `Database`
        Interface to the underlying database engine and namespace.
    tables : `dict` [ `str`, `sqlalchemy.Table` ]
        Mapping from dimension element name to SQL table, for all elements
        that have `DimensionElement.has_own_table` `True`.
    overlap_tables : `dict` [ `str`, `tuple` [ `sqlalchemy.Table`, \
            `sqlalchemy.Table` ] ]
        Mapping from dimension element name to the pair of SQL tables
        (summary table, overlap table) holding overlaps between the common
        skypix dimension and that element, for all elements that have
        `DimensionElement.has_own_table` `True` and
        `DimensionElement.spatial` not `None`.
    dimension_group_storage : `_DimensionGroupStorage`
        Object that manages saved `DimensionGroup` definitions.
    universe : `DimensionUniverse`
        All known dimensions.
    registry_schema_version : `VersionTuple` or `None`, optional
        Version of registry schema.
    """
98 def __init__(
99 self,
100 db: Database,
101 *,
102 tables: dict[str, sqlalchemy.Table],
103 overlap_tables: dict[str, tuple[sqlalchemy.Table, sqlalchemy.Table]],
104 dimension_group_storage: _DimensionGroupStorage,
105 universe: DimensionUniverse,
106 registry_schema_version: VersionTuple | None = None,
107 ):
108 super().__init__(universe=universe, registry_schema_version=registry_schema_version)
109 self._db = db
110 self._tables = tables
111 self._overlap_tables = overlap_tables
112 self._dimension_group_storage = dimension_group_storage
    @classmethod
    def initialize(
        cls,
        db: Database,
        context: StaticTablesContext,
        *,
        universe: DimensionUniverse,
        registry_schema_version: VersionTuple | None = None,
    ) -> DimensionRecordStorageManager:
        # Docstring inherited from DimensionRecordStorageManager.
        tables: dict[str, sqlalchemy.Table] = {}
        # Define tables for governor dimensions, which are never spatial or
        # temporal and always have tables.
        for dimension in universe.governor_dimensions:
            spec = dimension.RecordClass.fields.makeTableSpec(
                TimespanReprClass=db.getTimespanRepresentation()
            )
            tables[dimension.name] = context.addTable(dimension.name, spec)
        # Define tables for database dimension elements, which may or may not
        # have their own tables and may be spatial or temporal.
        spatial = NamedKeyDict[DatabaseTopologicalFamily, list[DimensionElement]]()
        overlap_tables: dict[str, tuple[sqlalchemy.Table, sqlalchemy.Table]] = {}
        for element in universe.database_elements:
            if not element.has_own_table:
                continue
            spec = element.RecordClass.fields.makeTableSpec(TimespanReprClass=db.getTimespanRepresentation())
            tables[element.name] = context.addTable(element.name, spec)
            if element.spatial is not None:
                # Group spatial elements by topological family for the legacy
                # overlap tables below, and give each its skypix overlap
                # (summary, overlap) table pair.
                spatial.setdefault(element.spatial, []).append(element)
                overlap_tables[element.name] = cls._make_skypix_overlap_tables(context, element)
        # Add some tables for materialized overlaps between database
        # dimensions.  We've never used these and no longer plan to, but we
        # have to keep creating them to keep schema versioning consistent.
        cls._make_legacy_overlap_tables(context, spatial)
        # Create tables that store DimensionGraph definitions.
        dimension_group_storage = _DimensionGroupStorage.initialize(db, context, universe=universe)
        return cls(
            db=db,
            tables=tables,
            overlap_tables=overlap_tables,
            universe=universe,
            dimension_group_storage=dimension_group_storage,
            registry_schema_version=registry_schema_version,
        )
159 def fetch_cache_dict(self) -> dict[str, DimensionRecordSet]:
160 # Docstring inherited.
161 result: dict[str, DimensionRecordSet] = {}
162 with self._db.transaction():
163 for element in self.universe.elements:
164 if not element.is_cached:
165 continue
166 assert not element.temporal, (
167 "Cached dimension elements should not be spatial or temporal, as that "
168 "suggests a large number of records."
169 )
170 if element.implied_union_target is not None:
171 assert isinstance(element, Dimension), "Only dimensions can be implied dependencies."
172 table = self._tables[element.implied_union_target.name]
173 sql = sqlalchemy.select(
174 table.columns[element.name].label(element.primary_key.name)
175 ).distinct()
176 else:
177 table = self._tables[element.name]
178 sql = table.select()
179 with self._db.query(sql) as results:
180 result[element.name] = DimensionRecordSet(
181 element=element,
182 records=[element.RecordClass(**row) for row in results.mappings()],
183 )
184 return result
    def insert(
        self,
        element: DimensionElement,
        *records: DimensionRecord,
        cache: DimensionRecordCache,
        replace: bool = False,
        skip_existing: bool = False,
    ) -> None:
        # Docstring inherited.
        if not element.has_own_table:
            raise TypeError(f"Cannot insert {element.name} records.")
        # Compute all database rows (main rows, overlap inserts/deletes, and
        # overlap-summary rows) up front, outside the transaction, to keep
        # in-transaction time short.
        rows, overlap_insert_rows, overlap_delete_rows, overlap_summary_rows = self._make_record_db_rows(
            element, records, replace=replace
        )
        table = self._tables[element.name]
        with cache.modifying(element.name) as cache_records:
            with self._db.transaction():
                if replace:
                    self._db.replace(table, *rows)
                elif skip_existing:
                    # Conflicts on existing primary keys are ignored.
                    self._db.ensure(table, *rows, primary_key_only=True)
                else:
                    self._db.insert(table, *rows)
                self._insert_overlaps(
                    element, overlap_insert_rows, overlap_delete_rows, skip_existing=skip_existing
                )
                for related_element_name, summary_rows in overlap_summary_rows.items():
                    self._db.ensure(self._overlap_tables[related_element_name][0], *summary_rows)
            # Database transaction succeeded; update the cache to keep them
            # consistent.
            if cache_records is not None:
                cache_records.update(records, replace=not skip_existing)
    def sync(
        self, record: DimensionRecord, cache: DimensionRecordCache, update: bool = False
    ) -> bool | dict[str, Any]:
        # Docstring inherited.
        if not record.definition.has_own_table:
            raise TypeError(f"Cannot sync {record.definition.name} records.")
        # We might not need the overlap rows at all; we won't know until we try
        # to insert the main row.  But we figure it's better to spend the time
        # to compute them in advance always *outside* the database transaction
        # than to compute them only as-needed inside the database transaction,
        # since in-transaction time is especially precious.
        (
            (compared,),
            overlap_insert_rows,
            overlap_delete_rows,
            overlap_summary_rows,
        ) = self._make_record_db_rows(record.definition, [record], replace=True)
        # Split the row into primary-key fields (used to locate the row) and
        # the remaining fields (compared/updated by Database.sync).
        keys = {}
        for name in record.fields.required.names:
            keys[name] = compared.pop(name)
        with cache.modifying(record.definition.name) as cache_records:
            with self._db.transaction():
                _, inserted_or_updated = self._db.sync(
                    self._tables[record.definition.name],
                    keys=keys,
                    compared=compared,
                    update=update,
                )
                if inserted_or_updated:
                    if inserted_or_updated is True:
                        # Inserted a new row, so we just need to insert new
                        # overlap rows (if there are any).
                        self._insert_overlaps(record.definition, overlap_insert_rows, overlap_delete_rows=[])
                    elif "region" in inserted_or_updated:
                        # Updated the region, so we need to delete old overlap
                        # rows and insert new ones.
                        self._insert_overlaps(record.definition, overlap_insert_rows, overlap_delete_rows)
                    # If we updated something other than a region, there is no
                    # need to change the overlap rows, but the summary rows
                    # are always ensured.
                    for related_element_name, summary_rows in overlap_summary_rows.items():
                        self._db.ensure(self._overlap_tables[related_element_name][0], *summary_rows)
            # Database transaction succeeded; update the cache to keep them
            # consistent.
            if cache_records is not None and inserted_or_updated:
                cache_records.add(record, replace=update)
        return inserted_or_updated
266 def fetch_one(
267 self,
268 element_name: str,
269 data_id: DataCoordinate,
270 cache: DimensionRecordCache,
271 ) -> DimensionRecord | None:
272 # Docstring inherited.
273 element = self.universe[element_name]
274 if element_name in cache:
275 try:
276 return cache[element_name].find(data_id)
277 except LookupError:
278 return None
279 if element.implied_union_target is not None: 279 ↛ 280line 279 didn't jump to line 280, because the condition on line 279 was never true
280 assert isinstance(element, Dimension), "Only dimensions can be implied dependencies."
281 table = self._tables[element.implied_union_target.name]
282 sql = sqlalchemy.select(table.columns[element.name].label(element.primary_key.name)).where(
283 table.columns[element_name] == data_id[element_name]
284 )
285 elif isinstance(element, SkyPixDimension):
286 id = data_id[element_name]
287 return element.RecordClass(id=id, region=element.pixelization.pixel(id))
288 else:
289 table = self._tables[element.name]
290 sql = table.select().where(
291 *[
292 table.columns[column_name] == data_id[dimension_name]
293 for column_name, dimension_name in zip(
294 element.schema.required.names, element.required.names
295 )
296 ]
297 )
298 with self._db.query(sql) as results:
299 row = results.fetchone()
300 if row is None:
301 return None
302 if element.temporal is not None:
303 mapping = dict(**row._mapping)
304 timespan = self._db.getTimespanRepresentation().extract(mapping)
305 for name in self._db.getTimespanRepresentation().getFieldNames():
306 del mapping[name]
307 mapping["timespan"] = timespan
308 else:
309 # MyPy says this isn't a real collections.abc.Mapping, but it
310 # sure behaves like one.
311 mapping = row._mapping # type: ignore
312 return element.RecordClass(**mapping)
314 def save_dimension_group(self, graph: DimensionGroup) -> int:
315 # Docstring inherited from DimensionRecordStorageManager.
316 return self._dimension_group_storage.save(graph)
318 def load_dimension_group(self, key: int) -> DimensionGroup:
319 # Docstring inherited from DimensionRecordStorageManager.
320 return self._dimension_group_storage.load(key)
    def join(
        self,
        element_name: str,
        target: Relation,
        join: Join,
        context: queries.SqlQueryContext,
    ) -> Relation:
        # Docstring inherited.
        element = self.universe[element_name]
        # We use Join.partial(...).apply(...) instead of Join.apply(..., ...)
        # for the "backtracking" insertion capabilities of the former; more
        # specifically, if `target` is a tree that starts with SQL relations
        # and ends with iteration-engine operations (e.g. region-overlap
        # postprocessing), this will try to perform the join upstream in the
        # SQL engine before the transfer to iteration.
        if element.has_own_table:
            return join.partial(self._make_relation(element, context)).apply(target)
        elif element.implied_union_target is not None:
            # The element's values are the distinct values of a column in the
            # union target's table; join against a deduplicated projection of
            # that single column.
            columns = DimensionKeyColumnTag(element.name)
            return join.partial(
                self._make_relation(element.implied_union_target, context)
                .with_only_columns(
                    {columns},
                    preferred_engine=context.preferred_engine,
                    require_preferred_engine=True,
                )
                .without_duplicates()
            ).apply(target)
        elif isinstance(element, SkyPixDimension):
            assert join.predicate.as_trivial(), "Expected trivial join predicate for skypix relation."
            id_column = DimensionKeyColumnTag(element.name)
            assert id_column in target.columns, "Guaranteed by QueryBuilder.make_dimension_target."
            # Skypix regions are not stored in the database; register a
            # function with the iteration engine that computes each region
            # from its pixel ID, and attach it as a calculated column.
            function_name = f"{element.name}_region"
            context.iteration_engine.functions[function_name] = element.pixelization.pixel
            calculation = Calculation(
                tag=DimensionRecordColumnTag(element.name, "region"),
                expression=ColumnExpression.function(function_name, ColumnExpression.reference(id_column)),
            )
            return calculation.apply(
                target, preferred_engine=context.iteration_engine, transfer=True, backtrack=True
            )
        else:
            raise AssertionError(f"Unexpected definition of {element_name!r}.")
    def make_spatial_join_relation(
        self,
        element1: str,
        element2: str,
        context: queries.SqlQueryContext,
        existing_relationships: Set[frozenset[str]] = frozenset(),
    ) -> tuple[Relation, bool]:
        # Docstring inherited.
        overlap_relationship = frozenset(
            self.universe[element1].dimensions.names | self.universe[element2].dimensions.names
        )
        if overlap_relationship in existing_relationships:
            # The join is already implied by relationships present in the
            # query; nothing more to add.
            return context.preferred_engine.make_join_identity_relation(), False
        overlaps: Relation | None = None
        needs_refinement: bool = False
        if element1 == self.universe.commonSkyPix.name:
            # Normalize so that if commonSkyPix participates it is always
            # element2.
            (element1, element2) = (element2, element1)
        if element1 in self._overlap_tables:
            if element2 in self._overlap_tables:
                # Use commonSkyPix as an intermediary with post-query
                # refinement.
                have_overlap1_already = (
                    frozenset(self.universe[element1].dimensions.names | {self.universe.commonSkyPix.name})
                    in existing_relationships
                )
                have_overlap2_already = (
                    frozenset(self.universe[element2].dimensions.names | {self.universe.commonSkyPix.name})
                    in existing_relationships
                )
                overlap1 = context.preferred_engine.make_join_identity_relation()
                overlap2 = context.preferred_engine.make_join_identity_relation()
                if not have_overlap1_already:
                    overlap1 = self._make_common_skypix_join_relation(self.universe[element1], context)
                if not have_overlap2_already:
                    overlap2 = self._make_common_skypix_join_relation(self.universe[element2], context)
                overlaps = overlap1.join(overlap2)
                if not have_overlap1_already and not have_overlap2_already:
                    # Drop the common skypix ID column from the overlap
                    # relation we return, since we don't want that column
                    # to be mistakenly equated with any other appearance of
                    # that column, since this would mangle queries like
                    # "join visit to tract and tract to healpix10", by
                    # incorrectly requiring all visits and healpix10 pixels
                    # share common skypix pixels, not just tracts.
                    columns = set(overlaps.columns)
                    columns.remove(DimensionKeyColumnTag(self.universe.commonSkyPix.name))
                    overlaps = overlaps.with_only_columns(columns)
                    needs_refinement = True
            elif element2 == self.universe.commonSkyPix.name:
                overlaps = self._make_common_skypix_join_relation(self.universe[element1], context)
        if overlaps is None:
            # In the future, there's a lot more we could try here:
            #
            # - for skypix dimensions, looking for materialized overlaps at
            #   smaller spatial scales (higher-levels) and using bit-shifting;
            #
            # - for non-skypix dimensions, looking for materialized overlaps
            #   for more finer-grained members of the same family, and then
            #   doing SELECT DISTINCT (or even tolerating duplicates) on the
            #   columns we care about (e.g. use patch overlaps to satisfy a
            #   request for tract overlaps).
            #
            # It's not obvious that's better than just telling the user to
            # materialize more overlaps, though.
            raise MissingSpatialOverlapError(
                f"No materialized overlaps for spatial join between {element1!r} and {element2!r}."
            )
        return overlaps, needs_refinement
436 def _make_relation(
437 self,
438 element: DimensionElement,
439 context: queries.SqlQueryContext,
440 ) -> Relation:
441 table = self._tables[element.name]
442 payload = sql.Payload[LogicalColumn](table)
443 for tag, field_name in element.RecordClass.fields.columns.items():
444 if field_name == "timespan":
445 payload.columns_available[tag] = self._db.getTimespanRepresentation().from_columns(
446 table.columns, name=field_name
447 )
448 else:
449 payload.columns_available[tag] = table.columns[field_name]
450 return context.sql_engine.make_leaf(
451 payload.columns_available.keys(),
452 name=element.name,
453 payload=payload,
454 )
    def _make_common_skypix_join_relation(
        self,
        element: DimensionElement,
        context: queries.SqlQueryContext,
    ) -> Relation:
        """Construct a subquery expression containing overlaps between the
        common skypix dimension and the given dimension element.

        Parameters
        ----------
        element : `DimensionElement`
            Spatial dimension element whose overlaps with the common skypix
            system are represented by the returned relation.
        context : `.queries.SqlQueryContext`
            Object that manages relation engines and database-side state
            (e.g. temporary tables) for the query.

        Returns
        -------
        relation : `sql.Relation`
            Join relation.
        """
        assert element.spatial is not None, "Only called for spatial dimension elements."
        assert element.has_own_table, "Only called for dimension elements with their own tables."
        # Index [1] is the overlap table itself; [0] is the summary table.
        _, table = self._overlap_tables[element.name]
        payload = sql.Payload[LogicalColumn](table)
        payload.columns_available[
            DimensionKeyColumnTag(self.universe.commonSkyPix.name)
        ] = payload.from_clause.columns.skypix_index
        for dimension_name in element.graph.required.names:
            payload.columns_available[DimensionKeyColumnTag(dimension_name)] = payload.from_clause.columns[
                dimension_name
            ]
        # The overlap table can in principle hold multiple skypix systems and
        # levels; restrict to the common skypix dimension only.
        payload.where.append(table.columns.skypix_system == self.universe.commonSkyPix.system.name)
        payload.where.append(table.columns.skypix_level == self.universe.commonSkyPix.level)
        leaf = context.sql_engine.make_leaf(
            payload.columns_available.keys(),
            name=f"{element.name}_{self.universe.commonSkyPix.name}_overlap",
            payload=payload,
        )
        return leaf
498 @classmethod
499 def currentVersions(cls) -> list[VersionTuple]:
500 # Docstring inherited from VersionedExtension.
501 return [_VERSION]
    @classmethod
    def _make_skypix_overlap_tables(
        cls, context: StaticTablesContext, element: DimensionElement
    ) -> tuple[sqlalchemy.Table, sqlalchemy.Table]:
        """Declare the (summary, overlap) table pair that materializes
        overlaps between skypix dimensions and ``element``.

        The summary table records which (skypix_system, skypix_level,
        governor) combinations have materialized overlaps; the overlap table
        holds the actual per-pixel overlap rows.  Field declaration order is
        deliberate and must not change, since it affects the schema.
        """
        assert element.governor is not None
        summary_spec = ddl.TableSpec(
            fields=[
                ddl.FieldSpec(
                    name="skypix_system",
                    dtype=sqlalchemy.String,
                    length=16,
                    nullable=False,
                    primaryKey=True,
                ),
                ddl.FieldSpec(
                    name="skypix_level",
                    dtype=sqlalchemy.SmallInteger,
                    nullable=False,
                    primaryKey=True,
                ),
            ]
        )
        addDimensionForeignKey(summary_spec, element.governor, primaryKey=True)
        overlap_spec = ddl.TableSpec(
            fields=[
                ddl.FieldSpec(
                    name="skypix_system",
                    dtype=sqlalchemy.String,
                    length=16,
                    nullable=False,
                    primaryKey=True,
                ),
                ddl.FieldSpec(
                    name="skypix_level",
                    dtype=sqlalchemy.SmallInteger,
                    nullable=False,
                    primaryKey=True,
                ),
                # (more columns added below)
            ],
            unique=set(),
            indexes={
                # This index has the same fields as the PK, in a different
                # order, to facilitate queries that know skypix_index and want
                # to find the other element.
                ddl.IndexSpec(
                    "skypix_system",
                    "skypix_level",
                    "skypix_index",
                    *element.graph.required.names,
                ),
            },
            foreignKeys=[
                # Foreign key to summary table.  This makes sure we don't
                # materialize any overlaps without remembering that we've done
                # so in the summary table, though it can't prevent the converse
                # of adding a summary row without adding overlap row (either of
                # those is a logic bug, of course, but we want to be defensive
                # about those).  Using ON DELETE CASCADE, it'd be very easy to
                # implement "disabling" an overlap materialization, because we
                # can just delete the summary row.
                # Note that the governor dimension column is added below, in
                # the call to addDimensionForeignKey.
                ddl.ForeignKeySpec(
                    f"{element.name}_skypix_overlap_summary",
                    source=("skypix_system", "skypix_level", element.governor.name),
                    target=("skypix_system", "skypix_level", element.governor.name),
                    onDelete="CASCADE",
                ),
            ],
        )
        # Add fields for the standard element this class manages overlaps for.
        # This is guaranteed to add a column for the governor dimension,
        # because that's a required dependency of element.
        for dimension in element.required:
            addDimensionForeignKey(overlap_spec, dimension, primaryKey=True)
        # Add field for the actual skypix index.  We do this later because I
        # think we care (at least a bit) about the order in which the primary
        # key is defined, in that we want a non-summary column like this one
        # to appear after the governor dimension column.
        overlap_spec.fields.add(
            ddl.FieldSpec(
                name="skypix_index",
                dtype=sqlalchemy.BigInteger,
                nullable=False,
                primaryKey=True,
            )
        )
        return (
            context.addTable(f"{element.name}_skypix_overlap_summary", summary_spec),
            context.addTable(f"{element.name}_skypix_overlap", overlap_spec),
        )
    @classmethod
    def _make_legacy_overlap_tables(
        cls,
        context: StaticTablesContext,
        spatial: NamedKeyDict[DatabaseTopologicalFamily, list[DimensionElement]],
    ) -> None:
        """Declare the legacy element-to-element overlap tables.

        These tables have never been populated and no longer will be, but
        they must still be declared so the schema (and its version checksum)
        stays consistent with existing repositories.
        """
        for (_, elements1), (_, elements2) in itertools.combinations(spatial.items(), 2):
            for element1, element2 in itertools.product(elements1, elements2):
                if element1 > element2:
                    # Normalize ordering so each pair produces exactly one
                    # deterministic table name.
                    (element2, element1) = (element1, element2)
                assert element1.spatial is not None and element2.spatial is not None
                assert element1.governor != element2.governor
                assert element1.governor is not None and element2.governor is not None
                summary_spec = ddl.TableSpec(fields=[])
                addDimensionForeignKey(summary_spec, element1.governor, primaryKey=True)
                addDimensionForeignKey(summary_spec, element2.governor, primaryKey=True)
                context.addTable(f"{element1.name}_{element2.name}_overlap_summary", summary_spec)
                overlap_spec = ddl.TableSpec(fields=[])
                addDimensionForeignKey(overlap_spec, element1.governor, primaryKey=True)
                addDimensionForeignKey(overlap_spec, element2.governor, primaryKey=True)
                # Governor columns were added above; add the remaining
                # required dimensions of each element.
                for dimension in element1.required:
                    if dimension != element1.governor:
                        addDimensionForeignKey(overlap_spec, dimension, primaryKey=True)
                for dimension in element2.required:
                    if dimension != element2.governor:
                        addDimensionForeignKey(overlap_spec, dimension, primaryKey=True)
                context.addTable(f"{element1.name}_{element2.name}_overlap", overlap_spec)
624 def _make_record_db_rows(
625 self, element: DimensionElement, records: Sequence[DimensionRecord], replace: bool
626 ) -> tuple[
627 list[dict[str, Any]],
628 list[dict[str, Any]],
629 list[dict[str, Any]],
630 dict[str, list[dict[str, Any]]],
631 ]:
632 rows = [record.toDict() for record in records]
633 if element.temporal is not None:
634 TimespanReprClass = self._db.getTimespanRepresentation()
635 for row in rows:
636 timespan = row.pop("timespan")
637 TimespanReprClass.update(timespan, result=row)
638 overlap_delete_rows = []
639 overlap_insert_rows = []
640 if element.spatial is not None:
641 overlap_insert_rows = self._compute_common_skypix_overlap_inserts(element, records)
642 if replace:
643 overlap_delete_rows = self._compute_common_skypix_overlap_deletes(records)
644 overlap_summary_rows = {}
645 if element in self.universe.governor_dimensions:
646 for related_element_name in self._overlap_tables.keys():
647 if self.universe[related_element_name].governor == element:
648 overlap_summary_rows[related_element_name] = [
649 {
650 "skypix_system": self.universe.commonSkyPix.system.name,
651 "skypix_level": self.universe.commonSkyPix.level,
652 element.name: record.dataId[element.name],
653 }
654 for record in records
655 ]
656 return rows, overlap_insert_rows, overlap_delete_rows, overlap_summary_rows
658 def _compute_common_skypix_overlap_deletes(
659 self, records: Sequence[DimensionRecord]
660 ) -> list[dict[str, Any]]:
661 return [
662 {
663 "skypix_system": self.universe.commonSkyPix.system.name,
664 "skypix_level": self.universe.commonSkyPix.level,
665 **record.dataId.required,
666 }
667 for record in records
668 ]
670 def _compute_common_skypix_overlap_inserts(
671 self,
672 element: DimensionElement,
673 records: Sequence[DimensionRecord],
674 ) -> list[dict[str, Any]]:
675 _LOG.debug("Precomputing common skypix overlaps for %s.", element.name)
676 overlap_records: list[dict[str, Any]] = []
677 for record in records:
678 if record.region is None:
679 continue
680 base_overlap_record = dict(record.dataId.required)
681 base_overlap_record["skypix_system"] = self.universe.commonSkyPix.system.name
682 base_overlap_record["skypix_level"] = self.universe.commonSkyPix.level
683 for begin, end in self.universe.commonSkyPix.pixelization.envelope(record.region):
684 for index in range(begin, end):
685 overlap_records.append({"skypix_index": index, **base_overlap_record})
686 return overlap_records
688 def _insert_overlaps(
689 self,
690 element: DimensionElement,
691 overlap_insert_rows: list[dict[str, Any]],
692 overlap_delete_rows: list[dict[str, Any]],
693 skip_existing: bool = False,
694 ) -> None:
695 if overlap_delete_rows:
696 # Since any of the new records might have replaced existing ones
697 # that already have overlap records, and we don't know which, we
698 # have no choice but to delete all overlaps for these records and
699 # recompute them. We include the skypix_system and skypix_level
700 # column values explicitly instead of just letting the query search
701 # for all of those related to the given records, because they are
702 # the first columns in the primary key, and hence searching with
703 # them will be way faster (and we don't want to add a new index
704 # just for this operation).
705 _LOG.debug("Deleting old common skypix overlaps for %s.", element.name)
706 self._db.delete(
707 self._overlap_tables[element.name][1],
708 ["skypix_system", "skypix_level"] + list(element.minimal_group.required),
709 *overlap_delete_rows,
710 )
711 if overlap_insert_rows:
712 _LOG.debug("Inserting %d new skypix overlap rows for %s.", len(overlap_insert_rows), element.name)
713 if skip_existing:
714 self._db.ensure(
715 self._overlap_tables[element.name][1], *overlap_insert_rows, primary_key_only=True
716 )
717 else:
718 self._db.insert(self._overlap_tables[element.name][1], *overlap_insert_rows)
719 # We have only ever put overlaps with the commonSkyPix system into
720 # this table, and *probably* only ever will. But the schema leaves
721 # open the possibility that we should be inserting overlaps for
722 # some other skypix system, as we once thought we'd support. In
723 # case that door opens again in the future, we need to check the
724 # "overlap summary" table to see if are any skypix systems other
725 # than the common skypix system and raise (rolling back the entire
726 # transaction) if there are.
727 summary_table = self._overlap_tables[element.name][0]
728 check_sql = (
729 sqlalchemy.sql.select(summary_table.columns.skypix_system, summary_table.columns.skypix_level)
730 .select_from(summary_table)
731 .where(
732 sqlalchemy.sql.not_(
733 sqlalchemy.sql.and_(
734 summary_table.columns.skypix_system == self.universe.commonSkyPix.system.name,
735 summary_table.columns.skypix_level == self.universe.commonSkyPix.level,
736 )
737 )
738 )
739 )
740 with self._db.query(check_sql) as sql_result:
741 bad_summary_rows = sql_result.fetchall()
742 if bad_summary_rows: 742 ↛ 743line 742 didn't jump to line 743, because the condition on line 742 was never true
743 bad_skypix_names = [f"{row.skypix_system}{row.skypix.level}" for row in bad_summary_rows]
744 raise RuntimeError(
745 f"Data repository has overlaps between {element} and {bad_skypix_names} that "
746 "are not supported by this version of daf_butler. Please use a newer version."
747 )
class _DimensionGroupStorage:
    """Helper object that manages saved DimensionGroup definitions.

    Should generally be constructed by calling `initialize` instead of
    invoking the constructor directly.

    Parameters
    ----------
    db : `Database`
        Interface to the underlying database engine and namespace.
    idTable : `sqlalchemy.schema.Table`
        Table that just holds unique IDs for dimension graphs.
    definitionTable : `sqlalchemy.schema.Table`
        Table that maps dimension names to the IDs of the dimension graphs
        to which they belong.
    universe : `DimensionUniverse`
        All known dimensions.
    """
769 def __init__(
770 self,
771 db: Database,
772 idTable: sqlalchemy.schema.Table,
773 definitionTable: sqlalchemy.schema.Table,
774 universe: DimensionUniverse,
775 ):
776 self._db = db
777 self._idTable = idTable
778 self._definitionTable = definitionTable
779 self._universe = universe
780 self._keysByGroup: dict[DimensionGroup, int] = {universe.empty.as_group(): 0}
781 self._groupsByKey: dict[int, DimensionGroup] = {0: universe.empty.as_group()}
783 @classmethod
784 def initialize(
785 cls,
786 db: Database,
787 context: StaticTablesContext,
788 *,
789 universe: DimensionUniverse,
790 ) -> _DimensionGroupStorage:
791 """Construct a new instance, including creating tables if necessary.
793 Parameters
794 ----------
795 db : `Database`
796 Interface to the underlying database engine and namespace.
797 context : `StaticTablesContext`
798 Context object obtained from `Database.declareStaticTables`; used
799 to declare any tables that should always be present.
800 universe : `DimensionUniverse`
801 All known dimensions.
803 Returns
804 -------
805 storage : `_DimensionGroupStorage`
806 New instance of this class.
807 """
808 # We need two tables just so we have one where the autoincrement key is
809 # the only primary key column, as is required by (at least) SQLite. In
810 # other databases, we might be able to use a Sequence directly.
811 idTable = context.addTable(
812 "dimension_graph_key",
813 ddl.TableSpec(
814 fields=[
815 ddl.FieldSpec(
816 name="id",
817 dtype=sqlalchemy.BigInteger,
818 autoincrement=True,
819 primaryKey=True,
820 ),
821 ],
822 ),
823 )
824 definitionTable = context.addTable(
825 "dimension_graph_definition",
826 ddl.TableSpec(
827 fields=[
828 ddl.FieldSpec(name="dimension_graph_id", dtype=sqlalchemy.BigInteger, primaryKey=True),
829 ddl.FieldSpec(name="dimension_name", dtype=sqlalchemy.Text, primaryKey=True),
830 ],
831 foreignKeys=[
832 ddl.ForeignKeySpec(
833 "dimension_graph_key",
834 source=("dimension_graph_id",),
835 target=("id",),
836 onDelete="CASCADE",
837 ),
838 ],
839 ),
840 )
841 return cls(db, idTable, definitionTable, universe=universe)
843 def refresh(self) -> None:
844 """Refresh the in-memory cache of saved DimensionGraph definitions.
846 This should be done automatically whenever needed, but it can also
847 be called explicitly.
848 """
849 dimensionNamesByKey: dict[int, set[str]] = defaultdict(set)
850 with self._db.query(self._definitionTable.select()) as sql_result:
851 sql_rows = sql_result.mappings().fetchall()
852 for row in sql_rows:
853 key = row[self._definitionTable.columns.dimension_graph_id]
854 dimensionNamesByKey[key].add(row[self._definitionTable.columns.dimension_name])
855 keysByGraph: dict[DimensionGroup, int] = {self._universe.empty.as_group(): 0}
856 graphsByKey: dict[int, DimensionGroup] = {0: self._universe.empty.as_group()}
857 for key, dimensionNames in dimensionNamesByKey.items():
858 graph = DimensionGroup(self._universe, names=dimensionNames)
859 keysByGraph[graph] = key
860 graphsByKey[key] = graph
861 self._groupsByKey = graphsByKey
862 self._keysByGroup = keysByGraph
864 def save(self, group: DimensionGroup) -> int:
865 """Save a `DimensionGraph` definition to the database, allowing it to
866 be retrieved later via the returned key.
868 Parameters
869 ----------
870 group : `DimensionGroup`
871 Set of dimensions to save.
873 Returns
874 -------
875 key : `int`
876 Integer used as the unique key for this `DimensionGraph` in the
877 database.
878 """
879 key = self._keysByGroup.get(group)
880 if key is not None:
881 return key
882 # Lock tables and then refresh to guard against races where some other
883 # process is trying to register the exact same dimension graph. This
884 # is probably not the most efficient way to do it, but it should be a
885 # rare operation, especially since the short-circuit above will usually
886 # work in long-lived data repositories.
887 with self._db.transaction(lock=[self._idTable, self._definitionTable]):
888 self.refresh()
889 key = self._keysByGroup.get(group)
890 if key is None:
891 (key,) = self._db.insert(self._idTable, {}, returnIds=True) # type: ignore
892 self._db.insert(
893 self._definitionTable,
894 *[{"dimension_graph_id": key, "dimension_name": name} for name in group.required],
895 )
896 self._keysByGroup[group] = key
897 self._groupsByKey[key] = group
898 return key
900 def load(self, key: int) -> DimensionGroup:
901 """Retrieve a `DimensionGraph` that was previously saved in the
902 database.
904 Parameters
905 ----------
906 key : `int`
907 Integer used as the unique key for this `DimensionGraph` in the
908 database.
910 Returns
911 -------
912 graph : `DimensionGraph`
913 Retrieved graph.
914 """
915 graph = self._groupsByKey.get(key)
916 if graph is None:
917 self.refresh()
918 graph = self._groupsByKey[key]
919 return graph