Coverage for python / lsst / daf / butler / dimensions / _schema.py: 23%
162 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-24 08:17 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-24 08:17 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
27from __future__ import annotations
29__all__ = ("DimensionRecordSchema", "addDimensionForeignKey")
31import copy
32from collections.abc import Set
33from typing import TYPE_CHECKING
35from lsst.utils.classes import immutable
37from .. import arrow_utils, ddl
38from .._named import NamedValueAbstractSet, NamedValueSet
39from ..column_spec import RegionColumnSpec, TimespanColumnSpec
40from ..timespan_database_representation import TimespanDatabaseRepresentation
42if TYPE_CHECKING: # Imports needed only for type annotations; may be circular.
43 from ._elements import Dimension, DimensionElement, KeyColumnSpec, MetadataColumnSpec
44 from ._group import DimensionGroup
@immutable
class DimensionRecordSchema:
    """A description of the columns in a dimension element's records.

    Instances of this class should be obtained via `DimensionElement.schema`,
    where they are cached on first use.

    Parameters
    ----------
    element : `DimensionElement`
        Element this object describes.
    """

    def __init__(self, element: DimensionElement):
        self.element = element
        self.required = NamedValueSet()
        self.implied = NamedValueSet()
        self.dimensions = NamedValueSet()
        self.remainder = NamedValueSet()
        self.all = NamedValueSet()
        for dimension in element.required:
            if dimension != element:
                # Columns that refer to another dimension are named after
                # that dimension rather than after its primary key.
                key_spec = dimension.primary_key.model_copy(update={"name": dimension.name})
            else:
                # A Dimension instance is in its own required dependency graph
                # (always at the end, because of topological ordering).  In
                # this case we don't want to rename the field.
                key_spec = element.primary_key  # type: ignore
            self.required.add(key_spec)
            self.dimensions.add(key_spec)
        for dimension in element.implied:
            key_spec = dimension.primary_key.model_copy(update={"name": dimension.name})
            self.implied.add(key_spec)
            self.dimensions.add(key_spec)
        # Dimension columns come first in ``all``; the remainder is appended
        # below once it has been fully populated.
        self.all.update(self.dimensions)
        # Add non-primary unique keys.
        self.remainder.update(element.alternate_keys)
        # Add other metadata record_fields.
        self.remainder.update(element.metadata_columns)
        if element.spatial:
            self.remainder.add(RegionColumnSpec(nullable=True))
        if element.temporal:
            self.remainder.add(TimespanColumnSpec(nullable=True))
        self.all.update(self.remainder)
        # Nothing is added after this point, so freeze every set.
        self.required.freeze()
        self.implied.freeze()
        self.dimensions.freeze()
        self.remainder.freeze()
        self.all.freeze()

    element: DimensionElement
    """The dimension element these fields correspond to.

    (`DimensionElement`)
    """

    required: NamedValueAbstractSet[KeyColumnSpec]
    """The required dimension columns of this element's records.

    The elements of this set correspond to `DimensionElement.required`, in the
    same order.
    """

    implied: NamedValueAbstractSet[KeyColumnSpec]
    """The implied dimension columns of this element's records.

    The elements of this set correspond to `DimensionElement.implied`, in the
    same order.
    """

    dimensions: NamedValueAbstractSet[KeyColumnSpec]
    """The required and implied dimension columns of this element's records.

    The elements of this set correspond to `DimensionElement.dimensions`, in
    the same order.
    """

    remainder: NamedValueAbstractSet[MetadataColumnSpec | RegionColumnSpec | TimespanColumnSpec]
    """The fields of this table that do not correspond to dimensions.

    This includes alternate keys, metadata columns, and any region or timespan.
    """

    all: NamedValueAbstractSet[MetadataColumnSpec | RegionColumnSpec | TimespanColumnSpec]
    """All columns for this dimension element's records, in order."""

    @property
    def names(self) -> Set[str]:
        """The names of all columns, in order."""
        return self.all.names

    def __str__(self) -> str:
        # One header line naming the element, followed by whatever lines each
        # column spec reports for itself at nesting level 1.
        lines = [f"{self.element.name}: "]
        for column_spec in self.all:
            lines.extend(column_spec.display(level=1))
        return "\n".join(lines)

    def to_arrow(
        self, remainder_only: bool = False, dimensions: DimensionGroup | None = None
    ) -> list[arrow_utils.ToArrow]:
        """Convert this schema to Arrow form.

        Parameters
        ----------
        remainder_only : `bool`, optional
            If `True`, skip the fields in `dimensions` and convert only those
            in ``remainder``.
        dimensions : `DimensionGroup`, optional
            Full set of dimensions over which the rows of the table are unique
            or close to unique.  This is used to determine whether to use
            Arrow's dictionary encoding to compress duplicate values.  Defaults
            to this element's `~DimensionElement.minimal_group`, which is
            appropriate for tables of just the records of this element.

        Returns
        -------
        converters : `list` [ `~lsst.daf.butler.arrow_utils.ToArrow` ]
            List of objects that can convert `DimensionRecord` attribute values
            to Arrow records, corresponding exactly to either ``all`` or
            ``remainder``, depending on ``remainder_only``.
        """
        if dimensions is None:
            dimensions = self.element.minimal_group
        converters: list[arrow_utils.ToArrow] = []
        if not remainder_only:
            # ``self.dimensions`` was built in the same order as
            # ``self.element.dimensions`` (required, then implied), so the two
            # can be walked in lockstep.
            for dimension, key_spec in zip(self.element.dimensions, self.dimensions):
                converters.append(dimension.to_arrow(dimensions, key_spec))
        for remainder_spec in self.remainder:
            # Dictionary-encode string columns that are metadata columns, and
            # every string column when the table spans more than this
            # element's own minimal group.
            if remainder_spec.type == "string" and (
                remainder_spec.name in self.element.metadata_columns.names
                or dimensions != self.element.minimal_group
            ):
                converters.append(remainder_spec.to_arrow().dictionary_encoded())
            else:
                converters.append(remainder_spec.to_arrow())
        return converters
def _makeForeignKeySpec(dimension: Dimension) -> ddl.ForeignKeySpec:
    """Make a `ddl.ForeignKeySpec`.

    The specification references the table for the given `Dimension`.

    Most callers should use the higher-level `addDimensionForeignKey` function
    instead.

    Parameters
    ----------
    dimension : `Dimension`
        The dimension to be referenced.  Caller guarantees that it is actually
        associated with a table.

    Returns
    -------
    spec : `ddl.ForeignKeySpec`
        A database-agnostic foreign key specification.
    """
    source = []
    target = []
    for other in dimension.required:
        if other == dimension:
            # In its own table the dimension's key column keeps the primary
            # key's name, while the referencing table names the column after
            # the dimension (see the ``source`` entry below).
            target.append(dimension.primaryKey.name)
        else:
            target.append(other.name)
        source.append(other.name)
    return ddl.ForeignKeySpec(table=dimension.name, source=tuple(source), target=tuple(target))
def addDimensionForeignKey(
    tableSpec: ddl.TableSpec,
    dimension: Dimension,
    *,
    primaryKey: bool,
    nullable: bool = False,
    constraint: bool = True,
) -> ddl.FieldSpec:
    """Add a field and possibly a foreign key to a table specification.

    The field will reference the table for the given `Dimension`.

    Parameters
    ----------
    tableSpec : `ddl.TableSpec`
        Specification the field and foreign key are to be added to.
    dimension : `Dimension`
        Dimension to be referenced.  If this dimension has required
        dependencies, those must have already been added to the table.  A
        field will be added that corresponds to this dimension's primary key,
        and a foreign key constraint will be added only if the dimension is
        associated with a table of its own.
    primaryKey : `bool`
        If `True`, the new field will be added as part of a compound primary
        key for the table.
    nullable : `bool`, optional
        If `False` (default) the new field will be added with a NOT NULL
        constraint.
    constraint : `bool`, optional
        If `False` (`True` is default), just add the field, not the foreign
        key constraint.

    Returns
    -------
    fieldSpec : `ddl.FieldSpec`
        Specification for the field just added.
    """
    # Add the dependency's primary key field, but use the dimension name for
    # the field name to make it unique and more meaningful in this table.
    # Work on a copy so the dimension's own primary key spec is not modified.
    fieldSpec = copy.copy(dimension.primaryKey)
    fieldSpec.name = dimension.name
    fieldSpec.primaryKey = primaryKey
    fieldSpec.nullable = nullable
    tableSpec.fields.add(fieldSpec)
    # Also add a foreign key constraint on the dependency table, but only if
    # there actually is one and we weren't told not to.
    if dimension.has_own_table and constraint:
        tableSpec.foreignKeys.append(_makeForeignKeySpec(dimension))
    return fieldSpec
class DimensionElementFields:
    """Class for constructing table schemas for `DimensionElement`.

    This creates an object that constructs the table schema for a
    `DimensionElement` and provides a categorized view of its fields.

    Parameters
    ----------
    element : `DimensionElement`
        Element for which to make a table specification.

    Notes
    -----
    This combines the foreign key fields from dependencies, unique keys
    for true `Dimension` instances, metadata fields, and region/timespan
    fields for spatial/temporal elements.

    Callers should use `DimensionUniverse.makeSchemaSpec` if they want to
    account for elements that have no table or reference another table; this
    class simply creates a specification for the table an element _would_ have
    without checking whether it does have one.  That can be useful in contexts
    (e.g. `DimensionRecord`) where we want to simulate the existence of such a
    table.
    """

    def __init__(self, element: DimensionElement):
        self.element = element
        self._tableSpec = ddl.TableSpec(fields=())
        # Add the primary key fields of required dimensions.  These continue to
        # be primary keys in the table for this dimension.
        self.required = NamedValueSet()
        self.dimensions = NamedValueSet()
        self.facts = NamedValueSet()
        self.standard = NamedValueSet()
        # Names of the required-dimension fields other than the element's own
        # key; they prefix every alternate-key unique constraint below.
        dependencies = []
        for dimension in element.required:
            if dimension != element:
                # addDimensionForeignKey adds the field to the table spec
                # itself, so only the bookkeeping remains to be done here.
                fieldSpec = addDimensionForeignKey(self._tableSpec, dimension, primaryKey=True)
                dependencies.append(fieldSpec.name)
            else:
                # A Dimension instance is in its own required dependency graph
                # (always at the end, because of topological ordering).  In
                # this case we don't want to rename the field.
                fieldSpec = element.primaryKey  # type: ignore
                self._tableSpec.fields.add(fieldSpec)
            self.required.add(fieldSpec)
            self.dimensions.add(fieldSpec)
            self.standard.add(fieldSpec)
        # Add fields and foreign keys for implied dimensions.  These are
        # primary keys in their own table, but should not be here.  As with
        # required dependencies, we rename the fields with the dimension name.
        # We use element.implied instead of element.graph.implied because we
        # don't want *recursive* implied dependencies.
        self.implied = NamedValueSet()
        for dimension in element.implied:
            fieldSpec = addDimensionForeignKey(self._tableSpec, dimension, primaryKey=False, nullable=False)
            self.implied.add(fieldSpec)
            self.dimensions.add(fieldSpec)
            self.standard.add(fieldSpec)
        # Add non-primary unique keys and unique constraints for them.
        # Elements without an ``alternateKeys`` attribute contribute none.
        for fieldSpec in getattr(element, "alternateKeys", ()):
            self._tableSpec.fields.add(fieldSpec)
            self._tableSpec.unique.add(tuple(dependencies) + (fieldSpec.name,))
            self.standard.add(fieldSpec)
            self.facts.add(fieldSpec)
        # Add other metadata fields.
        for fieldSpec in element.metadata:
            self._tableSpec.fields.add(fieldSpec)
            self.standard.add(fieldSpec)
            self.facts.add(fieldSpec)
        names = list(self.standard.names)
        # Add fields for regions and/or timespans.  Only their names are
        # recorded here; the corresponding columns are added to the table
        # specification in makeTableSpec.
        if element.spatial is not None:
            names.append("region")
        if element.temporal is not None:
            names.append(TimespanDatabaseRepresentation.NAME)
        self.names = tuple(names)

    def makeTableSpec(
        self,
        TimespanReprClass: type[TimespanDatabaseRepresentation],
    ) -> ddl.TableSpec:
        """Construct a complete specification for a table.

        The table could hold the records of this element.

        Parameters
        ----------
        TimespanReprClass : `type` [ `TimespanDatabaseRepresentation` ]
            Class object that specifies how timespans are represented in the
            database.

        Returns
        -------
        spec : `ddl.TableSpec`
            Specification for a table.

        Notes
        -----
        For an element that is neither spatial nor temporal the object
        returned is the specification held internally by this instance, not a
        copy.  For spatial and/or temporal elements only the field set is
        copied; the ``unique``, ``indexes`` and ``foreignKeys`` objects are
        passed through as-is.
        """
        if self.element.temporal is not None or self.element.spatial is not None:
            # Copy the field set so that the region/timespan columns do not
            # end up in the specification stored on this instance.
            spec = ddl.TableSpec(
                fields=NamedValueSet(self._tableSpec.fields),
                unique=self._tableSpec.unique,
                indexes=self._tableSpec.indexes,
                foreignKeys=self._tableSpec.foreignKeys,
            )
            if self.element.spatial is not None:
                spec.fields.add(ddl.FieldSpec.for_region())
            if self.element.temporal is not None:
                spec.fields.update(TimespanReprClass.makeFieldSpecs(nullable=True))
        else:
            spec = self._tableSpec
        return spec

    def __str__(self) -> str:
        # NOTE(review): the leading whitespace inside the literals below is
        # reproduced exactly as it appeared in the text under review; confirm
        # it against version control if the printed indentation matters.
        lines = [f"{self.element.name}: "]
        lines.extend(f" {field.name}: {field.getPythonType().__name__}" for field in self.standard)
        if self.element.spatial is not None:
            lines.append(" region: lsst.sphgeom.Region")
        if self.element.temporal is not None:
            lines.append(" timespan: lsst.daf.butler.Timespan")
        return "\n".join(lines)

    element: DimensionElement
    """The dimension element these fields correspond to.

    (`DimensionElement`)
    """

    required: NamedValueSet[ddl.FieldSpec]
    """The required dimension fields of this table.

    They correspond to the element's required
    dimensions, in that order, i.e. `DimensionElement.required`
    (`NamedValueSet` [ `ddl.FieldSpec` ]).
    """

    implied: NamedValueSet[ddl.FieldSpec]
    """The implied dimension fields of this table.

    They correspond to the element's implied
    dimensions, in that order, i.e. `DimensionElement.implied`
    (`NamedValueSet` [ `ddl.FieldSpec` ]).
    """

    dimensions: NamedValueSet[ddl.FieldSpec]
    """The direct and implied dimension fields of this table.

    They correspond to the element's direct
    required and implied dimensions, in that order, i.e.
    `DimensionElement.dimensions` (`NamedValueSet` [ `ddl.FieldSpec` ]).
    """

    facts: NamedValueSet[ddl.FieldSpec]
    """The standard fields of this table that do not correspond to dimensions.

    (`NamedValueSet` [ `ddl.FieldSpec` ]).

    This is equivalent to ``standard - dimensions`` (but possibly in a
    different order).
    """

    standard: NamedValueSet[ddl.FieldSpec]
    """All standard fields that are expected to have the same form.

    They are expected to have the same form in all
    databases; this is all fields other than those that represent a region
    and/or timespan (`NamedValueSet` [ `ddl.FieldSpec` ]).
    """

    names: tuple[str, ...]
    """The names of all fields in the specification (`tuple` [ `str` ]).

    This includes "region" and/or "timespan" if `element` is spatial and/or
    temporal (respectively).  The actual database representation of these
    quantities may involve multiple fields (or even fields only on a different
    table), but the Python representation of those rows (i.e. `DimensionRecord`
    instances) will always contain exactly these fields.
    """