Coverage for python/lsst/daf/butler/dimensions/_schema.py: 25%
111 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-12-01 11:00 +0000
« prev ^ index » next coverage.py v7.3.2, created at 2023-12-01 11:00 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
27from __future__ import annotations
29__all__ = ("addDimensionForeignKey",)
31import copy
32from collections.abc import Mapping
33from typing import TYPE_CHECKING
35from lsst.utils.classes import cached_getter
37from .. import ddl
38from .._column_tags import DimensionKeyColumnTag, DimensionRecordColumnTag
39from .._named import NamedValueSet
40from .._timespan import TimespanDatabaseRepresentation
42if TYPE_CHECKING: # Imports needed only for type annotations; may be circular.
43 from lsst.daf.relation import ColumnTag
45 from ._elements import Dimension, DimensionElement
def _makeForeignKeySpec(dimension: Dimension) -> ddl.ForeignKeySpec:
    """Build the `ddl.ForeignKeySpec` that points at a dimension's table.

    This is a low-level helper; most callers should go through
    `addDimensionForeignKey` instead.

    Parameters
    ----------
    dimension : `Dimension`
        The dimension whose table is to be referenced.  Caller guarantees
        that it is actually associated with a table.

    Returns
    -------
    spec : `ddl.ForeignKeySpec`
        A database-agnostic foreign key specification.
    """
    # One (target, source) pair per required dimension.  In the referencing
    # table every column is named after the dimension it holds; in the
    # referenced table the dimension's own column keeps its primary key name.
    columnPairs = [
        (
            dimension.primaryKey.name if required == dimension else required.name,
            required.name,
        )
        for required in dimension.required
    ]
    targetColumns = tuple(targetName for targetName, _ in columnPairs)
    sourceColumns = tuple(sourceName for _, sourceName in columnPairs)
    return ddl.ForeignKeySpec(table=dimension.name, source=sourceColumns, target=targetColumns)
def addDimensionForeignKey(
    tableSpec: ddl.TableSpec,
    dimension: Dimension,
    *,
    primaryKey: bool,
    nullable: bool = False,
    constraint: bool = True,
) -> ddl.FieldSpec:
    """Add a field, and possibly a foreign key, that references a dimension.

    The new field is a renamed copy of the primary key field of the given
    `Dimension`.

    Parameters
    ----------
    tableSpec : `ddl.TableSpec`
        Specification the field and foreign key are to be added to.
    dimension : `Dimension`
        Dimension to be referenced.  If this dimension has required
        dependencies, those must have already been added to the table.  A
        field will be added that corresponds to this dimension's primary key,
        and a foreign key constraint will be added only if the dimension is
        associated with a table of its own.
    primaryKey : `bool`
        If `True`, the new field will be added as part of a compound primary
        key for the table.
    nullable : `bool`, optional
        If `False` (default) the new field will be added with a NOT NULL
        constraint.
    constraint : `bool`, optional
        If `False` (`True` is default), just add the field, not the foreign
        key constraint.

    Returns
    -------
    fieldSpec : `ddl.FieldSpec`
        Specification for the field just added.
    """
    # Work on a shallow copy so the dimension's own primary key spec is left
    # untouched; the copy takes the dimension's name so that it is unique and
    # meaningful within the referencing table.
    keyField = copy.copy(dimension.primaryKey)
    keyField.name = dimension.name
    keyField.primaryKey = primaryKey
    keyField.nullable = nullable
    tableSpec.fields.add(keyField)

    # A constraint only makes sense when the dimension has a real table of
    # its own (not a view of another one) and the caller did not opt out.
    needsConstraint = dimension.hasTable() and dimension.viewOf is None and constraint
    if not needsConstraint:
        return keyField
    tableSpec.foreignKeys.append(_makeForeignKeySpec(dimension))
    return keyField
class DimensionElementFields:
    """Class for constructing table schemas for `DimensionElement`.

    This creates an object that constructs the table schema for a
    `DimensionElement` and provides a categorized view of its fields.

    Parameters
    ----------
    element : `DimensionElement`
        Element for which to make a table specification.

    Notes
    -----
    This combines the foreign key fields from dependencies, unique keys
    for true `Dimension` instances, metadata fields, and region/timespan
    fields for spatial/temporal elements.

    Callers should use `DimensionUniverse.makeSchemaSpec` if they want to
    account for elements that have no table or reference another table; this
    class simply creates a specification for the table an element _would_ have
    without checking whether it does have one.  That can be useful in contexts
    (e.g. `DimensionRecord`) where we want to simulate the existence of such a
    table.
    """

    def __init__(self, element: DimensionElement):
        self.element = element
        # Specification holding only the "standard" fields; region and
        # timespan fields are added on demand by `makeTableSpec`.
        self._tableSpec = ddl.TableSpec(fields=())
        # Add the primary key fields of required dimensions.  These continue to
        # be primary keys in the table for this dimension.
        self.required = NamedValueSet()
        self.dimensions = NamedValueSet()
        self.facts = NamedValueSet()
        self.standard = NamedValueSet()
        # Names of the fields for the required dimensions other than the
        # element itself; they prefix every unique constraint added below.
        dependencies = []
        for dimension in element.required:
            if dimension != element:
                fieldSpec = addDimensionForeignKey(self._tableSpec, dimension, primaryKey=True)
                dependencies.append(fieldSpec.name)
            else:
                fieldSpec = element.primaryKey  # type: ignore
                # A Dimension instance is in its own required dependency graph
                # (always at the end, because of topological ordering).  In
                # this case we don't want to rename the field.
                self._tableSpec.fields.add(fieldSpec)
            self.required.add(fieldSpec)
            self.dimensions.add(fieldSpec)
            self.standard.add(fieldSpec)
        # Add fields and foreign keys for implied dimensions.  These are
        # primary keys in their own table, but should not be here.  As with
        # required dependencies, we rename the fields with the dimension name.
        # We use element.implied instead of element.graph.implied because we
        # don't want *recursive* implied dependencies.
        self.implied = NamedValueSet()
        for dimension in element.implied:
            fieldSpec = addDimensionForeignKey(self._tableSpec, dimension, primaryKey=False, nullable=False)
            self.implied.add(fieldSpec)
            self.dimensions.add(fieldSpec)
            self.standard.add(fieldSpec)
        # Add non-primary unique keys and unique constraints for them.  The
        # getattr default covers elements with no ``alternateKeys`` attribute.
        for fieldSpec in getattr(element, "alternateKeys", ()):
            self._tableSpec.fields.add(fieldSpec)
            self._tableSpec.unique.add(tuple(dependencies) + (fieldSpec.name,))
            self.standard.add(fieldSpec)
            self.facts.add(fieldSpec)
        # Add other metadata fields.
        for fieldSpec in element.metadata:
            self._tableSpec.fields.add(fieldSpec)
            self.standard.add(fieldSpec)
            self.facts.add(fieldSpec)
        names = list(self.standard.names)
        # Add fields for regions and/or timespans.
        if element.spatial is not None:
            names.append("region")
        if element.temporal is not None:
            names.append(TimespanDatabaseRepresentation.NAME)
        self.names = tuple(names)

    def makeTableSpec(
        self,
        TimespanReprClass: type[TimespanDatabaseRepresentation],
    ) -> ddl.TableSpec:
        """Construct a complete specification for a table.

        The table could hold the records of this element.

        Parameters
        ----------
        TimespanReprClass : `type` [ `TimespanDatabaseRepresentation` ]
            Class object that specifies how timespans are represented in the
            database.

        Returns
        -------
        spec : `ddl.TableSpec`
            Specification for a table.  For elements that are neither spatial
            nor temporal this is the same object on every call, not a copy.
        """
        if self.element.temporal is not None or self.element.spatial is not None:
            # Copy only the field set, since that is all that is extended
            # here; the unique, index and foreign key collections are passed
            # through from the internal specification as they are.
            spec = ddl.TableSpec(
                fields=NamedValueSet(self._tableSpec.fields),
                unique=self._tableSpec.unique,
                indexes=self._tableSpec.indexes,
                foreignKeys=self._tableSpec.foreignKeys,
            )
            if self.element.spatial is not None:
                spec.fields.add(ddl.FieldSpec.for_region())
            if self.element.temporal is not None:
                spec.fields.update(TimespanReprClass.makeFieldSpecs(nullable=True))
        else:
            spec = self._tableSpec
        return spec

    def __str__(self) -> str:
        # One header line with the element name, then one line per field.
        lines = [f"{self.element.name}: "]
        lines.extend(f" {field.name}: {field.getPythonType().__name__}" for field in self.standard)
        if self.element.spatial is not None:
            lines.append(" region: lsst.sphgeom.Region")
        if self.element.temporal is not None:
            lines.append(" timespan: lsst.daf.butler.Timespan")
        return "\n".join(lines)

    @property
    @cached_getter
    def columns(self) -> Mapping[ColumnTag, str]:
        """A mapping from `ColumnTag` to field name for all fields in this
        element's records (`~collections.abc.Mapping`).
        """
        result: dict[ColumnTag, str] = {}
        # ``strict=True`` makes a length mismatch between the element's
        # dimensions and the dimension fields built in ``__init__`` an error
        # instead of a silent truncation.
        for dimension_name, field_name in zip(
            self.element.dimensions.names, self.dimensions.names, strict=True
        ):
            result[DimensionKeyColumnTag(dimension_name)] = field_name
        for field_name in self.facts.names:
            result[DimensionRecordColumnTag(self.element.name, field_name)] = field_name
        # Test against None explicitly, exactly as ``__init__`` and
        # ``makeTableSpec`` do, so the region/timespan entries here can never
        # disagree with `names` for a family object that happens to be falsy.
        if self.element.spatial is not None:
            result[DimensionRecordColumnTag(self.element.name, "region")] = "region"
        if self.element.temporal is not None:
            result[DimensionRecordColumnTag(self.element.name, "timespan")] = "timespan"
        return result

    element: DimensionElement
    """The dimension element these fields correspond to.

    (`DimensionElement`)
    """

    required: NamedValueSet[ddl.FieldSpec]
    """The required dimension fields of this table.

    They correspond to the element's required
    dimensions, in that order, i.e. `DimensionElement.required`
    (`NamedValueSet` [ `ddl.FieldSpec` ]).
    """

    implied: NamedValueSet[ddl.FieldSpec]
    """The implied dimension fields of this table.

    They correspond to the element's implied
    dimensions, in that order, i.e. `DimensionElement.implied`
    (`NamedValueSet` [ `ddl.FieldSpec` ]).
    """

    dimensions: NamedValueSet[ddl.FieldSpec]
    """The direct and implied dimension fields of this table.

    They correspond to the element's direct
    required and implied dimensions, in that order, i.e.
    `DimensionElement.dimensions` (`NamedValueSet` [ `ddl.FieldSpec` ]).
    """

    facts: NamedValueSet[ddl.FieldSpec]
    """The standard fields of this table that do not correspond to dimensions.

    (`NamedValueSet` [ `ddl.FieldSpec` ]).

    This is equivalent to ``standard - dimensions`` (but possibly in a
    different order).
    """

    standard: NamedValueSet[ddl.FieldSpec]
    """All standard fields that are expected to have the same form.

    They are expected to have the same form in all
    databases; this is all fields other than those that represent a region
    and/or timespan (`NamedValueSet` [ `ddl.FieldSpec` ]).
    """

    names: tuple[str, ...]
    """The names of all fields in the specification (`tuple` [ `str` ]).

    This includes "region" and/or "timespan" if `element` is spatial and/or
    temporal (respectively).  The actual database representation of these
    quantities may involve multiple fields (or even fields only on a different
    table), but the Python representation of those rows (i.e. `DimensionRecord`
    instances) will always contain exactly these fields.
    """