Coverage for python/lsst/daf/butler/registry/datasets/byDimensions/tables.py: 96%
62 statements
« prev ^ index » next coverage.py v6.5.0, created at 2023-01-10 02:32 -0800
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = (
25 "addDatasetForeignKey",
26 "makeCalibTableName",
27 "makeCalibTableSpec",
28 "makeStaticTableSpecs",
29 "makeTagTableName",
30 "makeTagTableSpec",
31 "StaticDatasetTablesTuple",
32)
34from collections import namedtuple
35from typing import Any
37import sqlalchemy
39from ....core import (
40 DatasetType,
41 DimensionUniverse,
42 GovernorDimension,
43 TimespanDatabaseRepresentation,
44 addDimensionForeignKey,
45 ddl,
46)
47from ...interfaces import CollectionManager
# Maximum length of a dataset type name; sets the VARCHAR width of the
# dataset_type.name column.
DATASET_TYPE_NAME_LENGTH = 128


# Container for the specifications of the static (always-present) tables
# managed by this package: the dataset_type table and the monolithic
# dataset table.
StaticDatasetTablesTuple = namedtuple("StaticDatasetTablesTuple", ("dataset_type", "dataset"))
def addDatasetForeignKey(
    tableSpec: ddl.TableSpec,
    dtype: type,
    *,
    name: str = "dataset",
    onDelete: str | None = None,
    constraint: bool = True,
    **kwargs: Any,
) -> ddl.FieldSpec:
    """Add a column (and optionally a foreign key constraint) referencing
    the dataset table to an existing table specification.

    This is an internal interface for the ``byDimensions`` package; external
    code should use `DatasetRecordStorageManager.addDatasetForeignKey`
    instead.

    Parameters
    ----------
    tableSpec : `ddl.TableSpec`
        Specification of the table that should reference the dataset table;
        modified in place.
    dtype: `type`
        Column type; must match the type of the referenced primary key
        column (``dataset.id``).
    name: `str`, optional
        Prefix for the new field's name; the full name is ``{name}_id``.
    onDelete: `str`, optional
        "CASCADE" or "SET NULL", describing what happens to the referencing
        row when the referenced dataset row is deleted.  `None` (default)
        makes such a deletion an integrity error.
    constraint: `bool`, optional
        If `False` (default `True`), add only the joinable field, without a
        foreign key constraint.
    **kwargs
        Forwarded to the `ddl.FieldSpec` constructor (which otherwise
        receives only ``name`` and ``dtype``).

    Returns
    -------
    idSpec : `ddl.FieldSpec`
        Specification for the new ID field.
    """
    fieldSpec = ddl.FieldSpec(f"{name}_id", dtype=dtype, **kwargs)
    tableSpec.fields.add(fieldSpec)
    if constraint:
        fkSpec = ddl.ForeignKeySpec("dataset", source=(fieldSpec.name,), target=("id",), onDelete=onDelete)
        tableSpec.foreignKeys.append(fkSpec)
    return fieldSpec
def makeStaticTableSpecs(
    collections: type[CollectionManager],
    universe: DimensionUniverse,
    dtype: type,
    autoincrement: bool,
) -> StaticDatasetTablesTuple:
    """Construct all static tables used by the classes in this package.

    Static tables are present in every Registry and do not depend on which
    DatasetTypes have been registered.

    Parameters
    ----------
    collections : `type` [ `CollectionManager` ]
        Manager class for the collections in this `Registry`; used to add
        the run foreign key to the dataset table.
    universe : `DimensionUniverse`
        Universe graph containing all dimensions known to this `Registry`.
        (Accepted for interface compatibility; not used directly here.)
    dtype : `type`
        Type of the dataset ID (primary key) column.
    autoincrement : `bool`
        If `True`, the dataset ID column is auto-incrementing.

    Returns
    -------
    specs : `StaticDatasetTablesTuple`
        Named tuple of `ddl.TableSpec` instances.
    """
    datasetTypeSpec = ddl.TableSpec(
        fields=[
            ddl.FieldSpec(
                name="id",
                dtype=sqlalchemy.BigInteger,
                autoincrement=True,
                primaryKey=True,
                doc=(
                    "Autoincrement ID that uniquely identifies a dataset "
                    "type in other tables. Python code outside the "
                    "`Registry` class should never interact with this; "
                    "its existence is considered an implementation detail."
                ),
            ),
            ddl.FieldSpec(
                name="name",
                dtype=sqlalchemy.String,
                length=DATASET_TYPE_NAME_LENGTH,
                nullable=False,
                doc="String name that uniquely identifies a dataset type.",
            ),
            ddl.FieldSpec(
                name="storage_class",
                dtype=sqlalchemy.String,
                length=64,
                nullable=False,
                doc=(
                    "Name of the storage class associated with all "
                    "datasets of this type. Storage classes are "
                    "generally associated with a Python class, and are "
                    "enumerated in butler configuration."
                ),
            ),
            ddl.FieldSpec(
                name="dimensions_key",
                dtype=sqlalchemy.BigInteger,
                nullable=False,
                doc="Unique key for the set of dimensions that identifies datasets of this type.",
            ),
            ddl.FieldSpec(
                name="tag_association_table",
                dtype=sqlalchemy.String,
                length=128,
                nullable=False,
                doc=(
                    "Name of the table that holds associations between "
                    "datasets of this type and most types of collections."
                ),
            ),
            ddl.FieldSpec(
                name="calibration_association_table",
                dtype=sqlalchemy.String,
                length=128,
                nullable=True,
                doc=(
                    "Name of the table that holds associations between "
                    "datasets of this type and CALIBRATION collections. "
                    "NULL values indicate dataset types with "
                    "isCalibration=False."
                ),
            ),
        ],
        unique=[("name",)],
    )
    datasetSpec = ddl.TableSpec(
        fields=[
            ddl.FieldSpec(
                name="id",
                dtype=dtype,
                autoincrement=autoincrement,
                primaryKey=True,
                doc="A unique field used as the primary key for dataset.",
            ),
            ddl.FieldSpec(
                name="dataset_type_id",
                dtype=sqlalchemy.BigInteger,
                nullable=False,
                doc="Reference to the associated entry in the dataset_type table.",
            ),
            ddl.FieldSpec(
                name="ingest_date",
                dtype=sqlalchemy.TIMESTAMP,
                default=sqlalchemy.sql.func.now(),
                nullable=False,
                doc="Time of dataset ingestion.",
            ),
            # Foreign key field/constraint to run added below.
        ],
        foreignKeys=[
            ddl.ForeignKeySpec("dataset_type", source=("dataset_type_id",), target=("id",)),
        ],
    )
    specs = StaticDatasetTablesTuple(dataset_type=datasetTypeSpec, dataset=datasetSpec)
    # Add the run foreign key field/constraint programmatically.
    collections.addRunForeignKey(specs.dataset, onDelete="CASCADE", nullable=False)
    return specs
def makeTagTableName(datasetType: DatasetType, dimensionsKey: int) -> str:
    """Construct the name of a dynamic (DatasetType-dependent) tag table
    used by the classes in this package.

    Parameters
    ----------
    datasetType : `DatasetType`
        Dataset type to name a table for.  Multiple dataset types may share
        a single table.
    dimensionsKey : `int`
        Integer key under which ``datasetType.dimensions`` is saved in the
        database.

    Returns
    -------
    name : `str`
        Name for the table.
    """
    # The name depends only on the dimensions key, zero-padded to 8 digits.
    return f"dataset_tags_{dimensionsKey:08d}"
def makeCalibTableName(datasetType: DatasetType, dimensionsKey: int) -> str:
    """Construct the name of a dynamic (DatasetType-dependent) tag +
    validity range table used by the classes in this package.

    Parameters
    ----------
    datasetType : `DatasetType`
        Dataset type to name a table for; must be a calibration type.
        Multiple dataset types may share a single table.
    dimensionsKey : `int`
        Integer key under which ``datasetType.dimensions`` is saved in the
        database.

    Returns
    -------
    name : `str`
        Name for the table.
    """
    # Internal invariant: only calibration dataset types get calib tables.
    assert datasetType.isCalibration()
    return f"dataset_calibs_{dimensionsKey:08d}"
def makeTagTableSpec(
    datasetType: DatasetType, collections: type[CollectionManager], dtype: type, *, constraints: bool = True
) -> ddl.TableSpec:
    """Construct the specification for a dynamic (DatasetType-dependent)
    tag table used by the classes in this package.

    Parameters
    ----------
    datasetType : `DatasetType`
        Dataset type to build a spec for.  Multiple dataset types may share
        a single table.
    collections : `type` [ `CollectionManager` ]
        `CollectionManager` subclass used to construct foreign keys to the
        run and/or collection tables.
    dtype : `type`
        Type of the FK column; must match the type of the referenced
        primary key column (``dataset.id``).
    constraints : `bool`, optional
        If `False` (default `True`), omit all foreign key constraints.

    Returns
    -------
    spec : `ddl.TableSpec`
        Specification for the table.
    """
    spec = ddl.TableSpec(
        fields=[
            # Foreign key fields to dataset, collection, and usually
            # dimension tables added below.
            # dataset_type_id duplicates the value stored in the main
            # monolithic dataset table, but it is required here to express
            # the unique constraint built below.
            ddl.FieldSpec("dataset_type_id", dtype=sqlalchemy.BigInteger, nullable=False),
        ]
    )
    if constraints:
        spec.foreignKeys.append(
            ddl.ForeignKeySpec("dataset_type", source=("dataset_type_id",), target=("id",))
        )
    # Columns of the (dataset type, collection, data ID) unique constraint.
    # Only the required part of the data ID participates: that is
    # sufficient, and it keeps NULLs out of the constraint.
    uniqueColumns = ["dataset_type_id"]
    # Foreign key to the dataset table (part of the primary key).
    addDatasetForeignKey(spec, dtype, primaryKey=True, onDelete="CASCADE", constraint=constraints)
    # Foreign key to the collection table (part of the primary key and the
    # data ID unique constraint).
    collectionField = collections.addCollectionForeignKey(
        spec, primaryKey=True, onDelete="CASCADE", constraint=constraints
    )
    uniqueColumns.append(collectionField.name)
    # Foreign key constraint to the collection_summary_dataset_type table.
    if constraints:
        spec.foreignKeys.append(
            ddl.ForeignKeySpec(
                "collection_summary_dataset_type",
                source=(collectionField.name, "dataset_type_id"),
                target=(collectionField.name, "dataset_type_id"),
            )
        )
    for dimension in datasetType.dimensions.required:
        dimensionField = addDimensionForeignKey(
            spec, dimension=dimension, nullable=False, primaryKey=False, constraint=constraints
        )
        uniqueColumns.append(dimensionField.name)
        # Governor dimensions also get a foreign key constraint to the
        # matching collection_summary_<dimension> table.
        if constraints and isinstance(dimension, GovernorDimension):
            spec.foreignKeys.append(
                ddl.ForeignKeySpec(
                    f"collection_summary_{dimension.name}",
                    source=(collectionField.name, dimensionField.name),
                    target=(collectionField.name, dimensionField.name),
                )
            )
    # Install the unique constraint assembled above.
    spec.unique.add(tuple(uniqueColumns))
    return spec
def makeCalibTableSpec(
    datasetType: DatasetType,
    collections: type[CollectionManager],
    TimespanReprClass: type[TimespanDatabaseRepresentation],
    dtype: type,
) -> ddl.TableSpec:
    """Construct the specification for a dynamic (DatasetType-dependent)
    tag + validity range table used by the classes in this package.

    Parameters
    ----------
    datasetType : `DatasetType`
        Dataset type to build a spec for.  Multiple dataset types may share
        a single table.
    collections : `type` [ `CollectionManager` ]
        `CollectionManager` subclass used to construct foreign keys to the
        run and/or collection tables.
    TimespanReprClass : `type` [ `TimespanDatabaseRepresentation` ]
        Database representation used for the validity-range columns.
    dtype : `type`
        Type of the FK column; must match the type of the referenced
        primary key column (``dataset.id``).

    Returns
    -------
    spec : `ddl.TableSpec`
        Specification for the table.
    """
    spec = ddl.TableSpec(
        fields=[
            # This table has no natural primary key, compound or otherwise,
            # so use a surrogate autoincrement key.  It may be used a bit
            # internally, but its presence is an implementation detail and
            # it should never appear as a foreign key in other tables.
            ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, autoincrement=True, primaryKey=True),
            # Foreign key fields to dataset, collection, and usually
            # dimension tables added below.  dataset_type_id duplicates the
            # value in the main monolithic dataset table, but this bit of
            # denormalization allows a much more useful index.
            ddl.FieldSpec("dataset_type_id", dtype=sqlalchemy.BigInteger, nullable=False),
        ],
        foreignKeys=[
            ddl.ForeignKeySpec("dataset_type", source=("dataset_type_id",), target=("id",)),
        ],
    )
    # Columns of the temporal lookup index/constraint, starting with the
    # dataset type.
    lookupColumns: list[str | type[TimespanDatabaseRepresentation]] = ["dataset_type_id"]
    # Foreign key to the dataset table (not part of the temporal
    # lookup/constraint).
    addDatasetForeignKey(spec, dtype, nullable=False, onDelete="CASCADE")
    # Foreign key to the collection table (part of the temporal lookup
    # index/constraint).
    collectionField = collections.addCollectionForeignKey(spec, nullable=False, onDelete="CASCADE")
    lookupColumns.append(collectionField.name)
    # Foreign key constraint to the collection_summary_dataset_type table.
    spec.foreignKeys.append(
        ddl.ForeignKeySpec(
            "collection_summary_dataset_type",
            source=(collectionField.name, "dataset_type_id"),
            target=(collectionField.name, "dataset_type_id"),
        )
    )
    # Dimension fields (part of the temporal lookup index/constraint).
    for dimension in datasetType.dimensions.required:
        dimensionField = addDimensionForeignKey(spec, dimension=dimension, nullable=False, primaryKey=False)
        lookupColumns.append(dimensionField.name)
        # Governor dimensions also get a foreign key constraint to the
        # matching collection_summary_<dimension> table.
        if isinstance(dimension, GovernorDimension):
            spec.foreignKeys.append(
                ddl.ForeignKeySpec(
                    f"collection_summary_{dimension.name}",
                    source=(collectionField.name, dimensionField.name),
                    target=(collectionField.name, dimensionField.name),
                )
            )
    # Validity-range field(s) (part of the temporal lookup
    # index/constraint).
    timespanFields = TimespanReprClass.makeFieldSpecs(nullable=False)
    for timespanField in timespanFields:
        spec.fields.add(timespanField)
    if TimespanReprClass.hasExclusionConstraint():
        # This database's timespan representation supports a database-level
        # constraint preventing overlapping validity ranges for entries
        # with the same dataset type, collection, and data ID.  This also
        # creates an index.
        lookupColumns.append(TimespanReprClass)
        spec.exclusion.add(tuple(lookupColumns))
    else:
        # No database-level constraint is possible; overlaps must be
        # simulated in our DatasetRecordStorage.certify() implementation.
        # Create a regular index here in the hope it helps lookups.
        lookupColumns.extend(timespanField.name for timespanField in timespanFields)
        spec.indexes.add(ddl.IndexSpec(*lookupColumns))  # type: ignore
    return spec