Coverage for python/lsst/daf/butler/registry/datasets/byDimensions/tables.py: 96%
62 statements
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = (
    "addDatasetForeignKey",
    "makeCalibTableName",
    "makeCalibTableSpec",
    "makeStaticTableSpecs",
    "makeTagTableName",
    "makeTagTableSpec",
    "StaticDatasetTablesTuple",
)

from collections import namedtuple
from typing import Any, List, Optional, Type, Union

import sqlalchemy
from lsst.daf.butler import (
    DatasetType,
    DimensionUniverse,
    GovernorDimension,
    TimespanDatabaseRepresentation,
    addDimensionForeignKey,
    ddl,
)
from lsst.daf.butler.registry.interfaces import CollectionManager

DATASET_TYPE_NAME_LENGTH = 128


StaticDatasetTablesTuple = namedtuple(
    "StaticDatasetTablesTuple",
    [
        "dataset_type",
        "dataset",
    ],
)


def addDatasetForeignKey(
    tableSpec: ddl.TableSpec,
    dtype: type,
    *,
    name: str = "dataset",
    onDelete: Optional[str] = None,
    constraint: bool = True,
    **kwargs: Any,
) -> ddl.FieldSpec:
    """Add a foreign key column for datasets and (optionally) a constraint to
    a table.

    This is an internal interface for the ``byDimensions`` package; external
    code should use `DatasetRecordStorageManager.addDatasetForeignKey` instead.

    Parameters
    ----------
    tableSpec : `ddl.TableSpec`
        Specification for the table that should reference the dataset
        table. Will be modified in place.
    dtype : `type`
        Type of the column; same as the column type of the primary key column
        of the referenced table (``dataset.id``).
    name : `str`, optional
        Name to use as the prefix of the new field; the full field name is
        ``{name}_id``.
    onDelete : `str`, optional
        One of "CASCADE" or "SET NULL", indicating what should happen to
        the referencing row if the dataset row is deleted. `None`
        indicates that this should be an integrity error.
    constraint : `bool`, optional
        If `False` (default `True`), add a field that can be joined to
        the dataset primary key, but do not add a foreign key constraint.
    **kwargs
        Additional keyword arguments are forwarded to the `ddl.FieldSpec`
        constructor (only the ``name`` and ``dtype`` arguments are
        otherwise provided).

    Returns
    -------
    idSpec : `ddl.FieldSpec`
        Specification for the ID field.
    """
    idFieldSpec = ddl.FieldSpec(f"{name}_id", dtype=dtype, **kwargs)
    tableSpec.fields.add(idFieldSpec)
    if constraint:
        tableSpec.foreignKeys.append(
            ddl.ForeignKeySpec("dataset", source=(idFieldSpec.name,), target=("id",), onDelete=onDelete)
        )
    return idFieldSpec
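

# A minimal usage sketch of addDatasetForeignKey: give a hypothetical
# dependent table a ``dataset_id`` column plus a foreign key constraint back
# to ``dataset.id``. The ``note`` field is invented purely for demonstration.
def _example_add_dataset_foreign_key() -> ddl.FieldSpec:
    spec = ddl.TableSpec(
        fields=[ddl.FieldSpec("note", dtype=sqlalchemy.String, length=32)],
    )
    # With constraint=True (the default) this adds the ``dataset_id`` field
    # and appends a ForeignKeySpec targeting ``dataset.id``.
    idSpec = addDatasetForeignKey(spec, sqlalchemy.BigInteger, onDelete="CASCADE")
    assert idSpec.name == "dataset_id"
    return idSpec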


def makeStaticTableSpecs(
    collections: Type[CollectionManager],
    universe: DimensionUniverse,
    dtype: type,
    autoincrement: bool,
) -> StaticDatasetTablesTuple:
    """Construct all static tables used by the classes in this package.

    Static tables are those that are present in all Registries and do not
    depend on what DatasetTypes have been registered.

    Parameters
    ----------
    collections : `type` [ `CollectionManager` ]
        `CollectionManager` subclass that manages the collections in this
        `Registry`.
    universe : `DimensionUniverse`
        Universe graph containing all dimensions known to this `Registry`.
    dtype : `type`
        Type of the dataset ID (primary key) column.
    autoincrement : `bool`
        If `True`, the dataset ID column will be auto-incrementing.

    Returns
    -------
    specs : `StaticDatasetTablesTuple`
        A named tuple containing `ddl.TableSpec` instances.
    """
    specs = StaticDatasetTablesTuple(
        dataset_type=ddl.TableSpec(
            fields=[
                ddl.FieldSpec(
                    name="id",
                    dtype=sqlalchemy.BigInteger,
                    autoincrement=True,
                    primaryKey=True,
                    doc=(
                        "Autoincrement ID that uniquely identifies a dataset "
                        "type in other tables. Python code outside the "
                        "`Registry` class should never interact with this; "
                        "its existence is considered an implementation detail."
                    ),
                ),
                ddl.FieldSpec(
                    name="name",
                    dtype=sqlalchemy.String,
                    length=DATASET_TYPE_NAME_LENGTH,
                    nullable=False,
                    doc="String name that uniquely identifies a dataset type.",
                ),
                ddl.FieldSpec(
                    name="storage_class",
                    dtype=sqlalchemy.String,
                    length=64,
                    nullable=False,
                    doc=(
                        "Name of the storage class associated with all "
                        "datasets of this type. Storage classes are "
                        "generally associated with a Python class, and are "
                        "enumerated in butler configuration."
                    ),
                ),
                ddl.FieldSpec(
                    name="dimensions_key",
                    dtype=sqlalchemy.BigInteger,
                    nullable=False,
                    doc="Unique key for the set of dimensions that identifies datasets of this type.",
                ),
                ddl.FieldSpec(
                    name="tag_association_table",
                    dtype=sqlalchemy.String,
                    length=128,
                    nullable=False,
                    doc=(
                        "Name of the table that holds associations between "
                        "datasets of this type and most types of collections."
                    ),
                ),
                ddl.FieldSpec(
                    name="calibration_association_table",
                    dtype=sqlalchemy.String,
                    length=128,
                    nullable=True,
                    doc=(
                        "Name of the table that holds associations between "
                        "datasets of this type and CALIBRATION collections. "
                        "NULL values indicate dataset types with "
                        "isCalibration=False."
                    ),
                ),
            ],
            unique=[("name",)],
        ),
        dataset=ddl.TableSpec(
            fields=[
                ddl.FieldSpec(
                    name="id",
                    dtype=dtype,
                    autoincrement=autoincrement,
                    primaryKey=True,
                    doc="A unique field used as the primary key for dataset.",
                ),
                ddl.FieldSpec(
                    name="dataset_type_id",
                    dtype=sqlalchemy.BigInteger,
                    nullable=False,
                    doc="Reference to the associated entry in the dataset_type table.",
                ),
                ddl.FieldSpec(
                    name="ingest_date",
                    dtype=sqlalchemy.TIMESTAMP,
                    default=sqlalchemy.sql.func.now(),
                    nullable=False,
                    doc="Time of dataset ingestion.",
                ),
                # Foreign key field/constraint to run added below.
            ],
            foreignKeys=[
                ddl.ForeignKeySpec("dataset_type", source=("dataset_type_id",), target=("id",)),
            ],
        ),
    )
    # Add foreign key fields programmatically.
    collections.addRunForeignKey(specs.dataset, onDelete="CASCADE", nullable=False)
    return specs
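

# A minimal usage sketch, assuming ``manager`` is a concrete
# `CollectionManager` subclass and ``universe`` comes from the repository
# configuration: the result is a namedtuple with one `ddl.TableSpec` per
# static table.
def _example_make_static_table_specs(
    manager: Type[CollectionManager], universe: DimensionUniverse
) -> StaticDatasetTablesTuple:
    specs = makeStaticTableSpecs(
        manager, universe, dtype=sqlalchemy.BigInteger, autoincrement=True
    )
    # One spec per static table; dynamic (per-DatasetType) tables are built
    # separately by makeTagTableSpec/makeCalibTableSpec below.
    assert set(specs._fields) == {"dataset_type", "dataset"}
    return specs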


def makeTagTableName(datasetType: DatasetType, dimensionsKey: int) -> str:
    """Construct the name for a dynamic (DatasetType-dependent) tag table used
    by the classes in this package.

    Parameters
    ----------
    datasetType : `DatasetType`
        Dataset type to construct a name for. Multiple dataset types may
        share the same table.
    dimensionsKey : `int`
        Integer key used to save ``datasetType.dimensions`` to the database.

    Returns
    -------
    name : `str`
        Name for the table.
    """
    return f"dataset_tags_{dimensionsKey:08d}"


def makeCalibTableName(datasetType: DatasetType, dimensionsKey: int) -> str:
    """Construct the name for a dynamic (DatasetType-dependent) tag + validity
    range table used by the classes in this package.

    Parameters
    ----------
    datasetType : `DatasetType`
        Dataset type to construct a name for. Multiple dataset types may
        share the same table.
    dimensionsKey : `int`
        Integer key used to save ``datasetType.dimensions`` to the database.

    Returns
    -------
    name : `str`
        Name for the table.
    """
    assert datasetType.isCalibration()
    return f"dataset_calibs_{dimensionsKey:08d}"
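

# A minimal sketch of the naming scheme: both names depend only on the
# integer dimensions key, zero-padded to eight digits, so dataset types
# that share a dimensions key share their dynamic tables.
def _example_dynamic_table_names(datasetType: DatasetType) -> None:
    assert makeTagTableName(datasetType, 12) == "dataset_tags_00000012"
    if datasetType.isCalibration():
        assert makeCalibTableName(datasetType, 12) == "dataset_calibs_00000012"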


def makeTagTableSpec(
    datasetType: DatasetType, collections: Type[CollectionManager], dtype: type, *, constraints: bool = True
) -> ddl.TableSpec:
    """Construct the specification for a dynamic (DatasetType-dependent) tag
    table used by the classes in this package.

    Parameters
    ----------
    datasetType : `DatasetType`
        Dataset type to construct a spec for. Multiple dataset types may
        share the same table.
    collections : `type` [ `CollectionManager` ]
        `CollectionManager` subclass that can be used to construct foreign keys
        to the run and/or collection tables.
    dtype : `type`
        Type of the FK column; same as the column type of the primary key
        column of the referenced table (``dataset.id``).
    constraints : `bool`, optional
        If `False` (default `True`), do not define foreign key constraints.

    Returns
    -------
    spec : `ddl.TableSpec`
        Specification for the table.
    """
    tableSpec = ddl.TableSpec(
        fields=[
            # Foreign key fields to dataset, collection, and usually dimension
            # tables added below.
            # The dataset_type_id field here would be redundant with the one
            # in the main monolithic dataset table, but we need it here for an
            # important unique constraint.
            ddl.FieldSpec("dataset_type_id", dtype=sqlalchemy.BigInteger, nullable=False),
        ]
    )
    if constraints:
        tableSpec.foreignKeys.append(
            ddl.ForeignKeySpec("dataset_type", source=("dataset_type_id",), target=("id",))
        )
    # We'll also have a unique constraint on dataset type, collection, and
    # data ID. We only include the required part of the data ID, as that's
    # sufficient and saves us from worrying about nulls in the constraint.
    constraint = ["dataset_type_id"]
    # Add foreign key fields to dataset table (part of the primary key).
    addDatasetForeignKey(tableSpec, dtype, primaryKey=True, onDelete="CASCADE", constraint=constraints)
    # Add foreign key fields to collection table (part of the primary key and
    # the data ID unique constraint).
    collectionFieldSpec = collections.addCollectionForeignKey(
        tableSpec, primaryKey=True, onDelete="CASCADE", constraint=constraints
    )
    constraint.append(collectionFieldSpec.name)
    # Add foreign key constraint to the collection_summary_dataset_type table.
    if constraints:
        tableSpec.foreignKeys.append(
            ddl.ForeignKeySpec(
                "collection_summary_dataset_type",
                source=(collectionFieldSpec.name, "dataset_type_id"),
                target=(collectionFieldSpec.name, "dataset_type_id"),
            )
        )
    for dimension in datasetType.dimensions.required:
        fieldSpec = addDimensionForeignKey(
            tableSpec, dimension=dimension, nullable=False, primaryKey=False, constraint=constraints
        )
        constraint.append(fieldSpec.name)
        # If this is a governor dimension, add a foreign key constraint to the
        # collection_summary_<dimension> table.
        if isinstance(dimension, GovernorDimension) and constraints:
            tableSpec.foreignKeys.append(
                ddl.ForeignKeySpec(
                    f"collection_summary_{dimension.name}",
                    source=(collectionFieldSpec.name, fieldSpec.name),
                    target=(collectionFieldSpec.name, fieldSpec.name),
                )
            )
    # Actually add the unique constraint.
    tableSpec.unique.add(tuple(constraint))
    return tableSpec
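

# A minimal usage sketch, assuming ``manager`` is a concrete
# `CollectionManager` subclass: the returned spec carries a single unique
# constraint covering dataset type, collection, and the required data ID.
def _example_make_tag_table_spec(
    datasetType: DatasetType, manager: Type[CollectionManager]
) -> ddl.TableSpec:
    spec = makeTagTableSpec(datasetType, manager, sqlalchemy.BigInteger)
    (uniqueConstraint,) = spec.unique
    # The constraint always starts with the dataset type key; the collection
    # and dimension columns that follow depend on the manager and dimensions.
    assert uniqueConstraint[0] == "dataset_type_id"
    return spec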


def makeCalibTableSpec(
    datasetType: DatasetType,
    collections: Type[CollectionManager],
    TimespanReprClass: Type[TimespanDatabaseRepresentation],
    dtype: type,
) -> ddl.TableSpec:
    """Construct the specification for a dynamic (DatasetType-dependent) tag +
    validity range table used by the classes in this package.

    Parameters
    ----------
    datasetType : `DatasetType`
        Dataset type to construct a spec for. Multiple dataset types may
        share the same table.
    collections : `type` [ `CollectionManager` ]
        `CollectionManager` subclass that can be used to construct foreign keys
        to the run and/or collection tables.
    TimespanReprClass : `type` [ `TimespanDatabaseRepresentation` ]
        Class that defines how timespans (validity ranges) are represented in
        this database.
    dtype : `type`
        Type of the FK column; same as the column type of the primary key
        column of the referenced table (``dataset.id``).

    Returns
    -------
    spec : `ddl.TableSpec`
        Specification for the table.
    """
    tableSpec = ddl.TableSpec(
        fields=[
            # This table has no natural primary key, compound or otherwise, so
            # we add an autoincrement key. We may use this field a bit
            # internally, but its presence is an implementation detail and it
            # shouldn't appear as a foreign key in any other tables.
            ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, autoincrement=True, primaryKey=True),
            # Foreign key fields to dataset, collection, and usually dimension
            # tables added below. The dataset_type_id field here is redundant
            # with the one in the main monolithic dataset table, but this bit
            # of denormalization lets us define what should be a much more
            # useful index.
            ddl.FieldSpec("dataset_type_id", dtype=sqlalchemy.BigInteger, nullable=False),
        ],
        foreignKeys=[
            ddl.ForeignKeySpec("dataset_type", source=("dataset_type_id",), target=("id",)),
        ],
    )
    # Record fields that should go in the temporal lookup index/constraint,
    # starting with the dataset type.
    index: List[Union[str, Type[TimespanDatabaseRepresentation]]] = ["dataset_type_id"]
    # Add foreign key fields to dataset table (not part of the temporal
    # lookup/constraint).
    addDatasetForeignKey(tableSpec, dtype, nullable=False, onDelete="CASCADE")
    # Add foreign key fields to collection table (part of the temporal lookup
    # index/constraint).
    collectionFieldSpec = collections.addCollectionForeignKey(tableSpec, nullable=False, onDelete="CASCADE")
    index.append(collectionFieldSpec.name)
    # Add foreign key constraint to the collection_summary_dataset_type table.
    tableSpec.foreignKeys.append(
        ddl.ForeignKeySpec(
            "collection_summary_dataset_type",
            source=(collectionFieldSpec.name, "dataset_type_id"),
            target=(collectionFieldSpec.name, "dataset_type_id"),
        )
    )
    # Add dimension fields (part of the temporal lookup index/constraint).
    for dimension in datasetType.dimensions.required:
        fieldSpec = addDimensionForeignKey(tableSpec, dimension=dimension, nullable=False, primaryKey=False)
        index.append(fieldSpec.name)
        # If this is a governor dimension, add a foreign key constraint to the
        # collection_summary_<dimension> table.
        if isinstance(dimension, GovernorDimension):
            tableSpec.foreignKeys.append(
                ddl.ForeignKeySpec(
                    f"collection_summary_{dimension.name}",
                    source=(collectionFieldSpec.name, fieldSpec.name),
                    target=(collectionFieldSpec.name, fieldSpec.name),
                )
            )
    # Add validity-range field(s) (part of the temporal lookup
    # index/constraint).
    tsFieldSpecs = TimespanReprClass.makeFieldSpecs(nullable=False)
    for fieldSpec in tsFieldSpecs:
        tableSpec.fields.add(fieldSpec)
    if TimespanReprClass.hasExclusionConstraint():
        # This database's timespan representation can define a database-level
        # constraint that prevents overlapping validity ranges for entries
        # with the same DatasetType, collection, and data ID.
        # This also creates an index.
        index.append(TimespanReprClass)
        tableSpec.exclusion.add(tuple(index))
    else:
        # No database-level constraint is possible. We'll have to simulate
        # that in our DatasetRecordStorage.certify() implementation, and just
        # create a regular index here in the hope that it helps with lookups.
        index.extend(fieldSpec.name for fieldSpec in tsFieldSpecs)
        tableSpec.indexes.add(tuple(index))  # type: ignore
    return tableSpec
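

# A minimal usage sketch, assuming ``manager`` and ``TimespanReprClass`` are
# the concrete classes chosen for this database: the temporal lookup becomes
# either a database-level exclusion constraint or a plain index.
def _example_make_calib_table_spec(
    datasetType: DatasetType,
    manager: Type[CollectionManager],
    TimespanReprClass: Type[TimespanDatabaseRepresentation],
) -> ddl.TableSpec:
    spec = makeCalibTableSpec(datasetType, manager, TimespanReprClass, sqlalchemy.BigInteger)
    if TimespanReprClass.hasExclusionConstraint():
        # Overlapping validity ranges are rejected by the database itself.
        assert spec.exclusion
    else:
        # Overlaps must instead be checked in DatasetRecordStorage.certify().
        assert spec.indexes
    return spec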