Coverage for python/lsst/daf/butler/registry/datasets/byDimensions/tables.py: 96%
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = (
    "addDatasetForeignKey",
    "makeCalibTableName",
    "makeCalibTableSpec",
    "makeStaticTableSpecs",
    "makeTagTableName",
    "makeTagTableSpec",
    "StaticDatasetTablesTuple",
)

from typing import (
    Any,
    List,
    Optional,
    Type,
    Union,
)

from collections import namedtuple

import sqlalchemy

from lsst.daf.butler import (
    DatasetType,
    ddl,
    DimensionUniverse,
    GovernorDimension,
)
from lsst.daf.butler import addDimensionForeignKey, TimespanDatabaseRepresentation
from lsst.daf.butler.registry.interfaces import CollectionManager


DATASET_TYPE_NAME_LENGTH = 128


StaticDatasetTablesTuple = namedtuple(
    "StaticDatasetTablesTuple",
    [
        "dataset_type",
        "dataset",
    ]
)


def addDatasetForeignKey(tableSpec: ddl.TableSpec, dtype: type, *,
                         name: str = "dataset",
                         onDelete: Optional[str] = None,
                         constraint: bool = True,
                         **kwargs: Any) -> ddl.FieldSpec:
    """Add a foreign key column for datasets and (optionally) a constraint to
    a table.

    This is an internal interface for the ``byDimensions`` package; external
    code should use `DatasetRecordStorageManager.addDatasetForeignKey` instead.

    Parameters
    ----------
    tableSpec : `ddl.TableSpec`
        Specification for the table that should reference the dataset
        table. Will be modified in place.
    dtype : `type`
        Type of the column, same as the column type of the PK column of
        a referenced table (``dataset.id``).
    name : `str`, optional
        A name to use for the prefix of the new field; the full name is
        ``{name}_id``.
    onDelete : `str`, optional
        One of "CASCADE" or "SET NULL", indicating what should happen to
        the referencing row if the dataset row is deleted. `None`
        indicates that this should be an integrity error.
    constraint : `bool`, optional
        If `False` (`True` is default), add a field that can be joined to
        the dataset primary key, but do not add a foreign key constraint.
    **kwargs
        Additional keyword arguments are forwarded to the `ddl.FieldSpec`
        constructor (only the ``name`` and ``dtype`` arguments are
        otherwise provided).

    Returns
    -------
    idSpec : `ddl.FieldSpec`
        Specification for the ID field.
    """
    idFieldSpec = ddl.FieldSpec(f"{name}_id", dtype=dtype, **kwargs)
    tableSpec.fields.add(idFieldSpec)
    if constraint:
        tableSpec.foreignKeys.append(ddl.ForeignKeySpec("dataset", source=(idFieldSpec.name,),
                                                        target=("id",), onDelete=onDelete))
    return idFieldSpec
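
# A minimal usage sketch (comments only, so nothing runs at import time; the
# table spec below is a throwaway for illustration, not one of this package's
# real tables):
#
#     spec = ddl.TableSpec(fields=[])
#     idSpec = addDatasetForeignKey(spec, sqlalchemy.BigInteger,
#                                   onDelete="SET NULL", nullable=True)
#     assert idSpec.name == "dataset_id"  # from the default name="dataset"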


def makeStaticTableSpecs(collections: Type[CollectionManager],
                         universe: DimensionUniverse,
                         dtype: type,
                         autoincrement: bool,
                         ) -> StaticDatasetTablesTuple:
    """Construct all static tables used by the classes in this package.

    Static tables are those that are present in all Registries and do not
    depend on what DatasetTypes have been registered.

    Parameters
    ----------
    collections : `type` [ `CollectionManager` ]
        `CollectionManager` subclass that manages the collections in this
        `Registry`.
    universe : `DimensionUniverse`
        Universe graph containing all dimensions known to this `Registry`.
    dtype : `type`
        Type of the dataset ID (primary key) column.
    autoincrement : `bool`
        If `True`, the dataset ID column will be auto-incrementing.

    Returns
    -------
    specs : `StaticDatasetTablesTuple`
        A named tuple containing `ddl.TableSpec` instances.
    """
    specs = StaticDatasetTablesTuple(
        dataset_type=ddl.TableSpec(
            fields=[
                ddl.FieldSpec(
                    name="id",
                    dtype=sqlalchemy.BigInteger,
                    autoincrement=True,
                    primaryKey=True,
                    doc=(
                        "Autoincrement ID that uniquely identifies a dataset "
                        "type in other tables. Python code outside the "
                        "`Registry` class should never interact with this; "
                        "its existence is considered an implementation detail."
                    ),
                ),
                ddl.FieldSpec(
                    name="name",
                    dtype=sqlalchemy.String,
                    length=DATASET_TYPE_NAME_LENGTH,
                    nullable=False,
                    doc="String name that uniquely identifies a dataset type.",
                ),
                ddl.FieldSpec(
                    name="storage_class",
                    dtype=sqlalchemy.String,
                    length=64,
                    nullable=False,
                    doc=(
                        "Name of the storage class associated with all "
                        "datasets of this type. Storage classes are "
                        "generally associated with a Python class, and are "
                        "enumerated in butler configuration."
                    ),
                ),
                ddl.FieldSpec(
                    name="dimensions_key",
                    dtype=sqlalchemy.BigInteger,
                    nullable=False,
                    doc=(
                        "Unique key for the set of dimensions that identifies "
                        "datasets of this type."
                    ),
                ),
                ddl.FieldSpec(
                    name="tag_association_table",
                    dtype=sqlalchemy.String,
                    length=128,
                    nullable=False,
                    doc=(
                        "Name of the table that holds associations between "
                        "datasets of this type and most types of collections."
                    ),
                ),
                ddl.FieldSpec(
                    name="calibration_association_table",
                    dtype=sqlalchemy.String,
                    length=128,
                    nullable=True,
                    doc=(
                        "Name of the table that holds associations between "
                        "datasets of this type and CALIBRATION collections. "
                        "NULL values indicate dataset types with "
                        "isCalibration=False."
                    ),
                ),
            ],
            unique=[("name",)],
        ),
        dataset=ddl.TableSpec(
            fields=[
                ddl.FieldSpec(
                    name="id",
                    dtype=dtype,
                    autoincrement=autoincrement,
                    primaryKey=True,
                    doc="A unique field used as the primary key for dataset.",
                ),
                ddl.FieldSpec(
                    name="dataset_type_id",
                    dtype=sqlalchemy.BigInteger,
                    nullable=False,
                    doc=(
                        "Reference to the associated entry in the dataset_type "
                        "table."
                    ),
                ),
                ddl.FieldSpec(
                    name="ingest_date",
                    dtype=sqlalchemy.TIMESTAMP,
                    default=sqlalchemy.sql.func.now(),
                    nullable=False,
                    doc="Time of dataset ingestion.",
                ),
                # Foreign key field/constraint to run added below.
            ],
            foreignKeys=[
                ddl.ForeignKeySpec("dataset_type", source=("dataset_type_id",), target=("id",)),
            ],
        ),
    )
    # Add foreign key fields programmatically.
    collections.addRunForeignKey(specs.dataset, onDelete="CASCADE", nullable=False)
    return specs
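
# A hedged usage sketch (comments only; ``MyCollectionManager`` stands in for
# whatever concrete `CollectionManager` subclass the registry is configured
# with, and is not defined in this module):
#
#     specs = makeStaticTableSpecs(MyCollectionManager, universe=universe,
#                                  dtype=sqlalchemy.BigInteger, autoincrement=True)
#     specs.dataset_type  # ddl.TableSpec for the static dataset_type table
#     specs.dataset       # ddl.TableSpec for the static dataset table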


def makeTagTableName(datasetType: DatasetType, dimensionsKey: int) -> str:
    """Construct the name for a dynamic (DatasetType-dependent) tag table used
    by the classes in this package.

    Parameters
    ----------
    datasetType : `DatasetType`
        Dataset type to construct a name for. Multiple dataset types may
        share the same table.
    dimensionsKey : `int`
        Integer key used to save ``datasetType.dimensions`` to the database.

    Returns
    -------
    name : `str`
        Name for the table.
    """
    return f"dataset_tags_{dimensionsKey:08d}"


def makeCalibTableName(datasetType: DatasetType, dimensionsKey: int) -> str:
    """Construct the name for a dynamic (DatasetType-dependent) tag + validity
    range table used by the classes in this package.

    Parameters
    ----------
    datasetType : `DatasetType`
        Dataset type to construct a name for. Multiple dataset types may
        share the same table.
    dimensionsKey : `int`
        Integer key used to save ``datasetType.dimensions`` to the database.

    Returns
    -------
    name : `str`
        Name for the table.
    """
    assert datasetType.isCalibration()
    return f"dataset_calibs_{dimensionsKey:08d}"


def makeTagTableSpec(datasetType: DatasetType, collections: Type[CollectionManager],
                     dtype: type, *, constraints: bool = True) -> ddl.TableSpec:
    """Construct the specification for a dynamic (DatasetType-dependent) tag
    table used by the classes in this package.

    Parameters
    ----------
    datasetType : `DatasetType`
        Dataset type to construct a spec for. Multiple dataset types may
        share the same table.
    collections : `type` [ `CollectionManager` ]
        `CollectionManager` subclass that can be used to construct foreign keys
        to the run and/or collection tables.
    dtype : `type`
        Type of the FK column, same as the column type of the PK column of
        a referenced table (``dataset.id``).
    constraints : `bool`, optional
        If `False` (`True` is default), do not define foreign key constraints.

    Returns
    -------
    spec : `ddl.TableSpec`
        Specification for the table.
    """
    tableSpec = ddl.TableSpec(
        fields=[
            # Foreign key fields to dataset, collection, and usually dimension
            # tables added below.
            # The dataset_type_id field here would be redundant with the one
            # in the main monolithic dataset table, but we need it here for an
            # important unique constraint.
            ddl.FieldSpec("dataset_type_id", dtype=sqlalchemy.BigInteger, nullable=False),
        ]
    )
    if constraints:
        tableSpec.foreignKeys.append(
            ddl.ForeignKeySpec("dataset_type", source=("dataset_type_id",), target=("id",))
        )
    # We'll also have a unique constraint on dataset type, collection, and
    # data ID. We only include the required part of the data ID, as that's
    # sufficient and saves us from worrying about nulls in the constraint.
    constraint = ["dataset_type_id"]
    # Add foreign key fields to dataset table (part of the primary key).
    addDatasetForeignKey(tableSpec, dtype, primaryKey=True, onDelete="CASCADE", constraint=constraints)
    # Add foreign key fields to collection table (part of the primary key and
    # the data ID unique constraint).
    collectionFieldSpec = collections.addCollectionForeignKey(tableSpec, primaryKey=True,
                                                              onDelete="CASCADE",
                                                              constraint=constraints)
    constraint.append(collectionFieldSpec.name)
    # Add foreign key constraint to the collection_summary_dataset_type table.
    if constraints:
        tableSpec.foreignKeys.append(
            ddl.ForeignKeySpec(
                "collection_summary_dataset_type",
                source=(collectionFieldSpec.name, "dataset_type_id"),
                target=(collectionFieldSpec.name, "dataset_type_id"),
            )
        )
    for dimension in datasetType.dimensions.required:
        fieldSpec = addDimensionForeignKey(tableSpec, dimension=dimension, nullable=False,
                                           primaryKey=False, constraint=constraints)
        constraint.append(fieldSpec.name)
        # If this is a governor dimension, add a foreign key constraint to the
        # collection_summary_<dimension> table.
        if isinstance(dimension, GovernorDimension) and constraints:
            tableSpec.foreignKeys.append(
                ddl.ForeignKeySpec(
                    f"collection_summary_{dimension.name}",
                    source=(collectionFieldSpec.name, fieldSpec.name),
                    target=(collectionFieldSpec.name, fieldSpec.name),
                )
            )
    # Actually add the unique constraint.
    tableSpec.unique.add(tuple(constraint))
    return tableSpec
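
# A hedged usage sketch (comments only; ``MyCollectionManager`` is again a
# hypothetical concrete `CollectionManager` subclass): the resulting spec has
# exactly one unique constraint, spanning the dataset type, the collection,
# and the required dimensions of the data ID.
#
#     spec = makeTagTableSpec(datasetType, MyCollectionManager, sqlalchemy.BigInteger)
#     assert len(spec.unique) == 1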


def makeCalibTableSpec(datasetType: DatasetType, collections: Type[CollectionManager],
                       TimespanReprClass: Type[TimespanDatabaseRepresentation],
                       dtype: type) -> ddl.TableSpec:
    """Construct the specification for a dynamic (DatasetType-dependent) tag +
    validity range table used by the classes in this package.

    Parameters
    ----------
    datasetType : `DatasetType`
        Dataset type to construct a spec for. Multiple dataset types may
        share the same table.
    collections : `type` [ `CollectionManager` ]
        `CollectionManager` subclass that can be used to construct foreign keys
        to the run and/or collection tables.
    TimespanReprClass : `type` [ `TimespanDatabaseRepresentation` ]
        Class that defines how timespans (validity ranges) are represented
        as columns in this database.
    dtype : `type`
        Type of the FK column, same as the column type of the PK column of
        a referenced table (``dataset.id``).

    Returns
    -------
    spec : `ddl.TableSpec`
        Specification for the table.
    """
    tableSpec = ddl.TableSpec(
        fields=[
            # This table has no natural primary key, compound or otherwise, so
            # we add an autoincrement key. We may use this field a bit
            # internally, but its presence is an implementation detail and it
            # shouldn't appear as a foreign key in any other tables.
            ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, autoincrement=True, primaryKey=True),
            # Foreign key fields to dataset, collection, and usually dimension
            # tables added below. The dataset_type_id field here is redundant
            # with the one in the main monolithic dataset table, but this bit
            # of denormalization lets us define what should be a much more
            # useful index.
            ddl.FieldSpec("dataset_type_id", dtype=sqlalchemy.BigInteger, nullable=False),
        ],
        foreignKeys=[
            ddl.ForeignKeySpec("dataset_type", source=("dataset_type_id",), target=("id",)),
        ]
    )
    # Record fields that should go in the temporal lookup index/constraint,
    # starting with the dataset type.
    index: List[Union[str, Type[TimespanDatabaseRepresentation]]] = ["dataset_type_id"]
    # Add foreign key fields to dataset table (not part of the temporal
    # lookup/constraint).
    addDatasetForeignKey(tableSpec, dtype, nullable=False, onDelete="CASCADE")
    # Add foreign key fields to collection table (part of the temporal lookup
    # index/constraint).
    collectionFieldSpec = collections.addCollectionForeignKey(tableSpec, nullable=False,
                                                              onDelete="CASCADE")
    index.append(collectionFieldSpec.name)
    # Add foreign key constraint to the collection_summary_dataset_type table.
    tableSpec.foreignKeys.append(
        ddl.ForeignKeySpec(
            "collection_summary_dataset_type",
            source=(collectionFieldSpec.name, "dataset_type_id"),
            target=(collectionFieldSpec.name, "dataset_type_id"),
        )
    )
    # Add dimension fields (part of the temporal lookup index/constraint).
    for dimension in datasetType.dimensions.required:
        fieldSpec = addDimensionForeignKey(tableSpec, dimension=dimension, nullable=False,
                                           primaryKey=False)
        index.append(fieldSpec.name)
        # If this is a governor dimension, add a foreign key constraint to the
        # collection_summary_<dimension> table.
        if isinstance(dimension, GovernorDimension):
            tableSpec.foreignKeys.append(
                ddl.ForeignKeySpec(
                    f"collection_summary_{dimension.name}",
                    source=(collectionFieldSpec.name, fieldSpec.name),
                    target=(collectionFieldSpec.name, fieldSpec.name),
                )
            )
    # Add validity-range field(s) (part of the temporal lookup
    # index/constraint).
    tsFieldSpecs = TimespanReprClass.makeFieldSpecs(nullable=False)
    for fieldSpec in tsFieldSpecs:
        tableSpec.fields.add(fieldSpec)
    if TimespanReprClass.hasExclusionConstraint():
        # This database's timespan representation can define a database-level
        # constraint that prevents overlapping validity ranges for entries
        # with the same DatasetType, collection, and data ID.
        # This also creates an index.
        index.append(TimespanReprClass)
        tableSpec.exclusion.add(tuple(index))
    else:
        # No database-level constraint possible. We'll have to simulate that
        # in our DatasetRecordStorage.certify() implementation, and just
        # create a regular index here in the hope that helps with lookups.
        index.extend(fieldSpec.name for fieldSpec in tsFieldSpecs)
        tableSpec.indexes.add(tuple(index))  # type: ignore
    return tableSpec
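
# A hedged sketch of how this might be driven (comments only; names such as
# ``MyCollectionManager`` are stand-ins, and which TimespanReprClass to use is
# a per-database choice made elsewhere in the registry):
#
#     spec = makeCalibTableSpec(calibDatasetType, MyCollectionManager,
#                               TimespanReprClass, sqlalchemy.BigInteger)
#     # If TimespanReprClass.hasExclusionConstraint() is False, overlap
#     # enforcement falls to DatasetRecordStorage.certify() instead.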