Coverage for python/lsst/daf/butler/registry/datasets/byDimensions/tables.py: 97%
67 statements
coverage.py v7.3.1, created at 2023-10-02 07:59 +0000
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = (
    "addDatasetForeignKey",
    "makeCalibTableName",
    "makeCalibTableSpec",
    "makeStaticTableSpecs",
    "makeTagTableName",
    "makeTagTableSpec",
    "StaticDatasetTablesTuple",
)

from collections import namedtuple
from typing import Any

import sqlalchemy

from ....core import (
    DatasetType,
    DimensionUniverse,
    GovernorDimension,
    TimespanDatabaseRepresentation,
    addDimensionForeignKey,
    ddl,
)
from ...interfaces import CollectionManager, VersionTuple

DATASET_TYPE_NAME_LENGTH = 128

StaticDatasetTablesTuple = namedtuple(
    "StaticDatasetTablesTuple",
    [
        "dataset_type",
        "dataset",
    ],
)

def addDatasetForeignKey(
    tableSpec: ddl.TableSpec,
    dtype: type,
    *,
    name: str = "dataset",
    onDelete: str | None = None,
    constraint: bool = True,
    **kwargs: Any,
) -> ddl.FieldSpec:
    """Add a foreign key column for datasets and (optionally) a constraint to
    a table.

    This is an internal interface for the ``byDimensions`` package; external
    code should use `DatasetRecordStorageManager.addDatasetForeignKey`
    instead.

    Parameters
    ----------
    tableSpec : `ddl.TableSpec`
        Specification for the table that should reference the dataset
        table.  Will be modified in place.
    dtype : `type`
        Type of the column, same as the column type of the PK column of
        the referenced table (``dataset.id``).
    name : `str`, optional
        A name to use for the prefix of the new field; the full name is
        ``{name}_id``.
    onDelete : `str`, optional
        One of "CASCADE" or "SET NULL", indicating what should happen to
        the referencing row if the dataset row is deleted.  `None`
        indicates that this should be an integrity error.
    constraint : `bool`, optional
        If `False` (`True` is the default), add a field that can be joined
        to the dataset primary key, but do not add a foreign key constraint.
    **kwargs
        Additional keyword arguments are forwarded to the `ddl.FieldSpec`
        constructor (only the ``name`` and ``dtype`` arguments are
        otherwise provided).

    Returns
    -------
    idSpec : `ddl.FieldSpec`
        Specification for the ID field.
    """
    idFieldSpec = ddl.FieldSpec(f"{name}_id", dtype=dtype, **kwargs)
    tableSpec.fields.add(idFieldSpec)
    if constraint:
        tableSpec.foreignKeys.append(
            ddl.ForeignKeySpec("dataset", source=(idFieldSpec.name,), target=("id",), onDelete=onDelete)
        )
    return idFieldSpec
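
# A minimal usage sketch (an illustrative addition, not part of the original
# module; the table and its "comment" field are made up): give a new table
# spec a ``dataset_id`` column that references ``dataset.id``.
def _example_add_dataset_fk() -> ddl.TableSpec:
    spec = ddl.TableSpec(
        fields=[ddl.FieldSpec("comment", dtype=sqlalchemy.String, length=32)],
    )
    # Extra keyword arguments (here ``nullable``) are forwarded to the
    # ``ddl.FieldSpec`` constructor; ``onDelete="CASCADE"`` makes deletion of
    # a dataset row delete the rows that reference it.
    addDatasetForeignKey(spec, sqlalchemy.BigInteger, onDelete="CASCADE", nullable=False)
    return spec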

def makeStaticTableSpecs(
    collections: type[CollectionManager],
    universe: DimensionUniverse,
    dtype: type,
    autoincrement: bool,
    schema_version: VersionTuple,
) -> StaticDatasetTablesTuple:
    """Construct all static tables used by the classes in this package.

    Static tables are those that are present in all Registries and do not
    depend on what DatasetTypes have been registered.

    Parameters
    ----------
    collections : `type` [ `CollectionManager` ]
        Manager class for the collections in this `Registry`.
    universe : `DimensionUniverse`
        Universe graph containing all dimensions known to this `Registry`.
    dtype : `type`
        Type of the dataset ID (primary key) column.
    autoincrement : `bool`
        If `True`, the dataset ID column will be auto-incrementing.
    schema_version : `VersionTuple`
        Schema version of the datasets manager; versions after 1 store
        ``ingest_date`` as TAI nanoseconds rather than a database timestamp.

    Returns
    -------
    specs : `StaticDatasetTablesTuple`
        A named tuple containing `ddl.TableSpec` instances.
    """
    ingest_date_type: type
    ingest_date_default: Any = None
    if schema_version.major > 1:
        ingest_date_type = ddl.AstropyTimeNsecTai
    else:
        ingest_date_type = sqlalchemy.TIMESTAMP
        # New code provides explicit values for ingest_date, but we keep the
        # default just to be consistent with the existing schema.
        ingest_date_default = sqlalchemy.sql.func.now()
    specs = StaticDatasetTablesTuple(
        dataset_type=ddl.TableSpec(
            fields=[
                ddl.FieldSpec(
                    name="id",
                    dtype=sqlalchemy.BigInteger,
                    autoincrement=True,
                    primaryKey=True,
                    doc=(
                        "Autoincrement ID that uniquely identifies a dataset "
                        "type in other tables. Python code outside the "
                        "`Registry` class should never interact with this; "
                        "its existence is considered an implementation detail."
                    ),
                ),
                ddl.FieldSpec(
                    name="name",
                    dtype=sqlalchemy.String,
                    length=DATASET_TYPE_NAME_LENGTH,
                    nullable=False,
                    doc="String name that uniquely identifies a dataset type.",
                ),
                ddl.FieldSpec(
                    name="storage_class",
                    dtype=sqlalchemy.String,
                    length=64,
                    nullable=False,
                    doc=(
                        "Name of the storage class associated with all "
                        "datasets of this type. Storage classes are "
                        "generally associated with a Python class, and are "
                        "enumerated in butler configuration."
                    ),
                ),
                ddl.FieldSpec(
                    name="dimensions_key",
                    dtype=sqlalchemy.BigInteger,
                    nullable=False,
                    doc="Unique key for the set of dimensions that identifies datasets of this type.",
                ),
                ddl.FieldSpec(
                    name="tag_association_table",
                    dtype=sqlalchemy.String,
                    length=128,
                    nullable=False,
                    doc=(
                        "Name of the table that holds associations between "
                        "datasets of this type and most types of collections."
                    ),
                ),
                ddl.FieldSpec(
                    name="calibration_association_table",
                    dtype=sqlalchemy.String,
                    length=128,
                    nullable=True,
                    doc=(
                        "Name of the table that holds associations between "
                        "datasets of this type and CALIBRATION collections. "
                        "NULL values indicate dataset types with "
                        "isCalibration=False."
                    ),
                ),
            ],
            unique=[("name",)],
        ),
        dataset=ddl.TableSpec(
            fields=[
                ddl.FieldSpec(
                    name="id",
                    dtype=dtype,
                    autoincrement=autoincrement,
                    primaryKey=True,
                    doc="A unique field used as the primary key for the dataset table.",
                ),
                ddl.FieldSpec(
                    name="dataset_type_id",
                    dtype=sqlalchemy.BigInteger,
                    nullable=False,
                    doc="Reference to the associated entry in the dataset_type table.",
                ),
                ddl.FieldSpec(
                    name="ingest_date",
                    dtype=ingest_date_type,
                    default=ingest_date_default,
                    nullable=False,
                    doc="Time of dataset ingestion.",
                ),
                # Foreign key field/constraint to run added below.
            ],
            foreignKeys=[
                ddl.ForeignKeySpec("dataset_type", source=("dataset_type_id",), target=("id",)),
            ],
        ),
    )
    # Add foreign key fields programmatically.
    collections.addRunForeignKey(specs.dataset, onDelete="CASCADE", nullable=False)
    return specs
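
# A minimal usage sketch (illustrative, not part of the original module): how
# a datasets manager might build its static schema.  ``ddl.GUID`` and
# ``VersionTuple(2, 0, 0)`` are assumptions standing in for whatever ID
# column type and schema version the concrete manager actually uses.
def _example_static_specs(
    collections: type[CollectionManager], universe: DimensionUniverse
) -> StaticDatasetTablesTuple:
    # UUID-keyed datasets do not autoincrement; a major version > 1 selects
    # the nanosecond-precision TAI representation for ingest_date.
    return makeStaticTableSpecs(
        collections,
        universe,
        dtype=ddl.GUID,
        autoincrement=False,
        schema_version=VersionTuple(2, 0, 0),
    )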

def makeTagTableName(datasetType: DatasetType, dimensionsKey: int) -> str:
    """Construct the name for a dynamic (DatasetType-dependent) tag table used
    by the classes in this package.

    Parameters
    ----------
    datasetType : `DatasetType`
        Dataset type to construct a name for.  Multiple dataset types may
        share the same table.
    dimensionsKey : `int`
        Integer key used to save ``datasetType.dimensions`` to the database.

    Returns
    -------
    name : `str`
        Name for the table.
    """
    return f"dataset_tags_{dimensionsKey:08d}"
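
# A worked example (illustrative): the name depends only on the dimensions
# key, zero-padded to eight digits, so all dataset types with the same
# dimensions share one tag table.  A key of 42 yields
# "dataset_tags_00000042".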

def makeCalibTableName(datasetType: DatasetType, dimensionsKey: int) -> str:
    """Construct the name for a dynamic (DatasetType-dependent) tag + validity
    range table used by the classes in this package.

    Parameters
    ----------
    datasetType : `DatasetType`
        Dataset type to construct a name for.  Multiple dataset types may
        share the same table.
    dimensionsKey : `int`
        Integer key used to save ``datasetType.dimensions`` to the database.

    Returns
    -------
    name : `str`
        Name for the table.
    """
    assert datasetType.isCalibration()
    return f"dataset_calibs_{dimensionsKey:08d}"
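
# A worked example (illustrative): calibration association tables use the
# same zero-padded key, so a key of 42 yields "dataset_calibs_00000042"; the
# assert above restricts these tables to calibration dataset types.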

def makeTagTableSpec(
    datasetType: DatasetType, collections: type[CollectionManager], dtype: type, *, constraints: bool = True
) -> ddl.TableSpec:
    """Construct the specification for a dynamic (DatasetType-dependent) tag
    table used by the classes in this package.

    Parameters
    ----------
    datasetType : `DatasetType`
        Dataset type to construct a spec for.  Multiple dataset types may
        share the same table.
    collections : `type` [ `CollectionManager` ]
        `CollectionManager` subclass that can be used to construct foreign keys
        to the run and/or collection tables.
    dtype : `type`
        Type of the FK column, same as the column type of the PK column of
        the referenced table (``dataset.id``).
    constraints : `bool`, optional
        If `False` (`True` is the default), do not define foreign key
        constraints.

    Returns
    -------
    spec : `ddl.TableSpec`
        Specification for the table.
    """
    tableSpec = ddl.TableSpec(
        fields=[
            # Foreign key fields to dataset, collection, and usually dimension
            # tables added below.
            # The dataset_type_id field here would be redundant with the one
            # in the main monolithic dataset table, but we need it here for an
            # important unique constraint.
            ddl.FieldSpec("dataset_type_id", dtype=sqlalchemy.BigInteger, nullable=False),
        ]
    )
    if constraints:
        tableSpec.foreignKeys.append(
            ddl.ForeignKeySpec("dataset_type", source=("dataset_type_id",), target=("id",))
        )
    # We'll also have a unique constraint on dataset type, collection, and
    # data ID.  We only include the required part of the data ID, as that's
    # sufficient and saves us from worrying about nulls in the constraint.
    constraint = ["dataset_type_id"]
    # Add foreign key fields to dataset table (part of the primary key).
    addDatasetForeignKey(tableSpec, dtype, primaryKey=True, onDelete="CASCADE", constraint=constraints)
    # Add foreign key fields to collection table (part of the primary key and
    # the data ID unique constraint).
    collectionFieldSpec = collections.addCollectionForeignKey(
        tableSpec, primaryKey=True, onDelete="CASCADE", constraint=constraints
    )
    constraint.append(collectionFieldSpec.name)
    # Add foreign key constraint to the collection_summary_dataset_type table.
    if constraints:
        tableSpec.foreignKeys.append(
            ddl.ForeignKeySpec(
                "collection_summary_dataset_type",
                source=(collectionFieldSpec.name, "dataset_type_id"),
                target=(collectionFieldSpec.name, "dataset_type_id"),
            )
        )
    for dimension in datasetType.dimensions.required:
        fieldSpec = addDimensionForeignKey(
            tableSpec, dimension=dimension, nullable=False, primaryKey=False, constraint=constraints
        )
        constraint.append(fieldSpec.name)
        # If this is a governor dimension, add a foreign key constraint to the
        # collection_summary_<dimension> table.
        if isinstance(dimension, GovernorDimension) and constraints:
            tableSpec.foreignKeys.append(
                ddl.ForeignKeySpec(
                    f"collection_summary_{dimension.name}",
                    source=(collectionFieldSpec.name, fieldSpec.name),
                    target=(collectionFieldSpec.name, fieldSpec.name),
                )
            )
    # Actually add the unique constraint.
    tableSpec.unique.add(tuple(constraint))
    return tableSpec
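
# A minimal usage sketch (illustrative, not part of the original module):
# building the tag-table spec for a dataset type, with ``ddl.GUID`` as an
# assumed stand-in for the actual dataset ID column type.
def _example_tag_spec(
    datasetType: DatasetType, collections: type[CollectionManager]
) -> ddl.TableSpec:
    # The result has a compound primary key over the dataset and collection
    # foreign keys, plus the unique constraint over (dataset_type_id,
    # collection, required data ID fields) assembled in the function above.
    return makeTagTableSpec(datasetType, collections, ddl.GUID)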

def makeCalibTableSpec(
    datasetType: DatasetType,
    collections: type[CollectionManager],
    TimespanReprClass: type[TimespanDatabaseRepresentation],
    dtype: type,
) -> ddl.TableSpec:
    """Construct the specification for a dynamic (DatasetType-dependent) tag +
    validity range table used by the classes in this package.

    Parameters
    ----------
    datasetType : `DatasetType`
        Dataset type to construct a spec for.  Multiple dataset types may
        share the same table.
    collections : `type` [ `CollectionManager` ]
        `CollectionManager` subclass that can be used to construct foreign keys
        to the run and/or collection tables.
    TimespanReprClass : `type` [ `TimespanDatabaseRepresentation` ]
        Class that encapsulates how timespans are represented in this
        database.
    dtype : `type`
        Type of the FK column, same as the column type of the PK column of
        the referenced table (``dataset.id``).

    Returns
    -------
    spec : `ddl.TableSpec`
        Specification for the table.
    """
    tableSpec = ddl.TableSpec(
        fields=[
            # This table has no natural primary key, compound or otherwise, so
            # we add an autoincrement key.  We may use this field a bit
            # internally, but its presence is an implementation detail and it
            # shouldn't appear as a foreign key in any other tables.
            ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, autoincrement=True, primaryKey=True),
            # Foreign key fields to dataset, collection, and usually dimension
            # tables added below.  The dataset_type_id field here is redundant
            # with the one in the main monolithic dataset table, but this bit
            # of denormalization lets us define what should be a much more
            # useful index.
            ddl.FieldSpec("dataset_type_id", dtype=sqlalchemy.BigInteger, nullable=False),
        ],
        foreignKeys=[
            ddl.ForeignKeySpec("dataset_type", source=("dataset_type_id",), target=("id",)),
        ],
    )
    # Record fields that should go in the temporal lookup index/constraint,
    # starting with the dataset type.
    index: list[str | type[TimespanDatabaseRepresentation]] = ["dataset_type_id"]
    # Add foreign key fields to dataset table (not part of the temporal
    # lookup/constraint).
    addDatasetForeignKey(tableSpec, dtype, nullable=False, onDelete="CASCADE")
    # Add foreign key fields to collection table (part of the temporal lookup
    # index/constraint).
    collectionFieldSpec = collections.addCollectionForeignKey(tableSpec, nullable=False, onDelete="CASCADE")
    index.append(collectionFieldSpec.name)
    # Add foreign key constraint to the collection_summary_dataset_type table.
    tableSpec.foreignKeys.append(
        ddl.ForeignKeySpec(
            "collection_summary_dataset_type",
            source=(collectionFieldSpec.name, "dataset_type_id"),
            target=(collectionFieldSpec.name, "dataset_type_id"),
        )
    )
    # Add dimension fields (part of the temporal lookup index/constraint).
    for dimension in datasetType.dimensions.required:
        fieldSpec = addDimensionForeignKey(tableSpec, dimension=dimension, nullable=False, primaryKey=False)
        index.append(fieldSpec.name)
        # If this is a governor dimension, add a foreign key constraint to the
        # collection_summary_<dimension> table.
        if isinstance(dimension, GovernorDimension):
            tableSpec.foreignKeys.append(
                ddl.ForeignKeySpec(
                    f"collection_summary_{dimension.name}",
                    source=(collectionFieldSpec.name, fieldSpec.name),
                    target=(collectionFieldSpec.name, fieldSpec.name),
                )
            )
    # Add validity-range field(s) (part of the temporal lookup
    # index/constraint).
    tsFieldSpecs = TimespanReprClass.makeFieldSpecs(nullable=False)
    for fieldSpec in tsFieldSpecs:
        tableSpec.fields.add(fieldSpec)
    if TimespanReprClass.hasExclusionConstraint():
        # This database's timespan representation can define a database-level
        # constraint that prevents overlapping validity ranges for entries
        # with the same DatasetType, collection, and data ID.
        # This also creates an index.
        index.append(TimespanReprClass)
        tableSpec.exclusion.add(tuple(index))
    else:
        # No database-level constraint is possible.  We'll have to simulate
        # that in our DatasetRecordStorage.certify() implementation, and just
        # create a regular index here in the hope that it helps with lookups.
        index.extend(fieldSpec.name for fieldSpec in tsFieldSpecs)
        tableSpec.indexes.add(ddl.IndexSpec(*index))  # type: ignore
    return tableSpec
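
# A minimal usage sketch (illustrative, not part of the original module):
# building the calibration-table spec.  The timespan representation class is
# normally supplied by the database layer; ``ddl.GUID`` is again an assumed
# dataset ID column type.
def _example_calib_spec(
    datasetType: DatasetType,
    collections: type[CollectionManager],
    TimespanReprClass: type[TimespanDatabaseRepresentation],
) -> ddl.TableSpec:
    # Only calibration dataset types get one of these tables; the temporal
    # lookup becomes an exclusion constraint where the database supports it,
    # and a plain index otherwise.
    assert datasetType.isCalibration()
    return makeCalibTableSpec(datasetType, collections, TimespanReprClass, ddl.GUID)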