# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = (
    "addDatasetForeignKey",
    "makeCalibTableName",
    "makeCalibTableSpec",
    "makeStaticTableSpecs",
    "makeTagTableName",
    "makeTagTableSpec",
    "StaticDatasetTablesTuple",
)

from typing import (
    Any,
    List,
    Optional,
    Type,
    Union,
)

from collections import namedtuple

import sqlalchemy

from lsst.daf.butler import (
    DatasetType,
    ddl,
    DimensionUniverse,
    GovernorDimension,
)
from lsst.daf.butler import addDimensionForeignKey, TimespanDatabaseRepresentation
from lsst.daf.butler.registry.interfaces import CollectionManager


DATASET_TYPE_NAME_LENGTH = 128


StaticDatasetTablesTuple = namedtuple(
    "StaticDatasetTablesTuple",
    [
        "dataset_type",
        "dataset",
    ]
)


def addDatasetForeignKey(tableSpec: ddl.TableSpec, *,
                         name: str = "dataset",
                         onDelete: Optional[str] = None,
                         constraint: bool = True,
                         **kwargs: Any) -> ddl.FieldSpec:
    """Add a foreign key column for datasets and (optionally) a constraint to
    a table.

    This is an internal interface for the ``byDimensions`` package; external
    code should use `DatasetRecordStorageManager.addDatasetForeignKey`
    instead.

    Parameters
    ----------
    tableSpec : `ddl.TableSpec`
        Specification for the table that should reference the dataset
        table. Will be modified in place.
    name : `str`, optional
        A name to use for the prefix of the new field; the full name is
        ``{name}_id``.
    onDelete : `str`, optional
        One of "CASCADE" or "SET NULL", indicating what should happen to
        the referencing row if the dataset row is deleted. `None`
        indicates that this should be an integrity error.
    constraint : `bool`, optional
        If `False` (`True` is default), add a field that can be joined to
        the dataset primary key, but do not add a foreign key constraint.
    **kwargs
        Additional keyword arguments are forwarded to the `ddl.FieldSpec`
        constructor (only the ``name`` and ``dtype`` arguments are
        otherwise provided).

    Returns
    -------
    idSpec : `ddl.FieldSpec`
        Specification for the ID field.
    """
    idFieldSpec = ddl.FieldSpec(f"{name}_id", dtype=sqlalchemy.BigInteger, **kwargs)
    tableSpec.fields.add(idFieldSpec)
    if constraint:
        tableSpec.foreignKeys.append(ddl.ForeignKeySpec("dataset", source=(idFieldSpec.name,),
                                                        target=("id",), onDelete=onDelete))
    return idFieldSpec
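

# Usage sketch (illustrative, not part of the original module): how a caller
# inside this package might attach a dataset foreign key to its own table
# spec. The ``exampleSpec`` name is hypothetical.
#
#     exampleSpec = ddl.TableSpec(fields=[])
#     idSpec = addDatasetForeignKey(exampleSpec, primaryKey=True, onDelete="CASCADE")
#     # exampleSpec now contains a BigInteger ``dataset_id`` field and a
#     # foreign key constraint referencing dataset.id.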


def makeStaticTableSpecs(collections: Type[CollectionManager],
                         universe: DimensionUniverse,
                         ) -> StaticDatasetTablesTuple:
    """Construct all static tables used by the classes in this package.

    Static tables are those that are present in all Registries and do not
    depend on what DatasetTypes have been registered.

    Parameters
    ----------
    collections : `type` [ `CollectionManager` ]
        Manager class for the collections in this `Registry`.
    universe : `DimensionUniverse`
        Universe graph containing all dimensions known to this `Registry`.

    Returns
    -------
    specs : `StaticDatasetTablesTuple`
        A named tuple containing `ddl.TableSpec` instances.
    """
    specs = StaticDatasetTablesTuple(
        dataset_type=ddl.TableSpec(
            fields=[
                ddl.FieldSpec(
                    name="id",
                    dtype=sqlalchemy.BigInteger,
                    autoincrement=True,
                    primaryKey=True,
                    doc=(
                        "Autoincrement ID that uniquely identifies a dataset "
                        "type in other tables. Python code outside the "
                        "`Registry` class should never interact with this; "
                        "its existence is considered an implementation detail."
                    ),
                ),
                ddl.FieldSpec(
                    name="name",
                    dtype=sqlalchemy.String,
                    length=DATASET_TYPE_NAME_LENGTH,
                    nullable=False,
                    doc="String name that uniquely identifies a dataset type.",
                ),
                ddl.FieldSpec(
                    name="storage_class",
                    dtype=sqlalchemy.String,
                    length=64,
                    nullable=False,
                    doc=(
                        "Name of the storage class associated with all "
                        "datasets of this type. Storage classes are "
                        "generally associated with a Python class, and are "
                        "enumerated in butler configuration."
                    ),
                ),
                ddl.FieldSpec(
                    name="dimensions_key",
                    dtype=sqlalchemy.BigInteger,
                    nullable=False,
                    doc=(
                        "Unique key for the set of dimensions that identifies "
                        "datasets of this type."
                    ),
                ),
                ddl.FieldSpec(
                    name="tag_association_table",
                    dtype=sqlalchemy.String,
                    length=128,
                    nullable=False,
                    doc=(
                        "Name of the table that holds associations between "
                        "datasets of this type and most types of collections."
                    ),
                ),
                ddl.FieldSpec(
                    name="calibration_association_table",
                    dtype=sqlalchemy.String,
                    length=128,
                    nullable=True,
                    doc=(
                        "Name of the table that holds associations between "
                        "datasets of this type and CALIBRATION collections. "
                        "NULL values indicate dataset types with "
                        "isCalibration=False."
                    ),
                ),
            ],
            unique=[("name",)],
        ),
        dataset=ddl.TableSpec(
            fields=[
                ddl.FieldSpec(
                    name="id",
                    dtype=sqlalchemy.BigInteger,
                    autoincrement=True,
                    primaryKey=True,
                    doc="A unique autoincrement field used as the primary key for dataset.",
                ),
                ddl.FieldSpec(
                    name="dataset_type_id",
                    dtype=sqlalchemy.BigInteger,
                    nullable=False,
                    doc=(
                        "Reference to the associated entry in the dataset_type "
                        "table."
                    ),
                ),
                ddl.FieldSpec(
                    name="ingest_date",
                    dtype=sqlalchemy.TIMESTAMP,
                    default=sqlalchemy.sql.func.now(),
                    nullable=False,
                    doc="Time of dataset ingestion.",
                ),
                # Foreign key field/constraint to run added below.
            ],
            foreignKeys=[
                ddl.ForeignKeySpec("dataset_type", source=("dataset_type_id",), target=("id",)),
            ]
        ),
    )
    # Add foreign key fields programmatically.
    collections.addRunForeignKey(specs.dataset, onDelete="CASCADE", nullable=False)
    return specs
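

# Usage sketch (illustrative): given a concrete `CollectionManager` subclass
# and a `DimensionUniverse`, the returned tuple provides one `ddl.TableSpec`
# per static table. ``MyCollectionManager`` and ``universe`` are assumed
# names, not part of this module.
#
#     specs = makeStaticTableSpecs(MyCollectionManager, universe)
#     specs.dataset_type  # ddl.TableSpec for the dataset_type table
#     specs.dataset       # ddl.TableSpec for the dataset table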


def makeTagTableName(datasetType: DatasetType, dimensionsKey: int) -> str:
    """Construct the name for a dynamic (DatasetType-dependent) tag table used
    by the classes in this package.

    Parameters
    ----------
    datasetType : `DatasetType`
        Dataset type to construct a name for. Multiple dataset types may
        share the same table.
    dimensionsKey : `int`
        Integer key used to save ``datasetType.dimensions`` to the database.

    Returns
    -------
    name : `str`
        Name for the table.
    """
    return f"dataset_tags_{dimensionsKey:08d}"
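

# For example, a dimensions key of 42 yields the table name
# "dataset_tags_00000042"; the eight-digit zero padding comes from the
# ``:08d`` format spec above. makeCalibTableName below follows the same
# pattern with a "dataset_calibs_" prefix.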


def makeCalibTableName(datasetType: DatasetType, dimensionsKey: int) -> str:
    """Construct the name for a dynamic (DatasetType-dependent) tag + validity
    range table used by the classes in this package.

    Parameters
    ----------
    datasetType : `DatasetType`
        Dataset type to construct a name for. Multiple dataset types may
        share the same table.
    dimensionsKey : `int`
        Integer key used to save ``datasetType.dimensions`` to the database.

    Returns
    -------
    name : `str`
        Name for the table.
    """
    assert datasetType.isCalibration()
    return f"dataset_calibs_{dimensionsKey:08d}"


def makeTagTableSpec(datasetType: DatasetType, collections: Type[CollectionManager]) -> ddl.TableSpec:
    """Construct the specification for a dynamic (DatasetType-dependent) tag
    table used by the classes in this package.

    Parameters
    ----------
    datasetType : `DatasetType`
        Dataset type to construct a spec for. Multiple dataset types may
        share the same table.
    collections : `type` [ `CollectionManager` ]
        `CollectionManager` subclass that can be used to construct foreign
        keys to the run and/or collection tables.

    Returns
    -------
    spec : `ddl.TableSpec`
        Specification for the table.
    """
    tableSpec = ddl.TableSpec(
        fields=[
            # Foreign key fields to dataset, collection, and usually dimension
            # tables added below.
            # The dataset_type_id field here would be redundant with the one
            # in the main monolithic dataset table, but we need it here for an
            # important unique constraint.
            ddl.FieldSpec("dataset_type_id", dtype=sqlalchemy.BigInteger, nullable=False),
        ],
        foreignKeys=[
            ddl.ForeignKeySpec("dataset_type", source=("dataset_type_id",), target=("id",)),
        ]
    )
    # We'll also have a unique constraint on dataset type, collection, and
    # data ID. We only include the required part of the data ID, as that's
    # sufficient and saves us from worrying about nulls in the constraint.
    constraint = ["dataset_type_id"]
    # Add foreign key fields to dataset table (part of the primary key).
    addDatasetForeignKey(tableSpec, primaryKey=True, onDelete="CASCADE")
    # Add foreign key fields to collection table (part of the primary key and
    # the data ID unique constraint).
    collectionFieldSpec = collections.addCollectionForeignKey(tableSpec, primaryKey=True, onDelete="CASCADE")
    constraint.append(collectionFieldSpec.name)
    # Add a foreign key constraint to the collection_summary_dataset_type
    # table.
    tableSpec.foreignKeys.append(
        ddl.ForeignKeySpec(
            "collection_summary_dataset_type",
            source=(collectionFieldSpec.name, "dataset_type_id"),
            target=(collectionFieldSpec.name, "dataset_type_id"),
        )
    )
    for dimension in datasetType.dimensions.required:
        fieldSpec = addDimensionForeignKey(tableSpec, dimension=dimension, nullable=False, primaryKey=False)
        constraint.append(fieldSpec.name)
        # If this is a governor dimension, add a foreign key constraint to the
        # collection_summary_<dimension> table.
        if isinstance(dimension, GovernorDimension):
            tableSpec.foreignKeys.append(
                ddl.ForeignKeySpec(
                    f"collection_summary_{dimension.name}",
                    source=(collectionFieldSpec.name, fieldSpec.name),
                    target=(collectionFieldSpec.name, fieldSpec.name),
                )
            )
    # Actually add the unique constraint.
    tableSpec.unique.add(tuple(constraint))
    return tableSpec
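

# Usage sketch (illustrative; ``myDatasetType`` and ``MyCollectionManager``
# are assumed names):
#
#     spec = makeTagTableSpec(myDatasetType, MyCollectionManager)
#     # ``spec`` has a compound primary key over the dataset and collection
#     # foreign keys, plus a unique constraint over (dataset_type_id,
#     # collection, required data ID), so at most one dataset of a given type
#     # can be tagged per collection and data ID.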


def makeCalibTableSpec(datasetType: DatasetType, collections: Type[CollectionManager],
                       TimespanReprClass: Type[TimespanDatabaseRepresentation]) -> ddl.TableSpec:
    """Construct the specification for a dynamic (DatasetType-dependent) tag +
    validity range table used by the classes in this package.

    Parameters
    ----------
    datasetType : `DatasetType`
        Dataset type to construct a spec for. Multiple dataset types may
        share the same table.
    collections : `type` [ `CollectionManager` ]
        `CollectionManager` subclass that can be used to construct foreign
        keys to the run and/or collection tables.
    TimespanReprClass : `type` [ `TimespanDatabaseRepresentation` ]
        Class that defines how timespans are represented in the database.

    Returns
    -------
    spec : `ddl.TableSpec`
        Specification for the table.
    """
    tableSpec = ddl.TableSpec(
        fields=[
            # This table has no natural primary key, compound or otherwise, so
            # we add an autoincrement key. We may use this field a bit
            # internally, but its presence is an implementation detail and it
            # shouldn't appear as a foreign key in any other tables.
            ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, autoincrement=True, primaryKey=True),
            # Foreign key fields to dataset, collection, and usually dimension
            # tables added below. The dataset_type_id field here is redundant
            # with the one in the main monolithic dataset table, but this bit
            # of denormalization lets us define what should be a much more
            # useful index.
            ddl.FieldSpec("dataset_type_id", dtype=sqlalchemy.BigInteger, nullable=False),
        ],
        foreignKeys=[
            ddl.ForeignKeySpec("dataset_type", source=("dataset_type_id",), target=("id",)),
        ]
    )
    # Record fields that should go in the temporal lookup index/constraint,
    # starting with the dataset type.
    index: List[Union[str, Type[TimespanDatabaseRepresentation]]] = ["dataset_type_id"]
    # Add foreign key fields to dataset table (not part of the temporal
    # lookup/constraint).
    addDatasetForeignKey(tableSpec, nullable=False, onDelete="CASCADE")
    # Add foreign key fields to collection table (part of the temporal lookup
    # index/constraint).
    collectionFieldSpec = collections.addCollectionForeignKey(tableSpec, nullable=False, onDelete="CASCADE")
    index.append(collectionFieldSpec.name)
    # Add a foreign key constraint to the collection_summary_dataset_type
    # table.
    tableSpec.foreignKeys.append(
        ddl.ForeignKeySpec(
            "collection_summary_dataset_type",
            source=(collectionFieldSpec.name, "dataset_type_id"),
            target=(collectionFieldSpec.name, "dataset_type_id"),
        )
    )
    # Add dimension fields (part of the temporal lookup index/constraint).
    for dimension in datasetType.dimensions.required:
        fieldSpec = addDimensionForeignKey(tableSpec, dimension=dimension, nullable=False, primaryKey=False)
        index.append(fieldSpec.name)
        # If this is a governor dimension, add a foreign key constraint to the
        # collection_summary_<dimension> table.
        if isinstance(dimension, GovernorDimension):
            tableSpec.foreignKeys.append(
                ddl.ForeignKeySpec(
                    f"collection_summary_{dimension.name}",
                    source=(collectionFieldSpec.name, fieldSpec.name),
                    target=(collectionFieldSpec.name, fieldSpec.name),
                )
            )
    # Add validity-range field(s) (part of the temporal lookup
    # index/constraint).
    tsFieldSpecs = TimespanReprClass.makeFieldSpecs(nullable=False)
    for fieldSpec in tsFieldSpecs:
        tableSpec.fields.add(fieldSpec)
    if TimespanReprClass.hasExclusionConstraint():
        # This database's timespan representation can define a database-level
        # constraint that prevents overlapping validity ranges for entries
        # with the same DatasetType, collection, and data ID.
        # This also creates an index.
        index.append(TimespanReprClass)
        tableSpec.exclusion.add(tuple(index))
    else:
        # No database-level constraint is possible. We'll have to simulate
        # that in our DatasetRecordStorage.certify() implementation, and just
        # create a regular index here in the hope that helps with lookups.
        index.extend(fieldSpec.name for fieldSpec in tsFieldSpecs)
        tableSpec.indexes.add(tuple(index))  # type: ignore
    return tableSpec
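

# Usage sketch (illustrative; the concrete manager and timespan-representation
# classes are assumed names, chosen only to show the call shape):
#
#     spec = makeCalibTableSpec(myCalibDatasetType, MyCollectionManager,
#                               MyTimespanRepr)
#     # On databases whose timespan representation supports exclusion
#     # constraints, overlapping validity ranges for the same dataset type,
#     # collection, and data ID are rejected by the database itself;
#     # otherwise a plain index is created and the overlap check happens in
#     # DatasetRecordStorage.certify().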