Coverage for python/lsst/daf/butler/registry/datasets/byDimensions/tables.py : 97%

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = (
    "addDatasetForeignKey",
    "makeCalibTableName",
    "makeCalibTableSpec",
    "makeStaticTableSpecs",
    "makeTagTableName",
    "makeTagTableSpec",
    "StaticDatasetTablesTuple",
)

from typing import (
    Any,
    Generic,
    List,
    Optional,
    Type,
    TypeVar,
    Union,
)

from collections import namedtuple

import sqlalchemy

from lsst.daf.butler import (
    DatasetType,
    ddl,
    DimensionUniverse,
    GovernorDimension,
    NamedKeyDict,
    NamedKeyMapping,
)
from lsst.daf.butler import addDimensionForeignKey, TimespanDatabaseRepresentation
from lsst.daf.butler.registry.interfaces import (
    CollectionManager,
    Database,
    DimensionRecordStorageManager,
    StaticTablesContext,
)


DATASET_TYPE_NAME_LENGTH = 128


StaticDatasetTablesTuple = namedtuple(
    "StaticDatasetTablesTuple",
    [
        "dataset_type",
        "dataset",
    ]
)


def addDatasetForeignKey(tableSpec: ddl.TableSpec, *,
                         name: str = "dataset",
                         onDelete: Optional[str] = None,
                         constraint: bool = True,
                         **kwargs: Any) -> ddl.FieldSpec:
    """Add a foreign key column for datasets and (optionally) a constraint to
    a table.

    This is an internal interface for the ``byDimensions`` package; external
    code should use `DatasetRecordStorageManager.addDatasetForeignKey`
    instead.

    Parameters
    ----------
    tableSpec : `ddl.TableSpec`
        Specification for the table that should reference the dataset
        table. Will be modified in place.
    name : `str`, optional
        A name to use for the prefix of the new field; the full name is
        ``{name}_id``.
    onDelete : `str`, optional
        One of "CASCADE" or "SET NULL", indicating what should happen to
        the referencing row if the dataset row is deleted. `None`
        indicates that this should be an integrity error.
    constraint : `bool`, optional
        If `False` (`True` is default), add a field that can be joined to
        the dataset primary key, but do not add a foreign key constraint.
    **kwargs
        Additional keyword arguments are forwarded to the `ddl.FieldSpec`
        constructor (only the ``name`` and ``dtype`` arguments are
        otherwise provided).

    Returns
    -------
    idSpec : `ddl.FieldSpec`
        Specification for the ID field.
    """
    idFieldSpec = ddl.FieldSpec(f"{name}_id", dtype=sqlalchemy.BigInteger, **kwargs)
    tableSpec.fields.add(idFieldSpec)
    if constraint:
        tableSpec.foreignKeys.append(ddl.ForeignKeySpec("dataset", source=(idFieldSpec.name,),
                                                        target=("id",), onDelete=onDelete))
    return idFieldSpec
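
# Usage sketch for the function above (hedged; the names below are
# illustrative, not part of the module API). By default the new field is
# named ``dataset_id`` and carries a foreign key constraint to the dataset
# table:
#
#     spec = ddl.TableSpec(fields=[])
#     idSpec = addDatasetForeignKey(spec, onDelete="CASCADE")
#     assert idSpec.name == "dataset_id"
#     # Pass constraint=False to get a joinable column with no constraint.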


def makeStaticTableSpecs(collections: Type[CollectionManager],
                         universe: DimensionUniverse,
                         ) -> StaticDatasetTablesTuple:
    """Construct all static tables used by the classes in this package.

    Static tables are those that are present in all Registries and do not
    depend on what DatasetTypes have been registered.

    Parameters
    ----------
    collections : `type` [ `CollectionManager` ]
        Manager class used for the collections in this `Registry`.
    universe : `DimensionUniverse`
        Universe graph containing all dimensions known to this `Registry`.

    Returns
    -------
    specs : `StaticDatasetTablesTuple`
        A named tuple containing `ddl.TableSpec` instances.
    """
    specs = StaticDatasetTablesTuple(
        dataset_type=ddl.TableSpec(
            fields=[
                ddl.FieldSpec(
                    name="id",
                    dtype=sqlalchemy.BigInteger,
                    autoincrement=True,
                    primaryKey=True,
                    doc=(
                        "Autoincrement ID that uniquely identifies a dataset "
                        "type in other tables. Python code outside the "
                        "`Registry` class should never interact with this; "
                        "its existence is considered an implementation detail."
                    ),
                ),
                ddl.FieldSpec(
                    name="name",
                    dtype=sqlalchemy.String,
                    length=DATASET_TYPE_NAME_LENGTH,
                    nullable=False,
                    doc="String name that uniquely identifies a dataset type.",
                ),
                ddl.FieldSpec(
                    name="storage_class",
                    dtype=sqlalchemy.String,
                    length=64,
                    nullable=False,
                    doc=(
                        "Name of the storage class associated with all "
                        "datasets of this type. Storage classes are "
                        "generally associated with a Python class, and are "
                        "enumerated in butler configuration."
                    )
                ),
                ddl.FieldSpec(
                    name="dimensions_key",
                    dtype=sqlalchemy.BigInteger,
                    nullable=False,
                    doc=(
                        "Unique key for the set of dimensions that identifies "
                        "datasets of this type."
                    ),
                ),
                ddl.FieldSpec(
                    name="tag_association_table",
                    dtype=sqlalchemy.String,
                    length=128,
                    nullable=False,
                    doc=(
                        "Name of the table that holds associations between "
                        "datasets of this type and most types of collections."
                    ),
                ),
                ddl.FieldSpec(
                    name="calibration_association_table",
                    dtype=sqlalchemy.String,
                    length=128,
                    nullable=True,
                    doc=(
                        "Name of the table that holds associations between "
                        "datasets of this type and CALIBRATION collections. "
                        "NULL values indicate dataset types with "
                        "isCalibration=False."
                    ),
                ),
            ],
            unique=[("name",)],
        ),
        dataset=ddl.TableSpec(
            fields=[
                ddl.FieldSpec(
                    name="id",
                    dtype=sqlalchemy.BigInteger,
                    autoincrement=True,
                    primaryKey=True,
                    doc="A unique autoincrement field used as the primary key for dataset.",
                ),
                ddl.FieldSpec(
                    name="dataset_type_id",
                    dtype=sqlalchemy.BigInteger,
                    nullable=False,
                    doc=(
                        "Reference to the associated entry in the dataset_type "
                        "table."
                    ),
                ),
                ddl.FieldSpec(
                    name="ingest_date",
                    dtype=sqlalchemy.TIMESTAMP,
                    default=sqlalchemy.sql.func.now(),
                    nullable=False,
                    doc="Time of dataset ingestion.",
                ),
                # Foreign key field/constraint to run added below.
            ],
            foreignKeys=[
                ddl.ForeignKeySpec("dataset_type", source=("dataset_type_id",), target=("id",)),
            ]
        ),
    )
    # Add foreign key fields programmatically.
    collections.addRunForeignKey(specs.dataset, onDelete="CASCADE", nullable=False)
    return specs
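
# Usage sketch (hedged). ``context`` is a hypothetical `StaticTablesContext`
# obtained from `Database.declareStaticTables`, and ``MyCollectionManager``
# stands in for a concrete `CollectionManager` subclass:
#
#     specs = makeStaticTableSpecs(MyCollectionManager, universe)
#     dataset_type_table = context.addTable("dataset_type", specs.dataset_type)
#     dataset_table = context.addTable("dataset", specs.dataset)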


_T = TypeVar("_T")


class CollectionSummaryTables(Generic[_T]):
    """Structure that holds the table or table specification objects that
    summarize the contents of collections.

    Parameters
    ----------
    datasetType
        Table [specification] that summarizes which dataset types are in each
        collection.
    dimensions
        Mapping of table [specifications] that summarize which governor
        dimension values are present in the data IDs of each collection.
    """
    def __init__(
        self,
        datasetType: _T,
        dimensions: NamedKeyMapping[GovernorDimension, _T],
    ):
        self.datasetType = datasetType
        self.dimensions = dimensions

    @classmethod
    def initialize(
        cls,
        db: Database,
        context: StaticTablesContext, *,
        collections: CollectionManager,
        dimensions: DimensionRecordStorageManager,
    ) -> CollectionSummaryTables[sqlalchemy.schema.Table]:
        """Create all summary tables (or check that they have been created).

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present.
        collections : `CollectionManager`
            Manager object for the collections in this `Registry`.
        dimensions : `DimensionRecordStorageManager`
            Manager object for the dimensions in this `Registry`.

        Returns
        -------
        tables : `CollectionSummaryTables` [ `sqlalchemy.schema.Table` ]
            Structure containing table objects.
        """
        specs = cls.makeTableSpecs(collections, dimensions)
        return CollectionSummaryTables(
            datasetType=context.addTable("collection_summary_dataset_type", specs.datasetType),
            dimensions=NamedKeyDict({
                dimension: context.addTable(f"collection_summary_{dimension.name}", spec)
                for dimension, spec in specs.dimensions.items()
            }).freeze(),
        )

    @classmethod
    def makeTableSpecs(
        cls,
        collections: CollectionManager,
        dimensions: DimensionRecordStorageManager,
    ) -> CollectionSummaryTables[ddl.TableSpec]:
        """Create specifications for all summary tables.

        Parameters
        ----------
        collections : `CollectionManager`
            Manager object for the collections in this `Registry`.
        dimensions : `DimensionRecordStorageManager`
            Manager object for the dimensions in this `Registry`.

        Returns
        -------
        tables : `CollectionSummaryTables` [ `ddl.TableSpec` ]
            Structure containing table specifications.
        """
        # Spec for collection_summary_dataset_type.
        datasetTypeTableSpec = ddl.TableSpec(fields=[])
        collections.addCollectionForeignKey(datasetTypeTableSpec, primaryKey=True, onDelete="CASCADE")
        datasetTypeTableSpec.fields.add(
            ddl.FieldSpec("dataset_type_id", dtype=sqlalchemy.BigInteger, primaryKey=True)
        )
        datasetTypeTableSpec.foreignKeys.append(
            ddl.ForeignKeySpec("dataset_type", source=("dataset_type_id",), target=("id",),
                               onDelete="CASCADE")
        )
        # Specs for collection_summary_<dimension>.
        dimensionTableSpecs = NamedKeyDict[GovernorDimension, ddl.TableSpec]()
        for dimension in dimensions.universe.getGovernorDimensions():
            tableSpec = ddl.TableSpec(fields=[])
            collections.addCollectionForeignKey(tableSpec, primaryKey=True, onDelete="CASCADE")
            addDimensionForeignKey(tableSpec, dimension, primaryKey=True)
            dimensionTableSpecs[dimension] = tableSpec
        return CollectionSummaryTables(
            datasetType=datasetTypeTableSpec,
            dimensions=dimensionTableSpecs.freeze(),
        )
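
# Schema sketch (hedged). Each summary table pairs the collection foreign key
# with exactly one other column, so a row asserts either "this collection has
# datasets of this type" or "this collection has data IDs with this governor
# value"; with ``instrument`` as an example governor dimension:
#
#     collection_summary_dataset_type(<collection FK>, dataset_type_id)
#     collection_summary_instrument(<collection FK>, instrument)
#
# The collection FK column name is whatever `addCollectionForeignKey` on the
# concrete `CollectionManager` subclass chooses.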


def makeTagTableName(datasetType: DatasetType, dimensionsKey: int) -> str:
    """Construct the name for a dynamic (DatasetType-dependent) tag table used
    by the classes in this package.

    Parameters
    ----------
    datasetType : `DatasetType`
        Dataset type to construct a name for. Multiple dataset types may
        share the same table.
    dimensionsKey : `int`
        Integer key used to save ``datasetType.dimensions`` to the database.

    Returns
    -------
    name : `str`
        Name for the table.
    """
    return f"dataset_tags_{dimensionsKey:08d}"


def makeCalibTableName(datasetType: DatasetType, dimensionsKey: int) -> str:
    """Construct the name for a dynamic (DatasetType-dependent) tag + validity
    range table used by the classes in this package.

    Parameters
    ----------
    datasetType : `DatasetType`
        Dataset type to construct a name for. Multiple dataset types may
        share the same table.
    dimensionsKey : `int`
        Integer key used to save ``datasetType.dimensions`` to the database.

    Returns
    -------
    name : `str`
        Name for the table.
    """
    assert datasetType.isCalibration()
    return f"dataset_calibs_{dimensionsKey:08d}"


def makeTagTableSpec(datasetType: DatasetType, collections: Type[CollectionManager]) -> ddl.TableSpec:
    """Construct the specification for a dynamic (DatasetType-dependent) tag
    table used by the classes in this package.

    Parameters
    ----------
    datasetType : `DatasetType`
        Dataset type to construct a spec for. Multiple dataset types may
        share the same table.
    collections : `type` [ `CollectionManager` ]
        `CollectionManager` subclass that can be used to construct foreign
        keys to the run and/or collection tables.

    Returns
    -------
    spec : `ddl.TableSpec`
        Specification for the table.
    """
    tableSpec = ddl.TableSpec(
        fields=[
            # Foreign key fields to dataset, collection, and usually dimension
            # tables added below.
            # The dataset_type_id field here would be redundant with the one
            # in the main monolithic dataset table, but we need it here for an
            # important unique constraint.
            ddl.FieldSpec("dataset_type_id", dtype=sqlalchemy.BigInteger, nullable=False),
        ],
        foreignKeys=[
            ddl.ForeignKeySpec("dataset_type", source=("dataset_type_id",), target=("id",)),
        ]
    )
    # We'll also have a unique constraint on dataset type, collection, and
    # data ID. We only include the required part of the data ID, as that's
    # sufficient and saves us from worrying about nulls in the constraint.
    constraint = ["dataset_type_id"]
    # Add foreign key fields to dataset table (part of the primary key).
    addDatasetForeignKey(tableSpec, primaryKey=True, onDelete="CASCADE")
    # Add foreign key fields to collection table (part of the primary key and
    # the data ID unique constraint).
    collectionFieldSpec = collections.addCollectionForeignKey(tableSpec, primaryKey=True, onDelete="CASCADE")
    constraint.append(collectionFieldSpec.name)
    # Add foreign key constraint to the collection_summary_dataset_type table.
    tableSpec.foreignKeys.append(
        ddl.ForeignKeySpec(
            "collection_summary_dataset_type",
            source=(collectionFieldSpec.name, "dataset_type_id"),
            target=(collectionFieldSpec.name, "dataset_type_id"),
        )
    )
    for dimension in datasetType.dimensions.required:
        fieldSpec = addDimensionForeignKey(tableSpec, dimension=dimension, nullable=False, primaryKey=False)
        constraint.append(fieldSpec.name)
        # If this is a governor dimension, add a foreign key constraint to the
        # collection_summary_<dimension> table.
        if isinstance(dimension, GovernorDimension):
            tableSpec.foreignKeys.append(
                ddl.ForeignKeySpec(
                    f"collection_summary_{dimension.name}",
                    source=(collectionFieldSpec.name, fieldSpec.name),
                    target=(collectionFieldSpec.name, fieldSpec.name),
                )
            )
    # Actually add the unique constraint.
    tableSpec.unique.add(tuple(constraint))
    return tableSpec
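
# Constraint sketch (hedged; column names are illustrative). For a dataset
# type whose required dimensions are (instrument, detector), and a manager
# whose collection FK field is named ``collection_id``, the unique constraint
# added above would be:
#
#     ("dataset_type_id", "collection_id", "instrument", "detector")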


def makeCalibTableSpec(datasetType: DatasetType, collections: Type[CollectionManager],
                       tsRepr: Type[TimespanDatabaseRepresentation]) -> ddl.TableSpec:
    """Construct the specification for a dynamic (DatasetType-dependent) tag +
    validity range table used by the classes in this package.

    Parameters
    ----------
    datasetType : `DatasetType`
        Dataset type to construct a spec for. Multiple dataset types may
        share the same table.
    collections : `type` [ `CollectionManager` ]
        `CollectionManager` subclass that can be used to construct foreign
        keys to the run and/or collection tables.
    tsRepr : `type` [ `TimespanDatabaseRepresentation` ]
        Class that defines how timespans (validity ranges) are represented in
        the database.

    Returns
    -------
    spec : `ddl.TableSpec`
        Specification for the table.
    """
    tableSpec = ddl.TableSpec(
        fields=[
            # This table has no natural primary key, compound or otherwise, so
            # we add an autoincrement key. We may use this field a bit
            # internally, but its presence is an implementation detail and it
            # shouldn't appear as a foreign key in any other tables.
            ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, autoincrement=True, primaryKey=True),
            # Foreign key fields to dataset, collection, and usually dimension
            # tables added below. The dataset_type_id field here is redundant
            # with the one in the main monolithic dataset table, but this bit
            # of denormalization lets us define what should be a much more
            # useful index.
            ddl.FieldSpec("dataset_type_id", dtype=sqlalchemy.BigInteger, nullable=False),
        ],
        foreignKeys=[
            ddl.ForeignKeySpec("dataset_type", source=("dataset_type_id",), target=("id",)),
        ]
    )
    # Record fields that should go in the temporal lookup index/constraint,
    # starting with the dataset type.
    index: List[Union[str, Type[TimespanDatabaseRepresentation]]] = ["dataset_type_id"]
    # Add foreign key fields to dataset table (not part of the temporal
    # lookup/constraint).
    addDatasetForeignKey(tableSpec, nullable=False, onDelete="CASCADE")
    # Add foreign key fields to collection table (part of the temporal lookup
    # index/constraint).
    collectionFieldSpec = collections.addCollectionForeignKey(tableSpec, nullable=False, onDelete="CASCADE")
    index.append(collectionFieldSpec.name)
    # Add foreign key constraint to the collection_summary_dataset_type table.
    tableSpec.foreignKeys.append(
        ddl.ForeignKeySpec(
            "collection_summary_dataset_type",
            source=(collectionFieldSpec.name, "dataset_type_id"),
            target=(collectionFieldSpec.name, "dataset_type_id"),
        )
    )
    # Add dimension fields (part of the temporal lookup index/constraint).
    for dimension in datasetType.dimensions.required:
        fieldSpec = addDimensionForeignKey(tableSpec, dimension=dimension, nullable=False, primaryKey=False)
        index.append(fieldSpec.name)
        # If this is a governor dimension, add a foreign key constraint to the
        # collection_summary_<dimension> table.
        if isinstance(dimension, GovernorDimension):
            tableSpec.foreignKeys.append(
                ddl.ForeignKeySpec(
                    f"collection_summary_{dimension.name}",
                    source=(collectionFieldSpec.name, fieldSpec.name),
                    target=(collectionFieldSpec.name, fieldSpec.name),
                )
            )
    # Add validity-range field(s) (part of the temporal lookup
    # index/constraint).
    tsFieldSpecs = tsRepr.makeFieldSpecs(nullable=False)
    for fieldSpec in tsFieldSpecs:
        tableSpec.fields.add(fieldSpec)
    if tsRepr.hasExclusionConstraint():
        # This database's timespan representation can define a database-level
        # constraint that prevents overlapping validity ranges for entries
        # with the same DatasetType, collection, and data ID.
        # This also creates an index.
        index.append(tsRepr)
        tableSpec.exclusion.add(tuple(index))
    else:
        # No database-level constraint possible. We'll have to simulate that
        # in our DatasetRecordStorage.certify() implementation, and just
        # create a regular index here in the hope that helps with lookups.
        index.extend(fieldSpec.name for fieldSpec in tsFieldSpecs)
        tableSpec.indexes.add(tuple(index))  # type: ignore
    return tableSpec
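
# End-to-end sketch (hedged; ``db`` is a `Database` instance, and
# ``ensureTableExists`` is assumed from that interface; the real manager code
# may differ). A dynamic calibration table could be created with:
#
#     name = makeCalibTableName(datasetType, dimensionsKey)
#     spec = makeCalibTableSpec(datasetType, collections, tsRepr)
#     table = db.ensureTableExists(name, spec)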