Coverage for python/lsst/daf/butler/registry/tables.py : 43%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = ["RegistryTablesTuple", "makeRegistryTableSpecs"]
25from collections import namedtuple
27import sqlalchemy
29from ..core.dimensions import DimensionUniverse
30from ..core.dimensions.schema import addDimensionForeignKey
32from ..core import ddl
34from .interfaces import CollectionManager
# Named container for the specifications of every Registry table that does
# not correspond to a dimension element.  Field order matches the order in
# which the tables are typically created.
RegistryTablesTuple = namedtuple(
    "RegistryTablesTuple",
    "dataset dataset_composition dataset_type dataset_type_dimensions "
    "dataset_collection quantum dataset_consumers dataset_storage",
)
def _makeDatasetTableSpec(universe: DimensionUniverse, collections: CollectionManager
                          ) -> ddl.TableSpec:
    """Construct the spec for the 'dataset' table.

    This table is special: in addition to its static fields, it needs a
    foreign key field for each dimension in the universe as well as a
    foreign key field for the run, both of which depend on configuration.
    """
    spec = ddl.TableSpec(
        fields=[
            ddl.FieldSpec(
                name="dataset_id",
                dtype=sqlalchemy.BigInteger,
                primaryKey=True,
                autoincrement=True,
                doc="A unique autoincrement field used as the primary key for dataset.",
            ),
            ddl.FieldSpec(
                name="dataset_type_name",
                dtype=sqlalchemy.String,
                length=128,
                nullable=False,
                doc=(
                    "The name of the DatasetType associated with this dataset; a "
                    "reference to the dataset_type table."
                ),
            ),
            ddl.FieldSpec(
                name="quantum_id",
                dtype=sqlalchemy.BigInteger,
                doc=(
                    "The id of the quantum that produced this dataset, providing access "
                    "to fine-grained provenance information. May be null for datasets "
                    "not produced by running a PipelineTask."
                ),
            ),
            ddl.FieldSpec(
                name="dataset_ref_hash",
                dtype=ddl.Base64Bytes,
                nbytes=32,
                nullable=False,
                doc="Secure hash of the data ID (i.e. dimension link values) and dataset_type_name.",
            ),
        ],
        foreignKeys=[
            ddl.ForeignKeySpec(
                table="dataset_type",
                source=("dataset_type_name",),
                target=("dataset_type_name",),
            ),
            ddl.ForeignKeySpec(
                table="quantum",
                source=("quantum_id",),
                target=("id",),
                onDelete="SET NULL",
            ),
        ],
    )
    # Add the run foreign key, then make (hash, run) unique so the same
    # DatasetRef cannot be inserted twice into the same run.
    field = collections.addRunForeignKey(spec, onDelete="CASCADE", nullable=False)
    spec.unique.add(("dataset_ref_hash", field.name))
    # One nullable foreign key field per dimension; which are non-null for a
    # given row depends on its DatasetType (see dataset_type_dimensions).
    for dimension in universe.dimensions:
        addDimensionForeignKey(spec, dimension, primaryKey=False, nullable=True)
    return spec


def _makeDatasetCollectionTableSpec(collections: CollectionManager) -> ddl.TableSpec:
    """Construct the spec for the 'dataset_collection' table, which needs a
    configuration-dependent foreign key to the collection table.
    """
    spec = ddl.TableSpec(
        doc=(
            "A table that associates Dataset records with Collections, "
            "which are implemented simply as string tags."
        ),
        fields=[
            ddl.FieldSpec(
                name="dataset_id",
                dtype=sqlalchemy.BigInteger,
                primaryKey=True,
                nullable=False,
                doc="Link to a unique record in the dataset table.",
            ),
            ddl.FieldSpec(
                name="dataset_ref_hash",
                dtype=ddl.Base64Bytes,
                nbytes=32,
                nullable=False,
                doc="Secure hash of the data ID (i.e. dimension link values) and dataset_type_name.",
            ),
        ],
        foreignKeys=[
            ddl.ForeignKeySpec(
                table="dataset",
                source=("dataset_id",),
                target=("dataset_id",),
                onDelete="CASCADE",
            )
        ],
    )
    # (hash, collection) must be unique: a collection may contain at most one
    # dataset with a given DatasetType and data ID.
    field = collections.addCollectionForeignKey(spec, onDelete="CASCADE", nullable=False)
    spec.unique.add(("dataset_ref_hash", field.name))
    return spec


def _makeQuantumTableSpec(collections: CollectionManager) -> ddl.TableSpec:
    """Construct the spec for the 'quantum' table, which needs a
    configuration-dependent foreign key to the run table.
    """
    spec = ddl.TableSpec(
        doc="A table used to capture fine-grained provenance for datasets produced by PipelineTasks.",
        fields=[
            ddl.FieldSpec(
                name="id",
                dtype=sqlalchemy.BigInteger,
                primaryKey=True,
                autoincrement=True,
                doc="A unique autoincrement integer identifier for this quantum.",
            ),
            ddl.FieldSpec(
                name="task",
                dtype=sqlalchemy.String,
                length=256,
                doc="Fully qualified name of the SuperTask that executed this quantum.",
            ),
            ddl.FieldSpec(
                name="start_time",
                dtype=ddl.AstropyTimeNsecTai,
                nullable=True,
                doc="The start time for the quantum.",
            ),
            ddl.FieldSpec(
                name="end_time",
                dtype=ddl.AstropyTimeNsecTai,
                nullable=True,
                doc="The end time for the quantum.",
            ),
            ddl.FieldSpec(
                name="host",
                dtype=sqlalchemy.String,
                length=64,
                nullable=True,
                doc="The system on which the quantum was executed.",
            ),
        ],
    )
    collections.addRunForeignKey(spec, onDelete="CASCADE", nullable=False)
    return spec


def makeRegistryTableSpecs(universe: DimensionUniverse, collections: CollectionManager
                           ) -> RegistryTablesTuple:
    """Construct descriptions of all tables in the Registry, aside from those
    that correspond to `DimensionElement` instances.

    Parameters
    ----------
    universe : `DimensionUniverse`
        All dimensions known to the `Registry`.
    collections : `CollectionManager`
        The `CollectionManager` that will be used for this `Registry`; used to
        create foreign keys to the run and collection tables.

    Returns
    -------
    specs : `RegistryTablesTuple`
        A named tuple containing `ddl.TableSpec` instances.
    """
    # The dataset, dataset_collection, and quantum specs depend on the
    # dimension universe and/or the collection manager; build those via
    # helpers.  All other table specs are fully static and do not depend on
    # configuration.
    return RegistryTablesTuple(
        dataset=_makeDatasetTableSpec(universe, collections),
        dataset_composition=ddl.TableSpec(
            doc="A self-join table that relates components of a dataset to their parents.",
            fields=[
                ddl.FieldSpec(
                    name="parent_dataset_id",
                    dtype=sqlalchemy.BigInteger,
                    primaryKey=True,
                    doc="Link to the dataset entry for the parent/composite dataset.",
                ),
                ddl.FieldSpec(
                    name="component_dataset_id",
                    dtype=sqlalchemy.BigInteger,
                    primaryKey=True,
                    doc="Link to the dataset entry for a child/component dataset.",
                ),
                ddl.FieldSpec(
                    name="component_name",
                    dtype=sqlalchemy.String,
                    length=32,
                    nullable=False,
                    doc="Name of this component within this composite.",
                ),
            ],
            foreignKeys=[
                ddl.ForeignKeySpec(
                    table="dataset",
                    source=("parent_dataset_id",),
                    target=("dataset_id",),
                    onDelete="CASCADE",
                ),
                ddl.ForeignKeySpec(
                    table="dataset",
                    source=("component_dataset_id",),
                    target=("dataset_id",),
                    onDelete="CASCADE",
                ),
            ],
        ),
        dataset_type=ddl.TableSpec(
            doc="A Table containing the set of registered DatasetTypes and their StorageClasses.",
            fields=[
                ddl.FieldSpec(
                    name="dataset_type_name",
                    dtype=sqlalchemy.String,
                    length=128,
                    primaryKey=True,
                    nullable=False,
                    doc="Globally unique name for this DatasetType.",
                ),
                ddl.FieldSpec(
                    name="storage_class",
                    dtype=sqlalchemy.String,
                    length=64,
                    nullable=False,
                    doc=(
                        "Name of the StorageClass associated with this DatasetType. All "
                        "registries must support the full set of standard StorageClasses, "
                        "so the set of allowed StorageClasses and their properties is "
                        "maintained in the registry Python code rather than the database."
                    ),
                ),
            ],
        ),
        dataset_type_dimensions=ddl.TableSpec(
            doc=(
                "A definition table indicating which dimension fields in Dataset are "
                "non-NULL for Datasets with this DatasetType."
            ),
            fields=[
                ddl.FieldSpec(
                    name="dataset_type_name",
                    dtype=sqlalchemy.String,
                    length=128,
                    primaryKey=True,
                    doc="The name of the DatasetType.",
                ),
                ddl.FieldSpec(
                    name="dimension_name",
                    dtype=sqlalchemy.String,
                    length=32,
                    primaryKey=True,
                    doc="The name of a Dimension associated with this DatasetType.",
                ),
            ],
            foreignKeys=[
                ddl.ForeignKeySpec(
                    table="dataset_type",
                    source=("dataset_type_name",),
                    target=("dataset_type_name",),
                )
            ],
        ),
        dataset_collection=_makeDatasetCollectionTableSpec(collections),
        quantum=_makeQuantumTableSpec(collections),
        dataset_consumers=ddl.TableSpec(
            doc="A table relating Quantum records to the Datasets they used as inputs.",
            fields=[
                ddl.FieldSpec(
                    name="quantum_id",
                    dtype=sqlalchemy.BigInteger,
                    nullable=False,
                    doc="A link to the associated Quantum.",
                ),
                ddl.FieldSpec(
                    name="dataset_id",
                    dtype=sqlalchemy.BigInteger,
                    nullable=False,
                    doc="A link to the associated Dataset.",
                ),
                ddl.FieldSpec(
                    name="actual",
                    dtype=sqlalchemy.Boolean,
                    nullable=False,
                    doc=(
                        "Whether the Dataset was actually used as an input by the Quantum "
                        "(as opposed to just predicted to be used during preflight)."
                    ),
                ),
            ],
            foreignKeys=[
                ddl.ForeignKeySpec(
                    table="quantum",
                    source=("quantum_id",),
                    target=("id",),
                    onDelete="CASCADE",
                ),
                ddl.ForeignKeySpec(
                    table="dataset",
                    source=("dataset_id",),
                    target=("dataset_id",),
                    onDelete="CASCADE",
                ),
            ],
        ),
        dataset_storage=ddl.TableSpec(
            doc=(
                "A table that provides information on whether a Dataset is stored in "
                "one or more Datastores. The presence or absence of a record in this "
                "table itself indicates whether the Dataset is present in that "
                "Datastore. "
            ),
            fields=[
                ddl.FieldSpec(
                    name="dataset_id",
                    dtype=sqlalchemy.BigInteger,
                    primaryKey=True,
                    nullable=False,
                    doc="Link to the dataset table.",
                ),
                ddl.FieldSpec(
                    name="datastore_name",
                    dtype=sqlalchemy.String,
                    length=256,
                    primaryKey=True,
                    nullable=False,
                    doc="Name of the Datastore this entry corresponds to.",
                ),
            ],
            foreignKeys=[
                ddl.ForeignKeySpec(
                    table="dataset", source=("dataset_id",), target=("dataset_id",)
                )
            ],
        ),
    )