Coverage for python/lsst/daf/butler/registry/tables.py: 60%

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ["RegistryTablesTuple", "makeRegistryTableSpecs"]

from collections import namedtuple

import sqlalchemy

from ..core.dimensions import DimensionUniverse
from ..core.dimensions.schema import addDimensionForeignKey

from ..core import ddl


RegistryTablesTuple = namedtuple(
    "RegistryTablesTuple",
    [
        "dataset",
        "dataset_composition",
        "dataset_type",
        "dataset_type_dimensions",
        "dataset_collection",
        "run",
        "quantum",
        "dataset_consumers",
        "dataset_storage",
    ]
)
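# Each field of the tuple holds a `ddl.TableSpec` for the table of the same
# name; `makeRegistryTableSpecs` below populates them. An illustrative usage
# sketch (hypothetical caller code, not taken from this module):
#
#     tables = makeRegistryTableSpecs(universe)
#     tables.dataset          # ddl.TableSpec for the dataset table
#     tables._asdict()        # standard namedtuple method: name -> spec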


def makeRegistryTableSpecs(universe: DimensionUniverse) -> RegistryTablesTuple:
    """Construct descriptions of all tables in the Registry, aside from those
    that correspond to `DimensionElement` instances.

    Parameters
    ----------
    universe : `DimensionUniverse`
        All dimensions known to the `Registry`.

    Returns
    -------
    specs : `RegistryTablesTuple`
        A named tuple containing `ddl.TableSpec` instances.
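
    Examples
    --------
    A minimal sketch (hypothetical; whether `DimensionUniverse` can be
    constructed with no arguments depends on the daf_butler version):

    >>> universe = DimensionUniverse()            # doctest: +SKIP
    >>> specs = makeRegistryTableSpecs(universe)  # doctest: +SKIP
    >>> specs.dataset.fields                      # doctest: +SKIP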
64 """
65 # The 'dataset' table is special: we need to add foreign key fields for
66 # each dimension in the universe.
67 dataset = ddl.TableSpec(
68 fields=[
69 ddl.FieldSpec(
70 name="dataset_id",
71 dtype=sqlalchemy.BigInteger,
72 primaryKey=True,
73 autoincrement=True,
74 doc="A unique autoincrement field used as the primary key for dataset.",
75 ),
76 ddl.FieldSpec(
77 name="dataset_type_name",
78 dtype=sqlalchemy.String,
79 length=128,
80 nullable=False,
81 doc=(
82 "The name of the DatasetType associated with this dataset; a "
83 "reference to the dataset_type table."
84 ),
85 ),
86 ddl.FieldSpec(
87 name="run_id",
88 dtype=sqlalchemy.BigInteger,
89 nullable=False,
90 doc=(
91 "The Id of the run that produced this dataset, providing access to "
92 "coarse provenance information."
93 ),
94 ),
95 ddl.FieldSpec(
96 name="quantum_id",
97 dtype=sqlalchemy.BigInteger,
98 doc=(
99 "The id of the quantum that produced this dataset, providing access "
100 "to fine-grained provenance information. May be null for datasets "
101 "not produced by running a PipelineTask."
102 ),
103 ),
104 ddl.FieldSpec(
105 name="dataset_ref_hash",
106 dtype=ddl.Base64Bytes,
107 nbytes=32,
108 nullable=False,
109 doc="Secure hash of the data ID (i.e. dimension link values) and dataset_type_name.",
110 ),
111 ],
112 foreignKeys=[
113 ddl.ForeignKeySpec(
114 table="dataset_type",
115 source=("dataset_type_name",),
116 target=("dataset_type_name",),
117 ),
118 ddl.ForeignKeySpec(
119 table="run", source=("run_id",), target=("id",), onDelete="CASCADE"
120 ),
121 ddl.ForeignKeySpec(
122 table="quantum",
123 source=("quantum_id",),
124 target=("id",),
125 onDelete="SET NULL",
126 ),
127 ],
128 )
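    # For illustration only (approximate; the actual DDL is generated from
    # this spec by the ddl module via SQLAlchemy and varies by dialect):
    #
    #   CREATE TABLE dataset (
    #       dataset_id BIGINT PRIMARY KEY,  -- autoincrement
    #       dataset_type_name VARCHAR(128) NOT NULL REFERENCES dataset_type,
    #       run_id BIGINT NOT NULL REFERENCES run (id) ON DELETE CASCADE,
    #       quantum_id BIGINT REFERENCES quantum (id) ON DELETE SET NULL,
    #       dataset_ref_hash VARCHAR NOT NULL,
    #       -- plus one nullable foreign key column per dimension (see below)
    #   );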
    for dimension in universe.dimensions:
        addDimensionForeignKey(dataset, dimension, primaryKey=False, nullable=True)
    # All other table specs are fully static and do not depend on
    # configuration.
    return RegistryTablesTuple(
        dataset=dataset,
        dataset_composition=ddl.TableSpec(
            doc="A self-join table that relates components of a dataset to their parents.",
            fields=[
                ddl.FieldSpec(
                    name="parent_dataset_id",
                    dtype=sqlalchemy.BigInteger,
                    primaryKey=True,
                    doc="Link to the dataset entry for the parent/composite dataset.",
                ),
                ddl.FieldSpec(
                    name="component_dataset_id",
                    dtype=sqlalchemy.BigInteger,
                    primaryKey=True,
                    doc="Link to the dataset entry for a child/component dataset.",
                ),
                ddl.FieldSpec(
                    name="component_name",
                    dtype=sqlalchemy.String,
                    length=32,
                    nullable=False,
                    doc="Name of this component within this composite.",
                ),
            ],
            foreignKeys=[
                ddl.ForeignKeySpec(
                    table="dataset",
                    source=("parent_dataset_id",),
                    target=("dataset_id",),
                    onDelete="CASCADE",
                ),
                ddl.ForeignKeySpec(
                    table="dataset",
                    source=("component_dataset_id",),
                    target=("dataset_id",),
                    onDelete="CASCADE",
                ),
            ],
        ),
        dataset_type=ddl.TableSpec(
            doc="A table containing the set of registered DatasetTypes and their StorageClasses.",
            fields=[
                ddl.FieldSpec(
                    name="dataset_type_name",
                    dtype=sqlalchemy.String,
                    length=128,
                    primaryKey=True,
                    nullable=False,
                    doc="Globally unique name for this DatasetType.",
                ),
                ddl.FieldSpec(
                    name="storage_class",
                    dtype=sqlalchemy.String,
                    length=64,
                    nullable=False,
                    doc=(
                        "Name of the StorageClass associated with this DatasetType. All "
                        "registries must support the full set of standard StorageClasses, "
                        "so the set of allowed StorageClasses and their properties is "
                        "maintained in the registry Python code rather than the database."
                    ),
                ),
            ],
        ),
        dataset_type_dimensions=ddl.TableSpec(
            doc=(
                "A definition table indicating which dimension fields in Dataset are "
                "non-NULL for Datasets with this DatasetType."
            ),
            fields=[
                ddl.FieldSpec(
                    name="dataset_type_name",
                    dtype=sqlalchemy.String,
                    length=128,
                    primaryKey=True,
                    doc="The name of the DatasetType.",
                ),
                ddl.FieldSpec(
                    name="dimension_name",
                    dtype=sqlalchemy.String,
                    length=32,
                    primaryKey=True,
                    doc="The name of a Dimension associated with this DatasetType.",
                ),
            ],
            foreignKeys=[
                ddl.ForeignKeySpec(
                    table="dataset_type",
                    source=("dataset_type_name",),
                    target=("dataset_type_name",),
                )
            ],
        ),
        dataset_collection=ddl.TableSpec(
            doc=(
                "A table that associates Dataset records with Collections, "
                "which are implemented simply as string tags."
            ),
            fields=[
                ddl.FieldSpec(
                    name="dataset_id",
                    dtype=sqlalchemy.BigInteger,
                    primaryKey=True,
                    nullable=False,
                    doc="Link to a unique record in the dataset table.",
                ),
                ddl.FieldSpec(
                    name="dataset_ref_hash",
                    dtype=ddl.Base64Bytes,
                    nbytes=32,
                    nullable=False,
                    doc="Secure hash of the data ID (i.e. dimension link values) and dataset_type_name.",
                ),
                ddl.FieldSpec(
                    name="collection",
                    dtype=sqlalchemy.String,
                    length=128,
                    primaryKey=True,
                    nullable=False,
                    doc="Name of a Collection with which this Dataset is associated.",
                ),
            ],
            foreignKeys=[
                ddl.ForeignKeySpec(
                    table="dataset",
                    source=("dataset_id",),
                    target=("dataset_id",),
                    onDelete="CASCADE",
                )
            ],
            unique=[("dataset_ref_hash", "collection")],
        ),
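        # Note: combined with the (dataset_id, collection) primary key above,
        # the unique constraint on (dataset_ref_hash, collection) means a
        # given data ID and DatasetType can be associated with each
        # collection at most once.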
        run=ddl.TableSpec(
            doc="A table used to capture coarse provenance for all datasets.",
            fields=[
                ddl.FieldSpec(
                    name="id",
                    dtype=sqlalchemy.BigInteger,
                    primaryKey=True,
                    autoincrement=True,
                    doc="A unique autoincrement integer identifier for this run.",
                ),
                ddl.FieldSpec(
                    name="name",
                    dtype=sqlalchemy.String,
                    length=128,
                    doc="The name of the run.",
                ),
                ddl.FieldSpec(
                    name="start_time",
                    dtype=sqlalchemy.DateTime,
                    nullable=True,
                    doc="The start time for the run.",
                ),
                ddl.FieldSpec(
                    name="end_time",
                    dtype=sqlalchemy.DateTime,
                    nullable=True,
                    doc="The end time for the run.",
                ),
                ddl.FieldSpec(
                    name="host",
                    dtype=sqlalchemy.String,
                    length=64,
                    nullable=True,
                    doc="The system on which the run was executed.",
                ),
            ],
            unique=[("name",)],
        ),
        quantum=ddl.TableSpec(
            doc="A table used to capture fine-grained provenance for datasets produced by PipelineTasks.",
            fields=[
                ddl.FieldSpec(
                    name="id",
                    dtype=sqlalchemy.BigInteger,
                    primaryKey=True,
                    autoincrement=True,
                    doc="A unique autoincrement integer identifier for this quantum.",
                ),
                ddl.FieldSpec(
                    name="task",
                    dtype=sqlalchemy.String,
                    length=256,
                    doc="Fully qualified name of the PipelineTask that executed this quantum.",
                ),
                ddl.FieldSpec(
                    name="run_id",
                    dtype=sqlalchemy.BigInteger,
                    doc="Link to the run this quantum is a part of.",
                ),
                ddl.FieldSpec(
                    name="start_time",
                    dtype=sqlalchemy.DateTime,
                    nullable=True,
                    doc="The start time for the quantum.",
                ),
                ddl.FieldSpec(
                    name="end_time",
                    dtype=sqlalchemy.DateTime,
                    nullable=True,
                    doc="The end time for the quantum.",
                ),
                ddl.FieldSpec(
                    name="host",
                    dtype=sqlalchemy.String,
                    length=64,
                    nullable=True,
                    doc="The system on which the quantum was executed.",
                ),
            ],
            foreignKeys=[
                ddl.ForeignKeySpec(table="run", source=("run_id",), target=("id",), onDelete="CASCADE")
            ],
        ),
        dataset_consumers=ddl.TableSpec(
            doc="A table relating Quantum records to the Datasets they used as inputs.",
            fields=[
                ddl.FieldSpec(
                    name="quantum_id",
                    dtype=sqlalchemy.BigInteger,
                    nullable=False,
                    doc="A link to the associated Quantum.",
                ),
                ddl.FieldSpec(
                    name="dataset_id",
                    dtype=sqlalchemy.BigInteger,
                    nullable=False,
                    doc="A link to the associated Dataset.",
                ),
                ddl.FieldSpec(
                    name="actual",
                    dtype=sqlalchemy.Boolean,
                    nullable=False,
                    doc=(
                        "Whether the Dataset was actually used as an input by the Quantum "
                        "(as opposed to just predicted to be used during preflight)."
                    ),
                ),
            ],
            foreignKeys=[
                ddl.ForeignKeySpec(
                    table="quantum",
                    source=("quantum_id",),
                    target=("id",),
                    onDelete="CASCADE",
                ),
                ddl.ForeignKeySpec(
                    table="dataset",
                    source=("dataset_id",),
                    target=("dataset_id",),
                    onDelete="CASCADE",
                ),
            ],
        ),
        dataset_storage=ddl.TableSpec(
            doc=(
                "A table that provides information on whether a Dataset is stored in "
                "one or more Datastores. The presence or absence of a record in this "
                "table itself indicates whether the Dataset is present in that "
                "Datastore."
            ),
            fields=[
                ddl.FieldSpec(
                    name="dataset_id",
                    dtype=sqlalchemy.BigInteger,
                    primaryKey=True,
                    nullable=False,
                    doc="Link to the dataset table.",
                ),
                ddl.FieldSpec(
                    name="datastore_name",
                    dtype=sqlalchemy.String,
                    length=256,
                    primaryKey=True,
                    nullable=False,
                    doc="Name of the Datastore this entry corresponds to.",
                ),
            ],
            foreignKeys=[
                ddl.ForeignKeySpec(
                    table="dataset", source=("dataset_id",), target=("dataset_id",)
                )
            ],
        ),
    )
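

# A minimal end-to-end sketch (illustrative; the way a DimensionUniverse is
# constructed may differ between daf_butler versions):
#
#     universe = DimensionUniverse()
#     tables = makeRegistryTableSpecs(universe)
#     for name, spec in tables._asdict().items():
#         print(name, [field.name for field in spec.fields])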