Coverage for python/lsst/daf/butler/registry/tables.py: 38%

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ["RegistryTablesTuple", "makeRegistryTableSpecs"]

from collections import namedtuple

import sqlalchemy

from ..core.dimensions import DimensionUniverse
from ..core.dimensions.schema import addDimensionForeignKey

from ..core import ddl

from .interfaces import CollectionManager


RegistryTablesTuple = namedtuple(
    "RegistryTablesTuple",
    [
        "dataset",
        "dataset_composition",
        "dataset_type",
        "dataset_type_dimensions",
        "dataset_collection",
        "quantum",
        "dataset_consumers",
        "dataset_location",
        "dataset_location_trash",
    ]
)
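

# Usage sketch (editorial addition, not part of the original module): the
# tuple returned by makeRegistryTableSpecs below pairs each field name with a
# ddl.TableSpec, so a caller can create all tables generically.  The `db`
# object and its `ensureTableExists` method are assumptions about the
# database layer, not something this module provides:
#
#     specs = makeRegistryTableSpecs(universe, collections)
#     for name, spec in zip(specs._fields, specs):
#         db.ensureTableExists(name, spec)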
def makeRegistryTableSpecs(universe: DimensionUniverse, collections: CollectionManager
                           ) -> RegistryTablesTuple:
    """Construct descriptions of all tables in the Registry, aside from those
    that correspond to `DimensionElement` instances.

    Parameters
    ----------
    universe : `DimensionUniverse`
        All dimensions known to the `Registry`.
    collections : `CollectionManager`
        The `CollectionManager` that will be used for this `Registry`; used to
        create foreign keys to the run and collection tables.

    Returns
    -------
    specs : `RegistryTablesTuple`
        A named tuple containing `ddl.TableSpec` instances.
    """
    # The 'dataset' table is special: we need to add foreign key fields for
    # each dimension in the universe, as well as a foreign key field for run.
    dataset = ddl.TableSpec(
        fields=[
            ddl.FieldSpec(
                name="dataset_id",
                dtype=sqlalchemy.BigInteger,
                primaryKey=True,
                autoincrement=True,
                doc="A unique autoincrement field used as the primary key for dataset.",
            ),
            ddl.FieldSpec(
                name="dataset_type_name",
                dtype=sqlalchemy.String,
                length=128,
                nullable=False,
                doc=(
                    "The name of the DatasetType associated with this dataset; a "
                    "reference to the dataset_type table."
                ),
            ),
            ddl.FieldSpec(
                name="quantum_id",
                dtype=sqlalchemy.BigInteger,
                doc=(
                    "The id of the quantum that produced this dataset, providing access "
                    "to fine-grained provenance information. May be null for datasets "
                    "not produced by running a PipelineTask."
                ),
            ),
            ddl.FieldSpec(
                name="dataset_ref_hash",
                dtype=ddl.Base64Bytes,
                nbytes=32,
                nullable=False,
                doc="Secure hash of the data ID (i.e. dimension link values) and dataset_type_name.",
            ),
        ],
        foreignKeys=[
            ddl.ForeignKeySpec(
                table="dataset_type",
                source=("dataset_type_name",),
                target=("dataset_type_name",),
            ),
            ddl.ForeignKeySpec(
                table="quantum",
                source=("quantum_id",),
                target=("id",),
                onDelete="SET NULL",
            ),
        ],
    )
    field = collections.addRunForeignKey(dataset, onDelete="CASCADE", nullable=False)
    dataset.unique.add(("dataset_ref_hash", field.name))
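    # Editorial note: the unique constraint just added means a given
    # dataset_ref_hash (data ID plus dataset type) can appear at most once
    # per run.  The dimension columns added below are nullable because each
    # DatasetType uses only a subset of the universe's dimensions (see the
    # dataset_type_dimensions spec below).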
    for dimension in universe.dimensions:
        addDimensionForeignKey(dataset, dimension, primaryKey=False, nullable=True)

    # The dataset_collection table needs a foreign key to collection.
    dataset_collection = ddl.TableSpec(
        doc=(
            "A table that associates Dataset records with Collections, "
            "which are implemented simply as string tags."
        ),
        fields=[
            ddl.FieldSpec(
                name="dataset_id",
                dtype=sqlalchemy.BigInteger,
                primaryKey=True,
                nullable=False,
                doc="Link to a unique record in the dataset table.",
            ),
            ddl.FieldSpec(
                name="dataset_ref_hash",
                dtype=ddl.Base64Bytes,
                nbytes=32,
                nullable=False,
                doc="Secure hash of the data ID (i.e. dimension link values) and dataset_type_name.",
            ),
        ],
        foreignKeys=[
            ddl.ForeignKeySpec(
                table="dataset",
                source=("dataset_id",),
                target=("dataset_id",),
                onDelete="CASCADE",
            )
        ],
    )
    field = collections.addCollectionForeignKey(dataset_collection, onDelete="CASCADE", nullable=False)
    dataset_collection.unique.add(("dataset_ref_hash", field.name))
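    # Editorial note: as with the dataset table, the unique constraint above
    # ensures that a dataset with a given dataset_ref_hash can be tagged into
    # each collection at most once.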

    # The quantum table needs a foreign key to run.
    quantum = ddl.TableSpec(
        doc="A table used to capture fine-grained provenance for datasets produced by PipelineTasks.",
        fields=[
            ddl.FieldSpec(
                name="id",
                dtype=sqlalchemy.BigInteger,
                primaryKey=True,
                autoincrement=True,
                doc="A unique autoincrement integer identifier for this quantum.",
            ),
            ddl.FieldSpec(
                name="task",
                dtype=sqlalchemy.String,
                length=256,
                doc="Fully qualified name of the PipelineTask that executed this quantum.",
            ),
            ddl.FieldSpec(
                name="start_time",
                dtype=ddl.AstropyTimeNsecTai,
                nullable=True,
                doc="The start time for the quantum.",
            ),
            ddl.FieldSpec(
                name="end_time",
                dtype=ddl.AstropyTimeNsecTai,
                nullable=True,
                doc="The end time for the quantum.",
            ),
            ddl.FieldSpec(
                name="host",
                dtype=sqlalchemy.String,
                length=64,
                nullable=True,
                doc="The system on which the quantum was executed.",
            ),
        ],
    )
    collections.addRunForeignKey(quantum, onDelete="CASCADE", nullable=False)
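    # Editorial note: onDelete="CASCADE" here means deleting a run also
    # deletes the quanta executed within it (and, via the SET NULL rule on
    # dataset.quantum_id above, detaches any datasets those quanta produced).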

    # We want the dataset_location and dataset_location_trash tables
    # to have the same definition.
    dataset_location_spec = dict(
        doc=(
            "A table that provides information on whether a Dataset is stored in "
            "one or more Datastores. The presence or absence of a record in this "
            "table itself indicates whether the Dataset is present in that "
            "Datastore."
        ),
        fields=[
            ddl.FieldSpec(
                name="dataset_id",
                dtype=sqlalchemy.BigInteger,
                primaryKey=True,
                nullable=False,
                doc="Link to the dataset table.",
            ),
            ddl.FieldSpec(
                name="datastore_name",
                dtype=sqlalchemy.String,
                length=256,
                primaryKey=True,
                nullable=False,
                doc="Name of the Datastore this entry corresponds to.",
            ),
        ],
    )

    dataset_location = ddl.TableSpec(**dataset_location_spec,
                                     foreignKeys=[
                                         ddl.ForeignKeySpec(
                                             table="dataset", source=("dataset_id",), target=("dataset_id",)
                                         )
                                     ])

    dataset_location_trash = ddl.TableSpec(**dataset_location_spec)
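    # Editorial note (an inference from the code above, not stated in the
    # original source): dataset_location_trash omits the foreign key to
    # dataset, presumably so trashed location records can outlive the dataset
    # rows they refer to while Datastore cleanup is in progress.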

    # All other table specs are fully static and do not depend on
    # configuration.
    return RegistryTablesTuple(
        dataset=dataset,
        dataset_composition=ddl.TableSpec(
            doc="A self-join table that relates components of a dataset to their parents.",
            fields=[
                ddl.FieldSpec(
                    name="parent_dataset_id",
                    dtype=sqlalchemy.BigInteger,
                    primaryKey=True,
                    doc="Link to the dataset entry for the parent/composite dataset.",
                ),
                ddl.FieldSpec(
                    name="component_dataset_id",
                    dtype=sqlalchemy.BigInteger,
                    primaryKey=True,
                    doc="Link to the dataset entry for a child/component dataset.",
                ),
                ddl.FieldSpec(
                    name="component_name",
                    dtype=sqlalchemy.String,
                    length=32,
                    nullable=False,
                    doc="Name of this component within this composite.",
                ),
            ],
            foreignKeys=[
                ddl.ForeignKeySpec(
                    table="dataset",
                    source=("parent_dataset_id",),
                    target=("dataset_id",),
                    onDelete="CASCADE",
                ),
                ddl.ForeignKeySpec(
                    table="dataset",
                    source=("component_dataset_id",),
                    target=("dataset_id",),
                    onDelete="CASCADE",
                ),
            ],
        ),
        dataset_type=ddl.TableSpec(
            doc="A table containing the set of registered DatasetTypes and their StorageClasses.",
            fields=[
                ddl.FieldSpec(
                    name="dataset_type_name",
                    dtype=sqlalchemy.String,
                    length=128,
                    primaryKey=True,
                    nullable=False,
                    doc="Globally unique name for this DatasetType.",
                ),
                ddl.FieldSpec(
                    name="storage_class",
                    dtype=sqlalchemy.String,
                    length=64,
                    nullable=False,
                    doc=(
                        "Name of the StorageClass associated with this DatasetType. All "
                        "registries must support the full set of standard StorageClasses, "
                        "so the set of allowed StorageClasses and their properties is "
                        "maintained in the registry Python code rather than the database."
                    ),
                ),
            ],
        ),
        dataset_type_dimensions=ddl.TableSpec(
            doc=(
                "A definition table indicating which dimension fields in Dataset are "
                "non-NULL for Datasets with this DatasetType."
            ),
            fields=[
                ddl.FieldSpec(
                    name="dataset_type_name",
                    dtype=sqlalchemy.String,
                    length=128,
                    primaryKey=True,
                    doc="The name of the DatasetType.",
                ),
                ddl.FieldSpec(
                    name="dimension_name",
                    dtype=sqlalchemy.String,
                    length=32,
                    primaryKey=True,
                    doc="The name of a Dimension associated with this DatasetType.",
                ),
            ],
            foreignKeys=[
                ddl.ForeignKeySpec(
                    table="dataset_type",
                    source=("dataset_type_name",),
                    target=("dataset_type_name",),
                )
            ],
        ),
        dataset_collection=dataset_collection,
        quantum=quantum,
        dataset_consumers=ddl.TableSpec(
            doc="A table relating Quantum records to the Datasets they used as inputs.",
            fields=[
                ddl.FieldSpec(
                    name="quantum_id",
                    dtype=sqlalchemy.BigInteger,
                    nullable=False,
                    doc="A link to the associated Quantum.",
                ),
                ddl.FieldSpec(
                    name="dataset_id",
                    dtype=sqlalchemy.BigInteger,
                    nullable=False,
                    doc="A link to the associated Dataset.",
                ),
                ddl.FieldSpec(
                    name="actual",
                    dtype=sqlalchemy.Boolean,
                    nullable=False,
                    doc=(
                        "Whether the Dataset was actually used as an input by the Quantum "
                        "(as opposed to just predicted to be used during preflight)."
                    ),
                ),
            ],
            foreignKeys=[
                ddl.ForeignKeySpec(
                    table="quantum",
                    source=("quantum_id",),
                    target=("id",),
                    onDelete="CASCADE",
                ),
                ddl.ForeignKeySpec(
                    table="dataset",
                    source=("dataset_id",),
                    target=("dataset_id",),
                    onDelete="CASCADE",
                ),
            ],
        ),
        dataset_location=dataset_location,
        dataset_location_trash=dataset_location_trash,
    )