Coverage for python/lsst/daf/butler/registry/tables.py : 38%

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ["RegistryTablesTuple", "makeRegistryTableSpecs"]

from collections import namedtuple

import sqlalchemy

from ..core.dimensions import DimensionUniverse
from ..core.dimensions.schema import addDimensionForeignKey

from ..core import ddl

from .interfaces import CollectionManager
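

# Named tuple whose fields are the `ddl.TableSpec` definitions for the
# Registry tables that do not correspond to dimension elements; constructed
# by `makeRegistryTableSpecs` below.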
RegistryTablesTuple = namedtuple(
    "RegistryTablesTuple",
    [
        "dataset",
        "dataset_composition",
        "dataset_type",
        "dataset_type_dimensions",
        "dataset_collection",
        "quantum",
        "dataset_consumers",
        "dataset_location",
        "dataset_location_trash",
    ]
)


def makeRegistryTableSpecs(universe: DimensionUniverse, collections: CollectionManager
                           ) -> RegistryTablesTuple:
55 """Construct descriptions of all tables in the Registry, aside from those
56 that correspond to `DimensionElement` instances.
58 Parameters
59 ----------
60 universe : `DimensionUniverse`
61 All dimensions known to the `Registry`.
62 collections : `Collection`
63 The `CollectionManager` that will be used for this `Registry`; used to
64 create foreign keys to the run and collection tables.
66 Returns
67 -------
68 specs : `RegistryTablesTuple`
69 A named tuple containing `ddl.TableSpec` instances.
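
    Examples
    --------
    A minimal sketch of typical use; ``universe`` and ``manager`` here stand
    in for existing `DimensionUniverse` and `CollectionManager` instances
    (illustrative names, not defined in this module)::

        specs = makeRegistryTableSpecs(universe, manager)
        spec = specs.dataset  # a ddl.TableSpec describing the dataset table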
70 """
    # The 'dataset' table is special: we need to add foreign key fields for
    # each dimension in the universe, as well as a foreign key field for run.
    dataset = ddl.TableSpec(
        fields=[
            ddl.FieldSpec(
                name="dataset_id",
                dtype=sqlalchemy.BigInteger,
                primaryKey=True,
                autoincrement=True,
                doc="A unique autoincrement field used as the primary key for dataset.",
            ),
            ddl.FieldSpec(
                name="dataset_type_name",
                dtype=sqlalchemy.String,
                length=128,
                nullable=False,
                doc=(
                    "The name of the DatasetType associated with this dataset; a "
                    "reference to the dataset_type table."
                ),
            ),
            ddl.FieldSpec(
                name="quantum_id",
                dtype=sqlalchemy.BigInteger,
                doc=(
                    "The id of the quantum that produced this dataset, providing access "
                    "to fine-grained provenance information. May be null for datasets "
                    "not produced by running a PipelineTask."
                ),
            ),
            ddl.FieldSpec(
                name="dataset_ref_hash",
                dtype=ddl.Base64Bytes,
                nbytes=32,
                nullable=False,
                doc="Secure hash of the data ID (i.e. dimension link values) and dataset_type_name.",
            ),
        ],
        foreignKeys=[
            ddl.ForeignKeySpec(
                table="dataset_type",
                source=("dataset_type_name",),
                target=("dataset_type_name",),
            ),
            ddl.ForeignKeySpec(
                table="quantum",
                source=("quantum_id",),
                target=("id",),
                onDelete="SET NULL",
            ),
        ],
        recycleIds=False
    )
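    # Add the run foreign key; together with the unique constraint below, it
    # ensures that a run contains at most one dataset with a given dataset
    # type and data ID (as captured by dataset_ref_hash).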
    field = collections.addRunForeignKey(dataset, onDelete="CASCADE", nullable=False)
    dataset.unique.add(("dataset_ref_hash", field.name))
    for dimension in universe.dimensions:
        addDimensionForeignKey(dataset, dimension, primaryKey=False, nullable=True)

    # The dataset_collection table needs a foreign key to collection.
    dataset_collection = ddl.TableSpec(
        doc=(
            "A table that associates Dataset records with Collections, "
            "which are implemented simply as string tags."
        ),
        fields=[
            ddl.FieldSpec(
                name="dataset_id",
                dtype=sqlalchemy.BigInteger,
                primaryKey=True,
                nullable=False,
                doc="Link to a unique record in the dataset table.",
            ),
            ddl.FieldSpec(
                name="dataset_ref_hash",
                dtype=ddl.Base64Bytes,
                nbytes=32,
                nullable=False,
                doc="Secure hash of the data ID (i.e. dimension link values) and dataset_type_name.",
            ),
        ],
        foreignKeys=[
            ddl.ForeignKeySpec(
                table="dataset",
                source=("dataset_id",),
                target=("dataset_id",),
                onDelete="CASCADE",
            )
        ],
    )
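    # The analogous unique constraint on (dataset_ref_hash, collection)
    # ensures that a collection contains at most one dataset with a given
    # dataset type and data ID.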
    field = collections.addCollectionForeignKey(dataset_collection, onDelete="CASCADE", nullable=False)
    dataset_collection.unique.add(("dataset_ref_hash", field.name))

    # The quantum table needs a foreign key to run.
    quantum = ddl.TableSpec(
        doc="A table used to capture fine-grained provenance for datasets produced by PipelineTasks.",
        fields=[
            ddl.FieldSpec(
                name="id",
                dtype=sqlalchemy.BigInteger,
                primaryKey=True,
                autoincrement=True,
                doc="A unique autoincrement integer identifier for this quantum.",
            ),
            ddl.FieldSpec(
                name="task",
                dtype=sqlalchemy.String,
                length=256,
                doc="Fully qualified name of the PipelineTask that executed this quantum.",
            ),
            ddl.FieldSpec(
                name="start_time",
                dtype=ddl.AstropyTimeNsecTai,
                nullable=True,
                doc="The start time for the quantum.",
            ),
            ddl.FieldSpec(
                name="end_time",
                dtype=ddl.AstropyTimeNsecTai,
                nullable=True,
                doc="The end time for the quantum.",
            ),
            ddl.FieldSpec(
                name="host",
                dtype=sqlalchemy.String,
                length=64,
                nullable=True,
                doc="The system on which the quantum was executed.",
            ),
        ],
    )
    collections.addRunForeignKey(quantum, onDelete="CASCADE", nullable=False)

    # We want the dataset_location and dataset_location_trash tables to have
    # the same definition.
    dataset_location_spec = dict(
        doc=(
            "A table that provides information on whether a Dataset is stored in "
            "one or more Datastores. The presence or absence of a record in this "
            "table itself indicates whether the Dataset is present in that "
            "Datastore."
        ),
        fields=[
            ddl.FieldSpec(
                name="dataset_id",
                dtype=sqlalchemy.BigInteger,
                primaryKey=True,
                nullable=False,
                doc="Link to the dataset table.",
            ),
            ddl.FieldSpec(
                name="datastore_name",
                dtype=sqlalchemy.String,
                length=256,
                primaryKey=True,
                nullable=False,
                doc="Name of the Datastore this entry corresponds to.",
            ),
        ],
    )

    dataset_location = ddl.TableSpec(
        **dataset_location_spec,
        foreignKeys=[
            ddl.ForeignKeySpec(
                table="dataset", source=("dataset_id",), target=("dataset_id",)
            )
        ],
    )

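    # Note that dataset_location_trash has no foreign key to dataset;
    # presumably so trash records can still be processed after the
    # corresponding dataset rows have been deleted.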
    dataset_location_trash = ddl.TableSpec(**dataset_location_spec)

    # All other table specs are fully static and do not depend on
    # configuration.
    return RegistryTablesTuple(
        dataset=dataset,
        dataset_composition=ddl.TableSpec(
            doc="A self-join table that relates components of a dataset to their parents.",
            fields=[
                ddl.FieldSpec(
                    name="parent_dataset_id",
                    dtype=sqlalchemy.BigInteger,
                    primaryKey=True,
                    doc="Link to the dataset entry for the parent/composite dataset.",
                ),
                ddl.FieldSpec(
                    name="component_dataset_id",
                    dtype=sqlalchemy.BigInteger,
                    doc="Link to the dataset entry for a child/component dataset.",
                ),
                ddl.FieldSpec(
                    name="component_name",
                    dtype=sqlalchemy.String,
                    length=32,
                    primaryKey=True,
                    doc="Name of this component within this composite.",
                ),
            ],
            foreignKeys=[
                ddl.ForeignKeySpec(
                    table="dataset",
                    source=("parent_dataset_id",),
                    target=("dataset_id",),
                    onDelete="CASCADE",
                ),
                ddl.ForeignKeySpec(
                    table="dataset",
                    source=("component_dataset_id",),
                    target=("dataset_id",),
                    onDelete="CASCADE",
                ),
            ],
        ),
        dataset_type=ddl.TableSpec(
282 doc="A Table containing the set of registered DatasetTypes and their StorageClasses.",
            fields=[
                ddl.FieldSpec(
                    name="dataset_type_name",
                    dtype=sqlalchemy.String,
                    length=128,
                    primaryKey=True,
                    nullable=False,
                    doc="Globally unique name for this DatasetType.",
                ),
                ddl.FieldSpec(
                    name="storage_class",
                    dtype=sqlalchemy.String,
                    length=64,
                    nullable=False,
                    doc=(
                        "Name of the StorageClass associated with this DatasetType. All "
                        "registries must support the full set of standard StorageClasses, "
                        "so the set of allowed StorageClasses and their properties is "
                        "maintained in the registry Python code rather than the database."
                    ),
                ),
            ],
        ),
        dataset_type_dimensions=ddl.TableSpec(
            doc=(
                "A definition table indicating which dimension fields in Dataset are "
                "non-NULL for Datasets with this DatasetType."
            ),
            fields=[
                ddl.FieldSpec(
                    name="dataset_type_name",
                    dtype=sqlalchemy.String,
                    length=128,
                    primaryKey=True,
                    doc="The name of the DatasetType.",
                ),
                ddl.FieldSpec(
                    name="dimension_name",
                    dtype=sqlalchemy.String,
                    length=32,
                    primaryKey=True,
                    doc="The name of a Dimension associated with this DatasetType.",
                ),
            ],
            foreignKeys=[
                ddl.ForeignKeySpec(
                    table="dataset_type",
                    source=("dataset_type_name",),
                    target=("dataset_type_name",),
                )
            ],
        ),
        dataset_collection=dataset_collection,
        quantum=quantum,
        dataset_consumers=ddl.TableSpec(
            doc="A table relating Quantum records to the Datasets they used as inputs.",
            fields=[
                ddl.FieldSpec(
                    name="quantum_id",
                    dtype=sqlalchemy.BigInteger,
                    nullable=False,
                    doc="A link to the associated Quantum.",
                ),
                ddl.FieldSpec(
                    name="dataset_id",
                    dtype=sqlalchemy.BigInteger,
                    nullable=True,
                    doc="A link to the associated dataset; null if the dataset has been deleted.",
                ),
                ddl.FieldSpec(
                    name="actual",
                    dtype=sqlalchemy.Boolean,
                    nullable=False,
                    doc=(
                        "Whether the Dataset was actually used as an input by the Quantum "
                        "(as opposed to just predicted to be used during preflight)."
                    ),
                ),
            ],
            foreignKeys=[
                ddl.ForeignKeySpec(
                    table="quantum",
                    source=("quantum_id",),
                    target=("id",),
                    onDelete="CASCADE",
                ),
                ddl.ForeignKeySpec(
                    table="dataset",
                    source=("dataset_id",),
                    target=("dataset_id",),
                    onDelete="SET NULL",
                ),
            ],
        ),
        dataset_location=dataset_location,
        dataset_location_trash=dataset_location_trash,
    )