Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = ["RegistryTablesTuple", "makeRegistryTableSpecs"] 

24 

25from collections import namedtuple 

26 

27import sqlalchemy 

28 

29from ..core.dimensions import DimensionUniverse 

30from ..core.dimensions.schema import addDimensionForeignKey 

31 

32from ..core import ddl 

33 

34from .interfaces import CollectionManager 

35 

36 

# Named container for the ``ddl.TableSpec`` objects describing every static
# (non-dimension) table in the Registry schema; field order matches the order
# in which the specs are constructed by `makeRegistryTableSpecs`.
RegistryTablesTuple = namedtuple(
    "RegistryTablesTuple",
    "dataset dataset_composition dataset_type dataset_type_dimensions "
    "dataset_collection quantum dataset_consumers dataset_storage",
)

50 

51 

def makeRegistryTableSpecs(universe: DimensionUniverse, collections: CollectionManager
                           ) -> RegistryTablesTuple:
    """Construct descriptions of all tables in the Registry, aside from those
    that correspond to `DimensionElement` instances.

    Parameters
    ----------
    universe : `DimensionUniverse`
        All dimensions known to the `Registry`.
    collections : `CollectionManager`
        The `CollectionManager` that will be used for this `Registry`; used to
        create foreign keys to the run and collection tables.

    Returns
    -------
    specs : `RegistryTablesTuple`
        A named tuple containing `ddl.TableSpec` instances.
    """
    # The 'dataset' table is special: we need to add foreign key fields for
    # each dimension in the universe, as well as a foreign key field for run.
    dataset = ddl.TableSpec(
        fields=[
            ddl.FieldSpec(
                name="dataset_id",
                dtype=sqlalchemy.BigInteger,
                primaryKey=True,
                autoincrement=True,
                doc="A unique autoincrement field used as the primary key for dataset.",
            ),
            ddl.FieldSpec(
                name="dataset_type_name",
                dtype=sqlalchemy.String,
                length=128,
                nullable=False,
                doc=(
                    "The name of the DatasetType associated with this dataset; a "
                    "reference to the dataset_type table."
                ),
            ),
            ddl.FieldSpec(
                name="quantum_id",
                dtype=sqlalchemy.BigInteger,
                doc=(
                    "The id of the quantum that produced this dataset, providing access "
                    "to fine-grained provenance information. May be null for datasets "
                    "not produced by running a PipelineTask."
                ),
            ),
            ddl.FieldSpec(
                name="dataset_ref_hash",
                dtype=ddl.Base64Bytes,
                nbytes=32,
                nullable=False,
                doc="Secure hash of the data ID (i.e. dimension link values) and dataset_type_name.",
            ),
        ],
        foreignKeys=[
            ddl.ForeignKeySpec(
                table="dataset_type",
                source=("dataset_type_name",),
                target=("dataset_type_name",),
            ),
            # Deleting a quantum must not delete the datasets it produced,
            # only sever the provenance link; hence SET NULL, not CASCADE.
            ddl.ForeignKeySpec(
                table="quantum",
                source=("quantum_id",),
                target=("id",),
                onDelete="SET NULL",
            ),
        ],
    )
    # Add the run foreign key via the manager (its column name is
    # manager-defined), then enforce that a given data ID + dataset type
    # (i.e. dataset_ref_hash) appears at most once per run.
    field = collections.addRunForeignKey(dataset, onDelete="CASCADE", nullable=False)
    dataset.unique.add(("dataset_ref_hash", field.name))
    # Dimension columns are nullable: per the dataset_type_dimensions doc
    # below, only the dimension fields used by a dataset's DatasetType are
    # non-NULL.
    for dimension in universe.dimensions:
        addDimensionForeignKey(dataset, dimension, primaryKey=False, nullable=True)

    # The dataset_collection table needs a foreign key to collection.
    dataset_collection = ddl.TableSpec(
        doc=(
            "A table that associates Dataset records with Collections, "
            "which are implemented simply as string tags."
        ),
        fields=[
            ddl.FieldSpec(
                name="dataset_id",
                dtype=sqlalchemy.BigInteger,
                primaryKey=True,
                nullable=False,
                doc="Link to a unique record in the dataset table.",
            ),
            ddl.FieldSpec(
                name="dataset_ref_hash",
                dtype=ddl.Base64Bytes,
                nbytes=32,
                nullable=False,
                doc="Secure hash of the data ID (i.e. dimension link values) and dataset_type_name.",
            ),
        ],
        foreignKeys=[
            ddl.ForeignKeySpec(
                table="dataset",
                source=("dataset_id",),
                target=("dataset_id",),
                onDelete="CASCADE",
            )
        ],
    )
    # As with runs above: manager-defined collection column, plus a unique
    # constraint so a given dataset_ref_hash appears at most once per
    # collection.
    field = collections.addCollectionForeignKey(dataset_collection, onDelete="CASCADE", nullable=False)
    dataset_collection.unique.add(("dataset_ref_hash", field.name))

    # The quantum table needs a foreign key to run.
    quantum = ddl.TableSpec(
        doc="A table used to capture fine-grained provenance for datasets produced by PipelineTasks.",
        fields=[
            ddl.FieldSpec(
                name="id",
                dtype=sqlalchemy.BigInteger,
                primaryKey=True,
                autoincrement=True,
                doc="A unique autoincrement integer identifier for this quantum.",
            ),
            ddl.FieldSpec(
                name="task",
                dtype=sqlalchemy.String,
                length=256,
                doc="Fully qualified name of the SuperTask that executed this quantum.",
            ),
            ddl.FieldSpec(
                name="start_time",
                dtype=sqlalchemy.DateTime,
                nullable=True,
                doc="The start time for the quantum.",
            ),
            ddl.FieldSpec(
                name="end_time",
                dtype=sqlalchemy.DateTime,
                nullable=True,
                doc="The end time for the quantum.",
            ),
            ddl.FieldSpec(
                name="host",
                dtype=sqlalchemy.String,
                length=64,
                nullable=True,
                doc="The system on which the quantum was executed.",
            ),
        ],
    )
    collections.addRunForeignKey(quantum, onDelete="CASCADE", nullable=False)

    # All other table specs are fully static and do not depend on
    # configuration.
    return RegistryTablesTuple(
        dataset=dataset,
        dataset_composition=ddl.TableSpec(
            doc="A self-join table that relates components of a dataset to their parents.",
            fields=[
                ddl.FieldSpec(
                    name="parent_dataset_id",
                    dtype=sqlalchemy.BigInteger,
                    primaryKey=True,
                    doc="Link to the dataset entry for the parent/composite dataset.",
                ),
                ddl.FieldSpec(
                    name="component_dataset_id",
                    dtype=sqlalchemy.BigInteger,
                    primaryKey=True,
                    doc="Link to the dataset entry for a child/component dataset.",
                ),
                ddl.FieldSpec(
                    name="component_name",
                    dtype=sqlalchemy.String,
                    length=32,
                    nullable=False,
                    doc="Name of this component within this composite.",
                ),
            ],
            foreignKeys=[
                ddl.ForeignKeySpec(
                    table="dataset",
                    source=("parent_dataset_id",),
                    target=("dataset_id",),
                    onDelete="CASCADE",
                ),
                ddl.ForeignKeySpec(
                    table="dataset",
                    source=("component_dataset_id",),
                    target=("dataset_id",),
                    onDelete="CASCADE",
                ),
            ],
        ),
        dataset_type=ddl.TableSpec(
            doc="A Table containing the set of registered DatasetTypes and their StorageClasses.",
            fields=[
                ddl.FieldSpec(
                    name="dataset_type_name",
                    dtype=sqlalchemy.String,
                    length=128,
                    primaryKey=True,
                    nullable=False,
                    doc="Globally unique name for this DatasetType.",
                ),
                ddl.FieldSpec(
                    name="storage_class",
                    dtype=sqlalchemy.String,
                    length=64,
                    nullable=False,
                    doc=(
                        "Name of the StorageClass associated with this DatasetType. All "
                        "registries must support the full set of standard StorageClasses, "
                        "so the set of allowed StorageClasses and their properties is "
                        "maintained in the registry Python code rather than the database."
                    ),
                ),
            ],
        ),
        dataset_type_dimensions=ddl.TableSpec(
            doc=(
                "A definition table indicating which dimension fields in Dataset are "
                "non-NULL for Datasets with this DatasetType."
            ),
            fields=[
                ddl.FieldSpec(
                    name="dataset_type_name",
                    dtype=sqlalchemy.String,
                    length=128,
                    primaryKey=True,
                    doc="The name of the DatasetType.",
                ),
                ddl.FieldSpec(
                    name="dimension_name",
                    dtype=sqlalchemy.String,
                    length=32,
                    primaryKey=True,
                    doc="The name of a Dimension associated with this DatasetType.",
                ),
            ],
            foreignKeys=[
                ddl.ForeignKeySpec(
                    table="dataset_type",
                    source=("dataset_type_name",),
                    target=("dataset_type_name",),
                )
            ],
        ),
        dataset_collection=dataset_collection,
        quantum=quantum,
        dataset_consumers=ddl.TableSpec(
            doc="A table relating Quantum records to the Datasets they used as inputs.",
            fields=[
                ddl.FieldSpec(
                    name="quantum_id",
                    dtype=sqlalchemy.BigInteger,
                    nullable=False,
                    doc="A link to the associated Quantum.",
                ),
                ddl.FieldSpec(
                    name="dataset_id",
                    dtype=sqlalchemy.BigInteger,
                    nullable=False,
                    doc="A link to the associated Dataset.",
                ),
                ddl.FieldSpec(
                    name="actual",
                    dtype=sqlalchemy.Boolean,
                    nullable=False,
                    doc=(
                        "Whether the Dataset was actually used as an input by the Quantum "
                        "(as opposed to just predicted to be used during preflight)."
                    ),
                ),
            ],
            foreignKeys=[
                ddl.ForeignKeySpec(
                    table="quantum",
                    source=("quantum_id",),
                    target=("id",),
                    onDelete="CASCADE",
                ),
                ddl.ForeignKeySpec(
                    table="dataset",
                    source=("dataset_id",),
                    target=("dataset_id",),
                    onDelete="CASCADE",
                ),
            ],
        ),
        dataset_storage=ddl.TableSpec(
            doc=(
                "A table that provides information on whether a Dataset is stored in "
                "one or more Datastores. The presence or absence of a record in this "
                "table itself indicates whether the Dataset is present in that "
                "Datastore. "
            ),
            fields=[
                ddl.FieldSpec(
                    name="dataset_id",
                    dtype=sqlalchemy.BigInteger,
                    primaryKey=True,
                    nullable=False,
                    doc="Link to the dataset table.",
                ),
                ddl.FieldSpec(
                    name="datastore_name",
                    dtype=sqlalchemy.String,
                    length=256,
                    primaryKey=True,
                    nullable=False,
                    doc="Name of the Datastore this entry corresponds to.",
                ),
            ],
            foreignKeys=[
                ddl.ForeignKeySpec(
                    table="dataset", source=("dataset_id",), target=("dataset_id",)
                )
            ],
        ),
    )