Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = ["RegistryTablesTuple", "makeRegistryTableSpecs"] 

24 

25from collections import namedtuple 

26 

27import sqlalchemy 

28 

29from ..core.dimensions import DimensionUniverse 

30from ..core.dimensions.schema import addDimensionForeignKey 

31 

32from ..core import ddl 

33 

34 

# Named container for the specifications of every Registry table that does
# not correspond to a DimensionElement; field order matches construction
# order in makeRegistryTableSpecs.
RegistryTablesTuple = namedtuple(
    "RegistryTablesTuple",
    "dataset dataset_composition dataset_type dataset_type_dimensions "
    "dataset_collection run quantum dataset_consumers dataset_storage",
)

49 

50 

def makeRegistryTableSpecs(universe: DimensionUniverse) -> RegistryTablesTuple:
    """Construct descriptions of all tables in the Registry, aside from those
    that correspond to `DimensionElement` instances.

    Parameters
    ----------
    universe : `DimensionUniverse`
        All dimensions known to the `Registry`.

    Returns
    -------
    specs : `RegistryTablesTuple`
        A named tuple containing `ddl.TableSpec` instances.
    """
    # The 'dataset' table is special: we need to add foreign key fields for
    # each dimension in the universe.
    dataset = ddl.TableSpec(
        fields=[
            ddl.FieldSpec(
                name="dataset_id",
                dtype=sqlalchemy.BigInteger,
                primaryKey=True,
                autoincrement=True,
                doc="A unique autoincrement field used as the primary key for dataset.",
            ),
            ddl.FieldSpec(
                name="dataset_type_name",
                dtype=sqlalchemy.String,
                length=128,
                nullable=False,
                doc=(
                    "The name of the DatasetType associated with this dataset; a "
                    "reference to the dataset_type table."
                ),
            ),
            ddl.FieldSpec(
                name="run_id",
                dtype=sqlalchemy.BigInteger,
                nullable=False,
                doc=(
                    "The Id of the run that produced this dataset, providing access to "
                    "coarse provenance information."
                ),
            ),
            # Nullable (no nullable=False): datasets not produced by a
            # PipelineTask have no quantum.
            ddl.FieldSpec(
                name="quantum_id",
                dtype=sqlalchemy.BigInteger,
                doc=(
                    "The id of the quantum that produced this dataset, providing access "
                    "to fine-grained provenance information. May be null for datasets "
                    "not produced by running a PipelineTask."
                ),
            ),
            ddl.FieldSpec(
                name="dataset_ref_hash",
                dtype=ddl.Base64Bytes,
                nbytes=32,
                nullable=False,
                doc="Secure hash of the data ID (i.e. dimension link values) and dataset_type_name.",
            ),
        ],
        foreignKeys=[
            ddl.ForeignKeySpec(
                table="dataset_type",
                source=("dataset_type_name",),
                target=("dataset_type_name",),
            ),
            # Deleting a run deletes its datasets (CASCADE); deleting a
            # quantum only clears the provenance link (SET NULL).
            ddl.ForeignKeySpec(
                table="run", source=("run_id",), target=("id",), onDelete="CASCADE"
            ),
            ddl.ForeignKeySpec(
                table="quantum",
                source=("quantum_id",),
                target=("id",),
                onDelete="SET NULL",
            ),
        ],
    )
    # Add one nullable, non-primary-key foreign key field per dimension in
    # the universe; which of these are non-NULL for a given dataset depends
    # on its DatasetType (see dataset_type_dimensions below).
    for dimension in universe.dimensions:
        addDimensionForeignKey(dataset, dimension, primaryKey=False, nullable=True)
    # All other table specs are fully static and do not depend on
    # configuration.
    return RegistryTablesTuple(
        dataset=dataset,
        # Parent/component links for composite datasets; both sides cascade
        # on delete.
        dataset_composition=ddl.TableSpec(
            doc="A self-join table that relates components of a dataset to their parents.",
            fields=[
                ddl.FieldSpec(
                    name="parent_dataset_id",
                    dtype=sqlalchemy.BigInteger,
                    primaryKey=True,
                    doc="Link to the dataset entry for the parent/composite dataset.",
                ),
                ddl.FieldSpec(
                    name="component_dataset_id",
                    dtype=sqlalchemy.BigInteger,
                    primaryKey=True,
                    doc="Link to the dataset entry for a child/component dataset.",
                ),
                ddl.FieldSpec(
                    name="component_name",
                    dtype=sqlalchemy.String,
                    length=32,
                    nullable=False,
                    doc="Name of this component within this composite.",
                ),
            ],
            foreignKeys=[
                ddl.ForeignKeySpec(
                    table="dataset",
                    source=("parent_dataset_id",),
                    target=("dataset_id",),
                    onDelete="CASCADE",
                ),
                ddl.ForeignKeySpec(
                    table="dataset",
                    source=("component_dataset_id",),
                    target=("dataset_id",),
                    onDelete="CASCADE",
                ),
            ],
        ),
        dataset_type=ddl.TableSpec(
            doc="A Table containing the set of registered DatasetTypes and their StorageClasses.",
            fields=[
                ddl.FieldSpec(
                    name="dataset_type_name",
                    dtype=sqlalchemy.String,
                    length=128,
                    primaryKey=True,
                    nullable=False,
                    doc="Globally unique name for this DatasetType.",
                ),
                ddl.FieldSpec(
                    name="storage_class",
                    dtype=sqlalchemy.String,
                    length=64,
                    nullable=False,
                    doc=(
                        "Name of the StorageClass associated with this DatasetType. All "
                        "registries must support the full set of standard StorageClasses, "
                        "so the set of allowed StorageClasses and their properties is "
                        "maintained in the registry Python code rather than the database."
                    ),
                ),
            ],
        ),
        # One row per (DatasetType, dimension) pair; identifies which of the
        # per-dimension columns added to 'dataset' above are meaningful for
        # datasets of each type.
        dataset_type_dimensions=ddl.TableSpec(
            doc=(
                "A definition table indicating which dimension fields in Dataset are "
                "non-NULL for Datasets with this DatasetType."
            ),
            fields=[
                ddl.FieldSpec(
                    name="dataset_type_name",
                    dtype=sqlalchemy.String,
                    length=128,
                    primaryKey=True,
                    doc="The name of the DatasetType.",
                ),
                ddl.FieldSpec(
                    name="dimension_name",
                    dtype=sqlalchemy.String,
                    length=32,
                    primaryKey=True,
                    doc="The name of a Dimension associated with this DatasetType.",
                ),
            ],
            foreignKeys=[
                ddl.ForeignKeySpec(
                    table="dataset_type",
                    source=("dataset_type_name",),
                    target=("dataset_type_name",),
                )
            ],
        ),
        dataset_collection=ddl.TableSpec(
            doc=(
                "A table that associates Dataset records with Collections, "
                "which are implemented simply as string tags."
            ),
            fields=[
                ddl.FieldSpec(
                    name="dataset_id",
                    dtype=sqlalchemy.BigInteger,
                    primaryKey=True,
                    nullable=False,
                    doc="Link to a unique record in the dataset table.",
                ),
                ddl.FieldSpec(
                    name="dataset_ref_hash",
                    dtype=ddl.Base64Bytes,
                    nbytes=32,
                    nullable=False,
                    doc="Secure hash of the data ID (i.e. dimension link values) and dataset_type_name.",
                ),
                ddl.FieldSpec(
                    name="collection",
                    dtype=sqlalchemy.String,
                    length=128,
                    primaryKey=True,
                    nullable=False,
                    doc="Name of a Collection with which this Dataset is associated.",
                ),
            ],
            foreignKeys=[
                ddl.ForeignKeySpec(
                    table="dataset",
                    source=("dataset_id",),
                    target=("dataset_id",),
                    onDelete="CASCADE",
                )
            ],
            # At most one dataset with a given data-ID/type hash per
            # collection.
            unique=[("dataset_ref_hash", "collection")],
        ),
        run=ddl.TableSpec(
            doc="A table used to capture coarse provenance for all datasets.",
            fields=[
                ddl.FieldSpec(
                    name="id",
                    dtype=sqlalchemy.BigInteger,
                    primaryKey=True,
                    autoincrement=True,
                    doc="A unique autoincrement integer identifier for this run.",
                ),
                ddl.FieldSpec(
                    name="name",
                    dtype=sqlalchemy.String,
                    length=128,
                    doc="The name of the run.",
                ),
                ddl.FieldSpec(
                    name="start_time",
                    dtype=sqlalchemy.DateTime,
                    nullable=True,
                    doc="The start time for the run.",
                ),
                ddl.FieldSpec(
                    name="end_time",
                    dtype=sqlalchemy.DateTime,
                    nullable=True,
                    doc="The end time for the run.",
                ),
                ddl.FieldSpec(
                    name="host",
                    dtype=sqlalchemy.String,
                    length=64,
                    nullable=True,
                    doc="The system on which the run was executed.",
                ),
            ],
            # Run names are unique even though 'id' is the primary key.
            unique=[("name",)],
        ),
        quantum=ddl.TableSpec(
            doc="A table used to capture fine-grained provenance for datasets produced by PipelineTasks.",
            fields=[
                ddl.FieldSpec(
                    name="id",
                    dtype=sqlalchemy.BigInteger,
                    primaryKey=True,
                    autoincrement=True,
                    doc="A unique autoincrement integer identifier for this quantum.",
                ),
                ddl.FieldSpec(
                    name="task",
                    dtype=sqlalchemy.String,
                    length=256,
                    doc="Fully qualified name of the SuperTask that executed this quantum.",
                ),
                ddl.FieldSpec(
                    name="run_id",
                    dtype=sqlalchemy.BigInteger,
                    doc="Link to the run this quantum is a part of.",
                ),
                ddl.FieldSpec(
                    name="start_time",
                    dtype=sqlalchemy.DateTime,
                    nullable=True,
                    doc="The start time for the quantum.",
                ),
                ddl.FieldSpec(
                    name="end_time",
                    dtype=sqlalchemy.DateTime,
                    nullable=True,
                    doc="The end time for the quantum.",
                ),
                ddl.FieldSpec(
                    name="host",
                    dtype=sqlalchemy.String,
                    length=64,
                    nullable=True,
                    doc="The system on which the quantum was executed.",
                ),
            ],
            foreignKeys=[
                ddl.ForeignKeySpec(table="run", source=("run_id",), target=("id",), onDelete="CASCADE")
            ],
        ),
        # Many-to-many link recording the input datasets of each quantum.
        dataset_consumers=ddl.TableSpec(
            doc="A table relating Quantum records to the Datasets they used as inputs.",
            fields=[
                ddl.FieldSpec(
                    name="quantum_id",
                    dtype=sqlalchemy.BigInteger,
                    nullable=False,
                    doc="A link to the associated Quantum.",
                ),
                ddl.FieldSpec(
                    name="dataset_id",
                    dtype=sqlalchemy.BigInteger,
                    nullable=False,
                    doc="A link to the associated Dataset.",
                ),
                ddl.FieldSpec(
                    name="actual",
                    dtype=sqlalchemy.Boolean,
                    nullable=False,
                    doc=(
                        "Whether the Dataset was actually used as an input by the Quantum "
                        "(as opposed to just predicted to be used during preflight)."
                    ),
                ),
            ],
            foreignKeys=[
                ddl.ForeignKeySpec(
                    table="quantum",
                    source=("quantum_id",),
                    target=("id",),
                    onDelete="CASCADE",
                ),
                ddl.ForeignKeySpec(
                    table="dataset",
                    source=("dataset_id",),
                    target=("dataset_id",),
                    onDelete="CASCADE",
                ),
            ],
        ),
        dataset_storage=ddl.TableSpec(
            doc=(
                "A table that provides information on whether a Dataset is stored in "
                "one or more Datastores. The presence or absence of a record in this "
                "table itself indicates whether the Dataset is present in that "
                "Datastore. "
            ),
            fields=[
                ddl.FieldSpec(
                    name="dataset_id",
                    dtype=sqlalchemy.BigInteger,
                    primaryKey=True,
                    nullable=False,
                    doc="Link to the dataset table.",
                ),
                ddl.FieldSpec(
                    name="datastore_name",
                    dtype=sqlalchemy.String,
                    length=256,
                    primaryKey=True,
                    nullable=False,
                    doc="Name of the Datastore this entry corresponds to.",
                ),
            ],
            foreignKeys=[
                # NOTE(review): unlike the other dataset references, this FK
                # has no onDelete — presumably deleting a dataset that still
                # has datastore records should fail; confirm that is intended.
                ddl.ForeignKeySpec(
                    table="dataset", source=("dataset_id",), target=("dataset_id",)
                )
            ],
        ),
    )