# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ["RegistryTablesTuple", "makeRegistryTableSpecs"]

from collections import namedtuple

import sqlalchemy

from ..core.dimensions import DimensionUniverse
from ..core.dimensions.schema import addDimensionForeignKey

from ..core import ddl

from .interfaces import CollectionManager
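
# `RegistryTablesTuple` (defined below) bundles one `ddl.TableSpec` per
# Registry table constructed by `makeRegistryTableSpecs`, keyed by table name.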

RegistryTablesTuple = namedtuple(
    "RegistryTablesTuple",
    [
        "dataset",
        "dataset_composition",
        "dataset_type",
        "dataset_type_dimensions",
        "dataset_collection",
        "quantum",
        "dataset_consumers",
        "dataset_location",
        "dataset_location_trash",
    ]
)


def makeRegistryTableSpecs(universe: DimensionUniverse, collections: CollectionManager
                           ) -> RegistryTablesTuple:
    """Construct descriptions of all tables in the Registry, aside from those
    that correspond to `DimensionElement` instances.

    Parameters
    ----------
    universe : `DimensionUniverse`
        All dimensions known to the `Registry`.
    collections : `CollectionManager`
        The `CollectionManager` that will be used for this `Registry`; used to
        create foreign keys to the run and collection tables.

    Returns
    -------
    specs : `RegistryTablesTuple`
        A named tuple containing `ddl.TableSpec` instances.
    """
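    # Only the dataset, dataset_collection, and quantum specs depend on the
    # arguments; the remaining specs constructed below are fully static.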

    # The 'dataset' table is special: we need to add foreign key fields for
    # each dimension in the universe, as well as a foreign key field for run.
    dataset = ddl.TableSpec(
        fields=[
            ddl.FieldSpec(
                name="dataset_id",
                dtype=sqlalchemy.BigInteger,
                primaryKey=True,
                autoincrement=True,
                doc="A unique autoincrement field used as the primary key for dataset.",
            ),
            ddl.FieldSpec(
                name="dataset_type_name",
                dtype=sqlalchemy.String,
                length=128,
                nullable=False,
                doc=(
                    "The name of the DatasetType associated with this dataset; a "
                    "reference to the dataset_type table."
                ),
            ),
            ddl.FieldSpec(
                name="quantum_id",
                dtype=sqlalchemy.BigInteger,
                doc=(
                    "The id of the quantum that produced this dataset, providing access "
                    "to fine-grained provenance information.  May be null for datasets "
                    "not produced by running a PipelineTask."
                ),
            ),
            ddl.FieldSpec(
                name="dataset_ref_hash",
                dtype=ddl.Base64Bytes,
                nbytes=32,
                nullable=False,
                doc="Secure hash of the data ID (i.e. dimension link values) and dataset_type_name.",
            ),
        ],
        foreignKeys=[
            ddl.ForeignKeySpec(
                table="dataset_type",
                source=("dataset_type_name",),
                target=("dataset_type_name",),
            ),
            ddl.ForeignKeySpec(
                table="quantum",
                source=("quantum_id",),
                target=("id",),
                onDelete="SET NULL",
            ),
        ],
    )

    field = collections.addRunForeignKey(dataset, onDelete="CASCADE", nullable=False)
    dataset.unique.add(("dataset_ref_hash", field.name))
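    # Together with the run foreign key just added, this unique constraint
    # ensures a run contains at most one dataset with a given DatasetType and
    # data ID (dataset_ref_hash encodes both; see its field doc above).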

    for dimension in universe.dimensions:
        addDimensionForeignKey(dataset, dimension, primaryKey=False, nullable=True)
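    # The dimension columns just added are nullable because each DatasetType
    # uses only a subset of them; the dataset_type_dimensions table below
    # records which columns are non-NULL for a given DatasetType.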

    # The dataset_collection table needs a foreign key to collection.
    dataset_collection = ddl.TableSpec(
        doc=(
            "A table that associates Dataset records with Collections, "
            "which are implemented simply as string tags."
        ),
        fields=[
            ddl.FieldSpec(
                name="dataset_id",
                dtype=sqlalchemy.BigInteger,
                primaryKey=True,
                nullable=False,
                doc="Link to a unique record in the dataset table.",
            ),
            ddl.FieldSpec(
                name="dataset_ref_hash",
                dtype=ddl.Base64Bytes,
                nbytes=32,
                nullable=False,
                doc="Secure hash of the data ID (i.e. dimension link values) and dataset_type_name.",
            ),
        ],
        foreignKeys=[
            ddl.ForeignKeySpec(
                table="dataset",
                source=("dataset_id",),
                target=("dataset_id",),
                onDelete="CASCADE",
            )
        ],
    )

    field = collections.addCollectionForeignKey(dataset_collection, onDelete="CASCADE", nullable=False)
    dataset_collection.unique.add(("dataset_ref_hash", field.name))
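    # As with runs above, this unique constraint ensures a collection contains
    # at most one dataset with a given DatasetType and data ID.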

    # The quantum table needs a foreign key to run.
    quantum = ddl.TableSpec(
        doc="A table used to capture fine-grained provenance for datasets produced by PipelineTasks.",
        fields=[
            ddl.FieldSpec(
                name="id",
                dtype=sqlalchemy.BigInteger,
                primaryKey=True,
                autoincrement=True,
                doc="A unique autoincrement integer identifier for this quantum.",
            ),
            ddl.FieldSpec(
                name="task",
                dtype=sqlalchemy.String,
                length=256,

                doc="Fully qualified name of the PipelineTask that executed this quantum.",

            ),
            ddl.FieldSpec(
                name="start_time",
                dtype=ddl.AstropyTimeNsecTai,
                nullable=True,
                doc="The start time for the quantum.",
            ),
            ddl.FieldSpec(
                name="end_time",
                dtype=ddl.AstropyTimeNsecTai,
                nullable=True,
                doc="The end time for the quantum.",
            ),
            ddl.FieldSpec(
                name="host",
                dtype=sqlalchemy.String,
                length=64,
                nullable=True,
                doc="The system on which the quantum was executed.",
            ),
        ],
    )
    collections.addRunForeignKey(quantum, onDelete="CASCADE", nullable=False)

    # We want the dataset_location and dataset_location_trash tables to have
    # the same definition.
    dataset_location_spec = dict(
        doc=(
            "A table that provides information on whether a Dataset is stored in "
            "one or more Datastores.  The presence or absence of a record in this "
            "table itself indicates whether the Dataset is present in that "
            "Datastore."
        ),
        fields=[
            ddl.FieldSpec(
                name="dataset_id",
                dtype=sqlalchemy.BigInteger,
                primaryKey=True,
                nullable=False,
                doc="Link to the dataset table.",
            ),
            ddl.FieldSpec(
                name="datastore_name",
                dtype=sqlalchemy.String,
                length=256,
                primaryKey=True,
                nullable=False,
                doc="Name of the Datastore this entry corresponds to.",
            ),
        ],
    )

    dataset_location = ddl.TableSpec(
        **dataset_location_spec,
        foreignKeys=[
            ddl.ForeignKeySpec(
                table="dataset", source=("dataset_id",), target=("dataset_id",)
            )
        ],
    )

    dataset_location_trash = ddl.TableSpec(**dataset_location_spec)
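    # Note that dataset_location_trash omits the foreign key to the dataset
    # table, presumably so trash records can outlive their dataset entries
    # while Datastore artifacts are still being cleaned up.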

    # All other table specs are fully static and do not depend on
    # configuration.
    return RegistryTablesTuple(
        dataset=dataset,
        dataset_composition=ddl.TableSpec(
            doc="A self-join table that relates components of a dataset to their parents.",
            fields=[
                ddl.FieldSpec(
                    name="parent_dataset_id",
                    dtype=sqlalchemy.BigInteger,
                    primaryKey=True,
                    doc="Link to the dataset entry for the parent/composite dataset.",
                ),
                ddl.FieldSpec(
                    name="component_dataset_id",
                    dtype=sqlalchemy.BigInteger,
                    primaryKey=True,
                    doc="Link to the dataset entry for a child/component dataset.",
                ),
                ddl.FieldSpec(
                    name="component_name",
                    dtype=sqlalchemy.String,
                    length=32,
                    nullable=False,
                    doc="Name of this component within this composite.",
                ),
            ],
            foreignKeys=[
                ddl.ForeignKeySpec(
                    table="dataset",
                    source=("parent_dataset_id",),
                    target=("dataset_id",),
                    onDelete="CASCADE",
                ),
                ddl.ForeignKeySpec(
                    table="dataset",
                    source=("component_dataset_id",),
                    target=("dataset_id",),
                    onDelete="CASCADE",
                ),
            ],
        ),
        dataset_type=ddl.TableSpec(
            doc="A table containing the set of registered DatasetTypes and their StorageClasses.",
            fields=[
                ddl.FieldSpec(
                    name="dataset_type_name",
                    dtype=sqlalchemy.String,
                    length=128,
                    primaryKey=True,
                    nullable=False,
                    doc="Globally unique name for this DatasetType.",
                ),
                ddl.FieldSpec(
                    name="storage_class",
                    dtype=sqlalchemy.String,
                    length=64,
                    nullable=False,
                    doc=(
                        "Name of the StorageClass associated with this DatasetType. All "
                        "registries must support the full set of standard StorageClasses, "
                        "so the set of allowed StorageClasses and their properties is "
                        "maintained in the registry Python code rather than the database."
                    ),
                ),
            ],
        ),
        dataset_type_dimensions=ddl.TableSpec(
            doc=(
                "A definition table indicating which dimension fields in Dataset are "
                "non-NULL for Datasets with this DatasetType."
            ),
            fields=[
                ddl.FieldSpec(
                    name="dataset_type_name",
                    dtype=sqlalchemy.String,
                    length=128,
                    primaryKey=True,
                    doc="The name of the DatasetType.",
                ),
                ddl.FieldSpec(
                    name="dimension_name",
                    dtype=sqlalchemy.String,
                    length=32,
                    primaryKey=True,
                    doc="The name of a Dimension associated with this DatasetType.",
                ),
            ],
            foreignKeys=[
                ddl.ForeignKeySpec(
                    table="dataset_type",
                    source=("dataset_type_name",),
                    target=("dataset_type_name",),
                )
            ],
        ),
        dataset_collection=dataset_collection,
        quantum=quantum,
        dataset_consumers=ddl.TableSpec(
            doc="A table relating Quantum records to the Datasets they used as inputs.",
            fields=[
                ddl.FieldSpec(
                    name="quantum_id",
                    dtype=sqlalchemy.BigInteger,
                    nullable=False,
                    doc="A link to the associated Quantum.",
                ),
                ddl.FieldSpec(
                    name="dataset_id",
                    dtype=sqlalchemy.BigInteger,
                    nullable=False,
                    doc="A link to the associated Dataset.",
                ),
                ddl.FieldSpec(
                    name="actual",
                    dtype=sqlalchemy.Boolean,
                    nullable=False,
                    doc=(
                        "Whether the Dataset was actually used as an input by the Quantum "
                        "(as opposed to just predicted to be used during preflight)."
                    ),
                ),
            ],
            foreignKeys=[
                ddl.ForeignKeySpec(
                    table="quantum",
                    source=("quantum_id",),
                    target=("id",),
                    onDelete="CASCADE",
                ),
                ddl.ForeignKeySpec(
                    table="dataset",
                    source=("dataset_id",),
                    target=("dataset_id",),
                    onDelete="CASCADE",
                ),
            ],
        ),
        dataset_location=dataset_location,
        dataset_location_trash=dataset_location_trash,
    )
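
# Illustrative usage sketch (the `db` object and its `ensureTableExists`
# method are assumptions about the surrounding Database abstraction, not
# defined in this module):
#
#     specs = makeRegistryTableSpecs(universe, collections)
#     tables = RegistryTablesTuple._make(
#         db.ensureTableExists(name, spec)
#         for name, spec in zip(specs._fields, specs)
#     )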