
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ["RegistryTablesTuple", "makeRegistryTableSpecs"]

from collections import namedtuple

import sqlalchemy

from ..core.dimensions import DimensionUniverse
from ..core.dimensions.schema import addDimensionForeignKey

from ..core import ddl

from .interfaces import CollectionManager


RegistryTablesTuple = namedtuple(
    "RegistryTablesTuple",
    [
        "dataset",
        "dataset_composition",
        "dataset_type",
        "dataset_type_dimensions",
        "dataset_collection",
        "quantum",
        "dataset_consumers",
        "dataset_location",
        "dataset_location_trash",
    ]
)



def makeRegistryTableSpecs(universe: DimensionUniverse, collections: CollectionManager
                           ) -> RegistryTablesTuple:
    """Construct descriptions of all tables in the Registry, aside from those
    that correspond to `DimensionElement` instances.

    Parameters
    ----------
    universe : `DimensionUniverse`
        All dimensions known to the `Registry`.
    collections : `CollectionManager`
        The `CollectionManager` that will be used for this `Registry`; used to
        create foreign keys to the run and collection tables.

    Returns
    -------
    specs : `RegistryTablesTuple`
        A named tuple containing `ddl.TableSpec` instances.
    """
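    # A minimal usage sketch (illustrative only): the returned specs are
    # typically handed to a database layer that turns them into real tables.
    # ``db`` and ``ensureTableExists`` below are assumed names, not part of
    # this module:
    #
    #     specs = makeRegistryTableSpecs(universe, collections)
    #     for name, spec in zip(specs._fields, specs):
    #         db.ensureTableExists(name, spec)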

    # The 'dataset' table is special: we need to add foreign key fields for
    # each dimension in the universe, as well as a foreign key field for run.
    dataset = ddl.TableSpec(
        fields=[
            ddl.FieldSpec(
                name="dataset_id",
                dtype=sqlalchemy.BigInteger,
                primaryKey=True,
                autoincrement=True,
                doc="A unique autoincrement field used as the primary key for dataset.",
            ),
            ddl.FieldSpec(
                name="dataset_type_name",
                dtype=sqlalchemy.String,
                length=128,
                nullable=False,
                doc=(
                    "The name of the DatasetType associated with this dataset; a "
                    "reference to the dataset_type table."
                ),
            ),
            ddl.FieldSpec(
                name="quantum_id",
                dtype=sqlalchemy.BigInteger,
                doc=(
                    "The id of the quantum that produced this dataset, providing access "
                    "to fine-grained provenance information. May be null for datasets "
                    "not produced by running a PipelineTask."
                ),
            ),
            ddl.FieldSpec(
                name="dataset_ref_hash",
                dtype=ddl.Base64Bytes,
                nbytes=32,
                nullable=False,
                doc="Secure hash of the data ID (i.e. dimension link values) and dataset_type_name.",
            ),
        ],
        foreignKeys=[
            ddl.ForeignKeySpec(
                table="dataset_type",
                source=("dataset_type_name",),
                target=("dataset_type_name",),
            ),
            ddl.ForeignKeySpec(
                table="quantum",
                source=("quantum_id",),
                target=("id",),
                onDelete="SET NULL",
            ),
        ],
        recycleIds=False,
    )
    field = collections.addRunForeignKey(dataset, onDelete="CASCADE", nullable=False)
    dataset.unique.add(("dataset_ref_hash", field.name))
    for dimension in universe.dimensions:
        addDimensionForeignKey(dataset, dimension, primaryKey=False, nullable=True)
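    # The loop above gives the dataset table one nullable column (and, where
    # applicable, a foreign key constraint) per dimension in the universe;
    # e.g. an "instrument" dimension would contribute a nullable "instrument"
    # column (illustrative example, not a fixed list).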


    # The dataset_collection table needs a foreign key to collection.
    dataset_collection = ddl.TableSpec(
        doc=(
            "A table that associates Dataset records with Collections, "
            "which are implemented simply as string tags."
        ),
        fields=[
            ddl.FieldSpec(
                name="dataset_id",
                dtype=sqlalchemy.BigInteger,
                primaryKey=True,
                nullable=False,
                doc="Link to a unique record in the dataset table.",
            ),
            ddl.FieldSpec(
                name="dataset_ref_hash",
                dtype=ddl.Base64Bytes,
                nbytes=32,
                nullable=False,
                doc="Secure hash of the data ID (i.e. dimension link values) and dataset_type_name.",
            ),
        ],
        foreignKeys=[
            ddl.ForeignKeySpec(
                table="dataset",
                source=("dataset_id",),
                target=("dataset_id",),
                onDelete="CASCADE",
            )
        ],
    )
    field = collections.addCollectionForeignKey(dataset_collection, onDelete="CASCADE", nullable=False)
    dataset_collection.unique.add(("dataset_ref_hash", field.name))
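    # Together with the collection foreign key added just above, this unique
    # constraint ensures a given dataset_ref_hash appears at most once in any
    # particular collection.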


    # The quantum table needs a foreign key to run.
    quantum = ddl.TableSpec(
        doc="A table used to capture fine-grained provenance for datasets produced by PipelineTasks.",
        fields=[
            ddl.FieldSpec(
                name="id",
                dtype=sqlalchemy.BigInteger,
                primaryKey=True,
                autoincrement=True,
                doc="A unique autoincrement integer identifier for this quantum.",
            ),
            ddl.FieldSpec(
                name="task",
                dtype=sqlalchemy.String,
                length=256,
                doc="Fully qualified name of the PipelineTask that executed this quantum.",
            ),
            ddl.FieldSpec(
                name="start_time",
                dtype=ddl.AstropyTimeNsecTai,
                nullable=True,
                doc="The start time for the quantum.",
            ),
            ddl.FieldSpec(
                name="end_time",
                dtype=ddl.AstropyTimeNsecTai,
                nullable=True,
                doc="The end time for the quantum.",
            ),
            ddl.FieldSpec(
                name="host",
                dtype=sqlalchemy.String,
                length=64,
                nullable=True,
                doc="The system on which the quantum was executed.",
            ),
        ],
    )
    collections.addRunForeignKey(quantum, onDelete="CASCADE", nullable=False)
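    # With onDelete="CASCADE" on the run foreign key, deleting a run also
    # deletes the quanta executed within it.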


    # We want the dataset_location and dataset_location_trash tables
    # to have the same definition.
    dataset_location_spec = dict(
        doc=(
            "A table that provides information on whether a Dataset is stored in "
            "one or more Datastores. The presence or absence of a record in this "
            "table itself indicates whether the Dataset is present in that "
            "Datastore."
        ),
        fields=[
            ddl.FieldSpec(
                name="dataset_id",
                dtype=sqlalchemy.BigInteger,
                primaryKey=True,
                nullable=False,
                doc="Link to the dataset table.",
            ),
            ddl.FieldSpec(
                name="datastore_name",
                dtype=sqlalchemy.String,
                length=256,
                primaryKey=True,
                nullable=False,
                doc="Name of the Datastore this entry corresponds to.",
            ),
        ],
    )

    dataset_location = ddl.TableSpec(
        **dataset_location_spec,
        foreignKeys=[
            ddl.ForeignKeySpec(
                table="dataset", source=("dataset_id",), target=("dataset_id",)
            )
        ],
    )

    dataset_location_trash = ddl.TableSpec(**dataset_location_spec)
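    # Unlike dataset_location, the trash table carries no foreign key back to
    # dataset; presumably so its rows can outlive the dataset entries they
    # reference while Datastore cleanup is in progress.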


    # All other table specs are fully static and do not depend on
    # configuration.
    return RegistryTablesTuple(
        dataset=dataset,
        dataset_composition=ddl.TableSpec(
            doc="A self-join table that relates components of a dataset to their parents.",
            fields=[
                ddl.FieldSpec(
                    name="parent_dataset_id",
                    dtype=sqlalchemy.BigInteger,
                    primaryKey=True,
                    doc="Link to the dataset entry for the parent/composite dataset.",
                ),
                ddl.FieldSpec(
                    name="component_dataset_id",
                    dtype=sqlalchemy.BigInteger,
                    doc="Link to the dataset entry for a child/component dataset.",
                ),
                ddl.FieldSpec(
                    name="component_name",
                    dtype=sqlalchemy.String,
                    length=32,
                    primaryKey=True,
                    doc="Name of this component within this composite.",
                ),
            ],
            foreignKeys=[
                ddl.ForeignKeySpec(
                    table="dataset",
                    source=("parent_dataset_id",),
                    target=("dataset_id",),
                    onDelete="CASCADE",
                ),
                ddl.ForeignKeySpec(
                    table="dataset",
                    source=("component_dataset_id",),
                    target=("dataset_id",),
                    onDelete="CASCADE",
                ),
            ],
        ),
        dataset_type=ddl.TableSpec(
            doc="A table containing the set of registered DatasetTypes and their StorageClasses.",
            fields=[
                ddl.FieldSpec(
                    name="dataset_type_name",
                    dtype=sqlalchemy.String,
                    length=128,
                    primaryKey=True,
                    nullable=False,
                    doc="Globally unique name for this DatasetType.",
                ),
                ddl.FieldSpec(
                    name="storage_class",
                    dtype=sqlalchemy.String,
                    length=64,
                    nullable=False,
                    doc=(
                        "Name of the StorageClass associated with this DatasetType. All "
                        "registries must support the full set of standard StorageClasses, "
                        "so the set of allowed StorageClasses and their properties is "
                        "maintained in the registry Python code rather than the database."
                    ),
                ),
            ],
        ),
        dataset_type_dimensions=ddl.TableSpec(
            doc=(
                "A definition table indicating which dimension fields in Dataset are "
                "non-NULL for Datasets with this DatasetType."
            ),
            fields=[
                ddl.FieldSpec(
                    name="dataset_type_name",
                    dtype=sqlalchemy.String,
                    length=128,
                    primaryKey=True,
                    doc="The name of the DatasetType.",
                ),
                ddl.FieldSpec(
                    name="dimension_name",
                    dtype=sqlalchemy.String,
                    length=32,
                    primaryKey=True,
                    doc="The name of a Dimension associated with this DatasetType.",
                ),
            ],
            foreignKeys=[
                ddl.ForeignKeySpec(
                    table="dataset_type",
                    source=("dataset_type_name",),
                    target=("dataset_type_name",),
                )
            ],
        ),
        dataset_collection=dataset_collection,
        quantum=quantum,
        dataset_consumers=ddl.TableSpec(
            doc="A table relating Quantum records to the Datasets they used as inputs.",
            fields=[
                ddl.FieldSpec(
                    name="quantum_id",
                    dtype=sqlalchemy.BigInteger,
                    nullable=False,
                    doc="A link to the associated Quantum.",
                ),
                ddl.FieldSpec(
                    name="dataset_id",
                    dtype=sqlalchemy.BigInteger,
                    nullable=True,
                    doc="A link to the associated dataset; null if the dataset has been deleted.",
                ),
                ddl.FieldSpec(
                    name="actual",
                    dtype=sqlalchemy.Boolean,
                    nullable=False,
                    doc=(
                        "Whether the Dataset was actually used as an input by the Quantum "
                        "(as opposed to just predicted to be used during preflight)."
                    ),
                ),
            ],
            foreignKeys=[
                ddl.ForeignKeySpec(
                    table="quantum",
                    source=("quantum_id",),
                    target=("id",),
                    onDelete="CASCADE",
                ),
                ddl.ForeignKeySpec(
                    table="dataset",
                    source=("dataset_id",),
                    target=("dataset_id",),
                    onDelete="SET NULL",
                ),
            ],
        ),
        dataset_location=dataset_location,
        dataset_location_trash=dataset_location_trash,
    )