# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = (
    "addDatasetForeignKey",
    "makeCalibTableName",
    "makeCalibTableSpec",
    "makeStaticTableSpecs",
    "makeTagTableName",
    "makeTagTableSpec",
    "StaticDatasetTablesTuple",
)

from typing import (
    Any,
    List,
    Optional,
    Type,
    Union,
)

from collections import namedtuple

import sqlalchemy

from lsst.daf.butler import (
    addDimensionForeignKey,
    DatabaseTimespanRepresentation,
    DatasetType,
    ddl,
    DimensionUniverse,
)
from lsst.daf.butler.registry.interfaces import CollectionManager


DATASET_TYPE_NAME_LENGTH = 128


StaticDatasetTablesTuple = namedtuple(
    "StaticDatasetTablesTuple",
    [
        "dataset_type",
        "dataset",
    ]
)


def addDatasetForeignKey(tableSpec: ddl.TableSpec, *,
                         name: str = "dataset",
                         onDelete: Optional[str] = None,
                         constraint: bool = True,
                         **kwargs: Any) -> ddl.FieldSpec:
    """Add a foreign key column for datasets and (optionally) a constraint to
    a table.

    This is an internal interface for the ``byDimensions`` package; external
    code should use `DatasetRecordStorageManager.addDatasetForeignKey`
    instead.

    Parameters
    ----------
    tableSpec : `ddl.TableSpec`
        Specification for the table that should reference the dataset
        table.  Will be modified in place.
    name : `str`, optional
        A name to use for the prefix of the new field; the full name is
        ``{name}_id``.
    onDelete : `str`, optional
        One of "CASCADE" or "SET NULL", indicating what should happen to
        the referencing row if the referenced dataset row is deleted.
        `None` indicates that this should be an integrity error.
    constraint : `bool`, optional
        If `False` (`True` is the default), add a field that can be joined
        to the dataset primary key, but do not add a foreign key constraint.
    **kwargs
        Additional keyword arguments are forwarded to the `ddl.FieldSpec`
        constructor (only the ``name`` and ``dtype`` arguments are
        otherwise provided).

    Returns
    -------
    idSpec : `ddl.FieldSpec`
        Specification for the ID field.
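
    Examples
    --------
    A minimal sketch of typical use; the empty table spec being extended
    here is purely illustrative:

    >>> spec = ddl.TableSpec(fields=[])
    >>> idSpec = addDatasetForeignKey(spec, onDelete="CASCADE")
    >>> idSpec.name
    'dataset_id'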

102 """ 

103 idFieldSpec = ddl.FieldSpec(f"{name}_id", dtype=sqlalchemy.BigInteger, **kwargs) 

104 tableSpec.fields.add(idFieldSpec) 

105 if constraint: 

106 tableSpec.foreignKeys.append(ddl.ForeignKeySpec("dataset", source=(idFieldSpec.name,), 

107 target=("id",), onDelete=onDelete)) 

108 return idFieldSpec 


def makeStaticTableSpecs(collections: Type[CollectionManager],
                         universe: DimensionUniverse,
                         ) -> StaticDatasetTablesTuple:
    """Construct all static tables used by the classes in this package.

    Static tables are those that are present in all Registries and do not
    depend on what DatasetTypes have been registered.

    Parameters
    ----------
    collections : `type` [ `CollectionManager` ]
        `CollectionManager` subclass that manages the collections in this
        `Registry`.
    universe : `DimensionUniverse`
        Universe graph containing all dimensions known to this `Registry`.

    Returns
    -------
    specs : `StaticDatasetTablesTuple`
        A named tuple containing `ddl.TableSpec` instances.
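
    Examples
    --------
    A sketch of typical use; ``SqlCollectionManager`` stands in for a
    concrete `CollectionManager` subclass and is not defined in this
    package:

    >>> specs = makeStaticTableSpecs(SqlCollectionManager, universe)  # doctest: +SKIP
    >>> specs._fields  # doctest: +SKIP
    ('dataset_type', 'dataset')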

130 """ 

131 specs = StaticDatasetTablesTuple( 

132 dataset_type=ddl.TableSpec( 

133 fields=[ 

134 ddl.FieldSpec( 

135 name="id", 

136 dtype=sqlalchemy.BigInteger, 

137 autoincrement=True, 

138 primaryKey=True, 

139 doc=( 

140 "Autoincrement ID that uniquely identifies a dataset " 

141 "type in other tables. Python code outside the " 

142 "`Registry` class should never interact with this; " 

143 "its existence is considered an implementation detail." 

144 ), 

145 ), 

146 ddl.FieldSpec( 

147 name="name", 

148 dtype=sqlalchemy.String, 

149 length=DATASET_TYPE_NAME_LENGTH, 

150 nullable=False, 

151 doc="String name that uniquely identifies a dataset type.", 

152 ), 

153 ddl.FieldSpec( 

154 name="storage_class", 

155 dtype=sqlalchemy.String, 

156 length=64, 

157 nullable=False, 

158 doc=( 

159 "Name of the storage class associated with all " 

160 "datasets of this type. Storage classes are " 

161 "generally associated with a Python class, and are " 

162 "enumerated in butler configuration." 

163 ) 

164 ), 

165 ddl.FieldSpec( 

166 name="dimensions_encoded", 

167 dtype=ddl.Base64Bytes, 

168 nbytes=universe.getEncodeLength(), 

169 nullable=False, 

170 doc=( 

171 "An opaque (but reversible) encoding of the set of " 

172 "dimensions used to identify dataset of this type." 

173 ), 

174 ), 

175 ddl.FieldSpec( 

176 name="tag_association_table", 

177 dtype=sqlalchemy.String, 

178 length=128, 

179 nullable=False, 

180 doc=( 

181 "Name of the table that holds associations between " 

182 "datasets of this type and most types of collections." 

183 ), 

184 ), 

185 ddl.FieldSpec( 

186 name="calibration_association_table", 

187 dtype=sqlalchemy.String, 

188 length=128, 

189 nullable=True, 

190 doc=( 

191 "Name of the table that holds associations between " 

192 "datasets of this type and CALIBRATION collections. " 

193 "NULL values indicate dataset types with " 

194 "isCalibration=False." 

195 ), 

196 ), 

197 ], 

198 unique=[("name",)], 

199 ), 

200 dataset=ddl.TableSpec( 

201 fields=[ 

202 ddl.FieldSpec( 

203 name="id", 

204 dtype=sqlalchemy.BigInteger, 

205 autoincrement=True, 

206 primaryKey=True, 

207 doc="A unique autoincrement field used as the primary key for dataset.", 

208 ), 

209 ddl.FieldSpec( 

210 name="dataset_type_id", 

211 dtype=sqlalchemy.BigInteger, 

212 nullable=False, 

213 doc=( 

214 "Reference to the associated entry in the dataset_type " 

215 "table." 

216 ), 

217 ), 

218 # Foreign key field/constraint to run added below. 

219 ], 

220 foreignKeys=[ 

221 ddl.ForeignKeySpec("dataset_type", source=("dataset_type_id",), target=("id",)), 

222 ] 

223 ), 

224 ) 

225 # Add foreign key fields programmatically. 

226 collections.addRunForeignKey(specs.dataset, onDelete="CASCADE", nullable=False) 

227 return specs 


def makeTagTableName(datasetType: DatasetType) -> str:
    """Construct the name for a dynamic (DatasetType-dependent) tag table used
    by the classes in this package.

    Parameters
    ----------
    datasetType : `DatasetType`
        Dataset type to construct a name for.  Multiple dataset types may
        share the same table.

    Returns
    -------
    name : `str`
        Name for the table.
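
    Examples
    --------
    The name depends only on the dataset type's dimensions, which is why
    dataset types with identical dimensions share a table; the hex digits
    below are illustrative only:

    >>> makeTagTableName(datasetType)  # doctest: +SKIP
    'dataset_tags_1a2b3c'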

244 """ 

245 return f"dataset_tags_{datasetType.dimensions.encode().hex()}" 

246 

247 

248def makeCalibTableName(datasetType: DatasetType) -> str: 

249 """Construct the name for a dynamic (DatasetType-dependent) tag + validity 

250 range table used by the classes in this package. 

251 

252 Parameters 

253 ---------- 

254 datasetType : `DatasetType` 

255 Dataset type to construct a name for. Multiple dataset types may 

256 share the same table. 

257 

258 Returns 

259 ------- 

260 name : `str` 

261 Name for the table. 
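
    Examples
    --------
    Only calibration dataset types (those for which ``isCalibration()`` is
    `True`) have calib tables; ``calibDatasetType`` below is a stand-in for
    any such dataset type, and the hex digits are illustrative only:

    >>> makeCalibTableName(calibDatasetType)  # doctest: +SKIP
    'dataset_calibs_1a2b3c'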

262 """ 

263 assert datasetType.isCalibration() 

264 return f"dataset_calibs_{datasetType.dimensions.encode().hex()}" 

265 

266 

267def makeTagTableSpec(datasetType: DatasetType, collections: Type[CollectionManager]) -> ddl.TableSpec: 

268 """Construct the specification for a dynamic (DatasetType-dependent) tag 

269 table used by the classes in this package. 

270 

271 Parameters 

272 ---------- 

273 datasetType : `DatasetType` 

274 Dataset type to construct a spec for. Multiple dataset types may 

275 share the same table. 

276 collections : `type` [ `CollectionManager` ] 

277 `CollectionManager` subclass that can be used to construct foreign keys 

278 to the run and/or collection tables. 

279 

280 Returns 

281 ------- 

282 spec : `ddl.TableSpec` 

283 Specification for the table. 
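
    Examples
    --------
    A sketch of typical use; ``SqlCollectionManager`` again stands in for a
    concrete `CollectionManager` subclass:

    >>> spec = makeTagTableSpec(datasetType, SqlCollectionManager)  # doctest: +SKIP

    The returned spec has a compound primary key over the dataset and
    collection foreign key fields, plus a unique constraint over dataset
    type, collection, and the required dimensions of the data ID.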

284 """ 

285 tableSpec = ddl.TableSpec( 

286 fields=[ 

287 # Foreign key fields to dataset, collection, and usually dimension 

288 # tables added below. 

289 # The dataset_type_id field here would be redundant with the one 

290 # in the main monolithic dataset table, but we need it here for an 

291 # important unique constraint. 

292 ddl.FieldSpec("dataset_type_id", dtype=sqlalchemy.BigInteger, nullable=False), 

293 ], 

294 foreignKeys=[ 

295 ddl.ForeignKeySpec("dataset_type", source=("dataset_type_id",), target=("id",)), 

296 ] 

297 ) 

298 # We'll also have a unique constraint on dataset type, collection, and data 

299 # ID. We only include the required part of the data ID, as that's 

300 # sufficient and saves us from worrying about nulls in the constraint. 

301 constraint = ["dataset_type_id"] 

302 # Add foreign key fields to dataset table (part of the primary key) 

303 addDatasetForeignKey(tableSpec, primaryKey=True, onDelete="CASCADE") 

304 # Add foreign key fields to collection table (part of the primary key and 

305 # the data ID unique constraint). 

306 fieldSpec = collections.addCollectionForeignKey(tableSpec, primaryKey=True, onDelete="CASCADE") 

307 constraint.append(fieldSpec.name) 

308 for dimension in datasetType.dimensions.required: 

309 fieldSpec = addDimensionForeignKey(tableSpec, dimension=dimension, nullable=False, primaryKey=False) 

310 constraint.append(fieldSpec.name) 

311 # Actually add the unique constraint. 

312 tableSpec.unique.add(tuple(constraint)) 

313 return tableSpec 

314 

315 

316def makeCalibTableSpec(datasetType: DatasetType, collections: Type[CollectionManager], 

317 tsRepr: Type[DatabaseTimespanRepresentation]) -> ddl.TableSpec: 

318 """Construct the specification for a dynamic (DatasetType-dependent) tag + 

319 validity range table used by the classes in this package. 

320 

321 Parameters 

322 ---------- 

323 datasetType : `DatasetType` 

324 Dataset type to construct a spec for. Multiple dataset types may 

325 share the same table. 

326 collections : `type` [ `CollectionManager` ] 

327 `CollectionManager` subclass that can be used to construct foreign keys 

328 to the run and/or collection tables. 

329 

330 Returns 

331 ------- 

332 spec : `ddl.TableSpec` 

333 Specification for the table. 
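
    Examples
    --------
    A sketch of typical use; ``SqlCollectionManager`` and ``SqlTimespanRepr``
    stand in for concrete subclasses supplied by the database layer:

    >>> spec = makeCalibTableSpec(datasetType, SqlCollectionManager,
    ...                           SqlTimespanRepr)  # doctest: +SKIP

    Whether the temporal lookup becomes a database-level exclusion
    constraint or just a regular index depends on
    ``tsRepr.hasExclusionConstraint()``.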

334 """ 

335 tableSpec = ddl.TableSpec( 

336 fields=[ 

337 # This table has no natural primary key, compound or otherwise, so 

338 # we add an autoincrement key. We may use this field a bit 

339 # internally, but its presence is an implementation detail and it 

340 # shouldn't appear as a foreign key in any other tables. 

341 ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, autoincrement=True, primaryKey=True), 

342 # Foreign key fields to dataset, collection, and usually dimension 

343 # tables added below. The dataset_type_id field here is redundant 

344 # with the one in the main monolithic dataset table, but this bit 

345 # of denormalization lets us define what should be a much more 

346 # useful index. 

347 ddl.FieldSpec("dataset_type_id", dtype=sqlalchemy.BigInteger, nullable=False), 

348 ], 

349 foreignKeys=[ 

350 ddl.ForeignKeySpec("dataset_type", source=("dataset_type_id",), target=("id",)), 

351 ] 

352 ) 

353 # Record fields that should go in the temporal lookup index/constraint, 

354 # starting with the dataset type. 

355 index: List[Union[str, Type[DatabaseTimespanRepresentation]]] = ["dataset_type_id"] 

356 # Add foreign key fields to dataset table (not part of the temporal 

357 # lookup/constraint). 

358 addDatasetForeignKey(tableSpec, nullable=False, onDelete="CASCADE") 

359 # Add foreign key fields to collection table (part of the temporal lookup 

360 # index/constraint). 

361 fieldSpec = collections.addCollectionForeignKey(tableSpec, nullable=False, onDelete="CASCADE") 

362 index.append(fieldSpec.name) 

363 # Add dimension fields (part of the temporal lookup index.constraint). 

364 for dimension in datasetType.dimensions.required: 

365 fieldSpec = addDimensionForeignKey(tableSpec, dimension=dimension, nullable=False, primaryKey=False) 

366 index.append(fieldSpec.name) 

367 # Add validity-range field(s) (part of the temporal lookup 

368 # index/constraint). 

369 tsFieldSpecs = tsRepr.makeFieldSpecs(nullable=False) 

370 for fieldSpec in tsFieldSpecs: 

371 tableSpec.fields.add(fieldSpec) 

372 if tsRepr.hasExclusionConstraint(): 372 ↛ 377line 372 didn't jump to line 377, because the condition on line 372 was never true

373 # This database's timespan representation can define a database-level 

374 # constraint that prevents overlapping validity ranges for entries with 

375 # the same DatasetType, collection, and data ID. 

376 # This also creates an index. 

377 index.append(tsRepr) 

378 tableSpec.exclusion.add(tuple(index)) 

379 else: 

380 # No database-level constraint possible. We'll have to simulate that 

381 # in our DatasetRecordStorage.certify() implementation, and just create 

382 # a regular index here in the hope that helps with lookups. 

383 index.extend(fieldSpec.name for fieldSpec in tsFieldSpecs) 

384 tableSpec.indexes.add(tuple(index)) # type: ignore 

385 return tableSpec