
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = (
    "addDatasetForeignKey",
    "makeCalibTableName",
    "makeCalibTableSpec",
    "makeStaticTableSpecs",
    "makeTagTableName",
    "makeTagTableSpec",
    "StaticDatasetTablesTuple",
)

from typing import (
    Any,
    List,
    Optional,
    Type,
    Union,
)

from collections import namedtuple

import sqlalchemy

from lsst.daf.butler import (
    DatasetType,
    ddl,
    DimensionUniverse,
    GovernorDimension,
)
from lsst.daf.butler import addDimensionForeignKey, TimespanDatabaseRepresentation
from lsst.daf.butler.registry.interfaces import CollectionManager


DATASET_TYPE_NAME_LENGTH = 128


StaticDatasetTablesTuple = namedtuple(
    "StaticDatasetTablesTuple",
    [
        "dataset_type",
        "dataset",
    ]
)
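
# Illustrative sketch (not part of this module's API; the object names here
# are assumptions): a registry manager would typically materialize these
# specs into actual tables via its database's static-table context, e.g.
#
#   specs = makeStaticTableSpecs(type(collections), universe)
#   tables = StaticDatasetTablesTuple(
#       dataset_type=context.addTable("dataset_type", specs.dataset_type),
#       dataset=context.addTable("dataset", specs.dataset),
#   )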


def addDatasetForeignKey(tableSpec: ddl.TableSpec, *,
                         name: str = "dataset",
                         onDelete: Optional[str] = None,
                         constraint: bool = True,
                         **kwargs: Any) -> ddl.FieldSpec:
    """Add a foreign key column for datasets and (optionally) a constraint to
    a table.

    This is an internal interface for the ``byDimensions`` package; external
    code should use `DatasetRecordStorageManager.addDatasetForeignKey` instead.

    Parameters
    ----------
    tableSpec : `ddl.TableSpec`
        Specification for the table that should reference the dataset
        table. Will be modified in place.
    name : `str`, optional
        A name to use for the prefix of the new field; the full name is
        ``{name}_id``.
    onDelete : `str`, optional
        One of "CASCADE" or "SET NULL", indicating what should happen to
        the referencing row if the dataset row is deleted. `None`
        indicates that this should be an integrity error.
    constraint : `bool`, optional
        If `False` (`True` is default), add a field that can be joined to
        the dataset primary key, but do not add a foreign key constraint.
    **kwargs
        Additional keyword arguments are forwarded to the `ddl.FieldSpec`
        constructor (only the ``name`` and ``dtype`` arguments are
        otherwise provided).

    Returns
    -------
    idSpec : `ddl.FieldSpec`
        Specification for the ID field.
    """
    idFieldSpec = ddl.FieldSpec(f"{name}_id", dtype=sqlalchemy.BigInteger, **kwargs)
    tableSpec.fields.add(idFieldSpec)
    if constraint:
        tableSpec.foreignKeys.append(ddl.ForeignKeySpec("dataset", source=(idFieldSpec.name,),
                                                        target=("id",), onDelete=onDelete))
    return idFieldSpec
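
# Example (an illustrative sketch, not from the original source): attach a
# dataset foreign key column named "parent_id" to a hypothetical table spec;
# the helper also appends a ForeignKeySpec targeting dataset.id unless
# constraint=False.
#
#   spec = ddl.TableSpec(fields=[])
#   field = addDatasetForeignKey(spec, name="parent", onDelete="CASCADE",
#                                nullable=False)
#   assert field.name == "parent_id"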


def makeStaticTableSpecs(collections: Type[CollectionManager],
                         universe: DimensionUniverse,
                         ) -> StaticDatasetTablesTuple:
    """Construct all static tables used by the classes in this package.

    Static tables are those that are present in all Registries and do not
    depend on what DatasetTypes have been registered.

    Parameters
    ----------
    collections : `type` [ `CollectionManager` ]
        Manager class for the collections in this `Registry`.
    universe : `DimensionUniverse`
        Universe graph containing all dimensions known to this `Registry`.

    Returns
    -------
    specs : `StaticDatasetTablesTuple`
        A named tuple containing `ddl.TableSpec` instances.
    """
    specs = StaticDatasetTablesTuple(
        dataset_type=ddl.TableSpec(
            fields=[
                ddl.FieldSpec(
                    name="id",
                    dtype=sqlalchemy.BigInteger,
                    autoincrement=True,
                    primaryKey=True,
                    doc=(
                        "Autoincrement ID that uniquely identifies a dataset "
                        "type in other tables. Python code outside the "
                        "`Registry` class should never interact with this; "
                        "its existence is considered an implementation detail."
                    ),
                ),
                ddl.FieldSpec(
                    name="name",
                    dtype=sqlalchemy.String,
                    length=DATASET_TYPE_NAME_LENGTH,
                    nullable=False,
                    doc="String name that uniquely identifies a dataset type.",
                ),
                ddl.FieldSpec(
                    name="storage_class",
                    dtype=sqlalchemy.String,
                    length=64,
                    nullable=False,
                    doc=(
                        "Name of the storage class associated with all "
                        "datasets of this type. Storage classes are "
                        "generally associated with a Python class, and are "
                        "enumerated in butler configuration."
                    )
                ),
                ddl.FieldSpec(
                    name="dimensions_key",
                    dtype=sqlalchemy.BigInteger,
                    nullable=False,
                    doc=(
                        "Unique key for the set of dimensions that identifies "
                        "datasets of this type."
                    ),
                ),
                ddl.FieldSpec(
                    name="tag_association_table",
                    dtype=sqlalchemy.String,
                    length=128,
                    nullable=False,
                    doc=(
                        "Name of the table that holds associations between "
                        "datasets of this type and most types of collections."
                    ),
                ),
                ddl.FieldSpec(
                    name="calibration_association_table",
                    dtype=sqlalchemy.String,
                    length=128,
                    nullable=True,
                    doc=(
                        "Name of the table that holds associations between "
                        "datasets of this type and CALIBRATION collections. "
                        "NULL values indicate dataset types with "
                        "isCalibration=False."
                    ),
                ),
            ],
            unique=[("name",)],
        ),
        dataset=ddl.TableSpec(
            fields=[
                ddl.FieldSpec(
                    name="id",
                    dtype=sqlalchemy.BigInteger,
                    autoincrement=True,
                    primaryKey=True,
                    doc="A unique autoincrement field used as the primary key for the dataset table.",
                ),
                ddl.FieldSpec(
                    name="dataset_type_id",
                    dtype=sqlalchemy.BigInteger,
                    nullable=False,
                    doc=(
                        "Reference to the associated entry in the dataset_type "
                        "table."
                    ),
                ),
                ddl.FieldSpec(
                    name="ingest_date",
                    dtype=sqlalchemy.TIMESTAMP,
                    default=sqlalchemy.sql.func.now(),
                    nullable=False,
                    doc="Time of dataset ingestion.",
                ),
                # Foreign key field/constraint to run added below.
            ],
            foreignKeys=[
                ddl.ForeignKeySpec("dataset_type", source=("dataset_type_id",), target=("id",)),
            ]
        ),
    )
    # Add foreign key fields programmatically.
    collections.addRunForeignKey(specs.dataset, onDelete="CASCADE", nullable=False)
    return specs
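
# Illustrative usage sketch (MyCollectionManager stands in for a concrete
# CollectionManager subclass): the returned named tuple carries one
# ddl.TableSpec per static table.
#
#   specs = makeStaticTableSpecs(MyCollectionManager, universe)
#   specs.dataset_type.unique   # contains ("name",)
#   specs.dataset.foreignKeys   # includes FKs to the dataset_type and run tables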


def makeTagTableName(datasetType: DatasetType, dimensionsKey: int) -> str:
    """Construct the name for a dynamic (DatasetType-dependent) tag table used
    by the classes in this package.

    Parameters
    ----------
    datasetType : `DatasetType`
        Dataset type to construct a name for. Multiple dataset types may
        share the same table.
    dimensionsKey : `int`
        Integer key used to save ``datasetType.dimensions`` to the database.

    Returns
    -------
    name : `str`
        Name for the table.
    """
    return f"dataset_tags_{dimensionsKey:08d}"
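
# For example, a dataset type whose dimensions were saved under key 12 maps
# to the table name "dataset_tags_00000012":
#
#   makeTagTableName(datasetType, 12)   # -> "dataset_tags_00000012"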


def makeCalibTableName(datasetType: DatasetType, dimensionsKey: int) -> str:
    """Construct the name for a dynamic (DatasetType-dependent) tag + validity
    range table used by the classes in this package.

    Parameters
    ----------
    datasetType : `DatasetType`
        Dataset type to construct a name for. Multiple dataset types may
        share the same table.
    dimensionsKey : `int`
        Integer key used to save ``datasetType.dimensions`` to the database.

    Returns
    -------
    name : `str`
        Name for the table.
    """
    assert datasetType.isCalibration()
    return f"dataset_calibs_{dimensionsKey:08d}"
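
# Analogously, dimensions key 12 maps to "dataset_calibs_00000012"; the
# assert above guards against calling this for dataset types that are not
# calibrations:
#
#   makeCalibTableName(datasetType, 12)   # -> "dataset_calibs_00000012"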


def makeTagTableSpec(datasetType: DatasetType, collections: Type[CollectionManager]) -> ddl.TableSpec:
    """Construct the specification for a dynamic (DatasetType-dependent) tag
    table used by the classes in this package.

    Parameters
    ----------
    datasetType : `DatasetType`
        Dataset type to construct a spec for. Multiple dataset types may
        share the same table.
    collections : `type` [ `CollectionManager` ]
        `CollectionManager` subclass that can be used to construct foreign keys
        to the run and/or collection tables.

    Returns
    -------
    spec : `ddl.TableSpec`
        Specification for the table.
    """
    tableSpec = ddl.TableSpec(
        fields=[
            # Foreign key fields to dataset, collection, and usually dimension
            # tables added below.
            # The dataset_type_id field here would be redundant with the one
            # in the main monolithic dataset table, but we need it here for an
            # important unique constraint.
            ddl.FieldSpec("dataset_type_id", dtype=sqlalchemy.BigInteger, nullable=False),
        ],
        foreignKeys=[
            ddl.ForeignKeySpec("dataset_type", source=("dataset_type_id",), target=("id",)),
        ]
    )
    # We'll also have a unique constraint on dataset type, collection, and
    # data ID. We only include the required part of the data ID, as that's
    # sufficient and saves us from worrying about nulls in the constraint.
    constraint = ["dataset_type_id"]
    # Add foreign key fields to dataset table (part of the primary key).
    addDatasetForeignKey(tableSpec, primaryKey=True, onDelete="CASCADE")
    # Add foreign key fields to collection table (part of the primary key and
    # the data ID unique constraint).
    collectionFieldSpec = collections.addCollectionForeignKey(tableSpec, primaryKey=True, onDelete="CASCADE")
    constraint.append(collectionFieldSpec.name)
    # Add foreign key constraint to the collection_summary_dataset_type table.
    tableSpec.foreignKeys.append(
        ddl.ForeignKeySpec(
            "collection_summary_dataset_type",
            source=(collectionFieldSpec.name, "dataset_type_id"),
            target=(collectionFieldSpec.name, "dataset_type_id"),
        )
    )
    for dimension in datasetType.dimensions.required:
        fieldSpec = addDimensionForeignKey(tableSpec, dimension=dimension, nullable=False, primaryKey=False)
        constraint.append(fieldSpec.name)
        # If this is a governor dimension, add a foreign key constraint to the
        # collection_summary_<dimension> table.
        if isinstance(dimension, GovernorDimension):
            tableSpec.foreignKeys.append(
                ddl.ForeignKeySpec(
                    f"collection_summary_{dimension.name}",
                    source=(collectionFieldSpec.name, fieldSpec.name),
                    target=(collectionFieldSpec.name, fieldSpec.name),
                )
            )
    # Actually add the unique constraint.
    tableSpec.unique.add(tuple(constraint))
    return tableSpec
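
# Illustrative sketch of the result (dimension names are hypothetical): for
# a dataset type with required dimensions ("instrument", "detector"), the
# tag table gets a compound primary key (dataset_id, <collection FK>), a
# unique constraint over (dataset_type_id, <collection FK>, instrument,
# detector), and foreign keys into the collection-summary tables.
#
#   spec = makeTagTableSpec(datasetType, MyCollectionManager)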


def makeCalibTableSpec(datasetType: DatasetType, collections: Type[CollectionManager],
                       TimespanReprClass: Type[TimespanDatabaseRepresentation]) -> ddl.TableSpec:
    """Construct the specification for a dynamic (DatasetType-dependent) tag +
    validity range table used by the classes in this package.

    Parameters
    ----------
    datasetType : `DatasetType`
        Dataset type to construct a spec for. Multiple dataset types may
        share the same table.
    collections : `type` [ `CollectionManager` ]
        `CollectionManager` subclass that can be used to construct foreign keys
        to the run and/or collection tables.
    TimespanReprClass : `type` [ `TimespanDatabaseRepresentation` ]
        `TimespanDatabaseRepresentation` subclass that encapsulates how
        timespans are represented in this database.

    Returns
    -------
    spec : `ddl.TableSpec`
        Specification for the table.
    """
    tableSpec = ddl.TableSpec(
        fields=[
            # This table has no natural primary key, compound or otherwise, so
            # we add an autoincrement key. We may use this field a bit
            # internally, but its presence is an implementation detail and it
            # shouldn't appear as a foreign key in any other tables.
            ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, autoincrement=True, primaryKey=True),
            # Foreign key fields to dataset, collection, and usually dimension
            # tables added below. The dataset_type_id field here is redundant
            # with the one in the main monolithic dataset table, but this bit
            # of denormalization lets us define what should be a much more
            # useful index.
            ddl.FieldSpec("dataset_type_id", dtype=sqlalchemy.BigInteger, nullable=False),
        ],
        foreignKeys=[
            ddl.ForeignKeySpec("dataset_type", source=("dataset_type_id",), target=("id",)),
        ]
    )
    # Record fields that should go in the temporal lookup index/constraint,
    # starting with the dataset type.
    index: List[Union[str, Type[TimespanDatabaseRepresentation]]] = ["dataset_type_id"]
    # Add foreign key fields to dataset table (not part of the temporal
    # lookup/constraint).
    addDatasetForeignKey(tableSpec, nullable=False, onDelete="CASCADE")
    # Add foreign key fields to collection table (part of the temporal lookup
    # index/constraint).
    collectionFieldSpec = collections.addCollectionForeignKey(tableSpec, nullable=False, onDelete="CASCADE")
    index.append(collectionFieldSpec.name)
    # Add foreign key constraint to the collection_summary_dataset_type table.
    tableSpec.foreignKeys.append(
        ddl.ForeignKeySpec(
            "collection_summary_dataset_type",
            source=(collectionFieldSpec.name, "dataset_type_id"),
            target=(collectionFieldSpec.name, "dataset_type_id"),
        )
    )
    # Add dimension fields (part of the temporal lookup index/constraint).
    for dimension in datasetType.dimensions.required:
        fieldSpec = addDimensionForeignKey(tableSpec, dimension=dimension, nullable=False, primaryKey=False)
        index.append(fieldSpec.name)
        # If this is a governor dimension, add a foreign key constraint to the
        # collection_summary_<dimension> table.
        if isinstance(dimension, GovernorDimension):
            tableSpec.foreignKeys.append(
                ddl.ForeignKeySpec(
                    f"collection_summary_{dimension.name}",
                    source=(collectionFieldSpec.name, fieldSpec.name),
                    target=(collectionFieldSpec.name, fieldSpec.name),
                )
            )
    # Add validity-range field(s) (part of the temporal lookup
    # index/constraint).
    tsFieldSpecs = TimespanReprClass.makeFieldSpecs(nullable=False)
    for fieldSpec in tsFieldSpecs:
        tableSpec.fields.add(fieldSpec)
    if TimespanReprClass.hasExclusionConstraint():
        # This database's timespan representation can define a database-level
        # constraint that prevents overlapping validity ranges for entries
        # with the same DatasetType, collection, and data ID.
        # This also creates an index.
        index.append(TimespanReprClass)
        tableSpec.exclusion.add(tuple(index))
    else:
        # No database-level constraint is possible. We'll have to simulate
        # that in our DatasetRecordStorage.certify() implementation, and just
        # create a regular index here in the hope that it helps with lookups.
        index.extend(fieldSpec.name for fieldSpec in tsFieldSpecs)
        tableSpec.indexes.add(tuple(index))  # type: ignore
    return tableSpec
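
# Illustrative usage sketch: TimespanReprClass is database-specific, so a
# caller would typically obtain it from its Database object rather than
# hard-coding one (the accessor name below is an assumption).
#
#   spec = makeCalibTableSpec(datasetType, MyCollectionManager,
#                             db.getTimespanRepresentation())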