# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = (
    "addDatasetForeignKey",
    "makeCalibTableName",
    "makeCalibTableSpec",
    "makeStaticTableSpecs",
    "makeTagTableName",
    "makeTagTableSpec",
    "StaticDatasetTablesTuple",
)

from typing import (
    Any,
    List,
    Optional,
    Type,
    Union,
)

from collections import namedtuple

import sqlalchemy

from lsst.daf.butler import (
    DatasetType,
    ddl,
    DimensionUniverse,
    GovernorDimension,
)
from lsst.daf.butler import addDimensionForeignKey, TimespanDatabaseRepresentation
from lsst.daf.butler.registry.interfaces import CollectionManager


DATASET_TYPE_NAME_LENGTH = 128


StaticDatasetTablesTuple = namedtuple(
    "StaticDatasetTablesTuple",
    [
        "dataset_type",
        "dataset",
    ]
)


def addDatasetForeignKey(tableSpec: ddl.TableSpec, dtype: type, *,
                         name: str = "dataset",
                         onDelete: Optional[str] = None,
                         constraint: bool = True,
                         **kwargs: Any) -> ddl.FieldSpec:
    """Add a foreign key column for datasets and (optionally) a constraint to
    a table.

    This is an internal interface for the ``byDimensions`` package; external
    code should use `DatasetRecordStorageManager.addDatasetForeignKey`
    instead.

    Parameters
    ----------
    tableSpec : `ddl.TableSpec`
        Specification for the table that should reference the dataset
        table. Will be modified in place.
    dtype : `type`
        Type of the column, same as the column type of the PK column of
        the referenced table (``dataset.id``).
    name : `str`, optional
        A name to use for the prefix of the new field; the full name is
        ``{name}_id``.
    onDelete : `str`, optional
        One of "CASCADE" or "SET NULL", indicating what should happen to
        the referencing row if the referenced dataset row is deleted. `None`
        indicates that this should be an integrity error.
    constraint : `bool`, optional
        If `False` (`True` is default), add a field that can be joined to
        the dataset primary key, but do not add a foreign key constraint.
    **kwargs
        Additional keyword arguments are forwarded to the `ddl.FieldSpec`
        constructor (only the ``name`` and ``dtype`` arguments are
        otherwise provided).

    Returns
    -------
    idSpec : `ddl.FieldSpec`
        Specification for the ID field.
    """
    idFieldSpec = ddl.FieldSpec(f"{name}_id", dtype=dtype, **kwargs)
    tableSpec.fields.add(idFieldSpec)
    if constraint:
        tableSpec.foreignKeys.append(ddl.ForeignKeySpec("dataset", source=(idFieldSpec.name,),
                                                        target=("id",), onDelete=onDelete))
    return idFieldSpec
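

# A minimal, hypothetical usage sketch (kept in a comment so nothing runs at
# import time); it assumes a caller-defined table spec:
#
#     spec = ddl.TableSpec(fields=[])
#     addDatasetForeignKey(spec, sqlalchemy.BigInteger, onDelete="CASCADE")
#     # spec now has a "dataset_id" field and, because ``constraint`` defaults
#     # to True, a foreign key constraint referencing dataset(id).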


def makeStaticTableSpecs(collections: Type[CollectionManager],
                         universe: DimensionUniverse,
                         dtype: type,
                         autoincrement: bool,
                         ) -> StaticDatasetTablesTuple:
    """Construct all static tables used by the classes in this package.

    Static tables are those that are present in all Registries and do not
    depend on what DatasetTypes have been registered.

    Parameters
    ----------
    collections : `type` [ `CollectionManager` ]
        Manager class for the collections in this `Registry`.
    universe : `DimensionUniverse`
        Universe graph containing all dimensions known to this `Registry`.
    dtype : `type`
        Type of the dataset ID (primary key) column.
    autoincrement : `bool`
        If `True`, the dataset ID column will be auto-incrementing.

    Returns
    -------
    specs : `StaticDatasetTablesTuple`
        A named tuple containing `ddl.TableSpec` instances.
    """
    specs = StaticDatasetTablesTuple(
        dataset_type=ddl.TableSpec(
            fields=[
                ddl.FieldSpec(
                    name="id",
                    dtype=sqlalchemy.BigInteger,
                    autoincrement=True,
                    primaryKey=True,
                    doc=(
                        "Autoincrement ID that uniquely identifies a dataset "
                        "type in other tables. Python code outside the "
                        "`Registry` class should never interact with this; "
                        "its existence is considered an implementation detail."
                    ),
                ),
                ddl.FieldSpec(
                    name="name",
                    dtype=sqlalchemy.String,
                    length=DATASET_TYPE_NAME_LENGTH,
                    nullable=False,
                    doc="String name that uniquely identifies a dataset type.",
                ),
                ddl.FieldSpec(
                    name="storage_class",
                    dtype=sqlalchemy.String,
                    length=64,
                    nullable=False,
                    doc=(
                        "Name of the storage class associated with all "
                        "datasets of this type. Storage classes are "
                        "generally associated with a Python class, and are "
                        "enumerated in butler configuration."
                    )
                ),
                ddl.FieldSpec(
                    name="dimensions_key",
                    dtype=sqlalchemy.BigInteger,
                    nullable=False,
                    doc=(
                        "Unique key for the set of dimensions that identifies "
                        "datasets of this type."
                    ),
                ),
                ddl.FieldSpec(
                    name="tag_association_table",
                    dtype=sqlalchemy.String,
                    length=128,
                    nullable=False,
                    doc=(
                        "Name of the table that holds associations between "
                        "datasets of this type and most types of collections."
                    ),
                ),
                ddl.FieldSpec(
                    name="calibration_association_table",
                    dtype=sqlalchemy.String,
                    length=128,
                    nullable=True,
                    doc=(
                        "Name of the table that holds associations between "
                        "datasets of this type and CALIBRATION collections. "
                        "NULL values indicate dataset types with "
                        "isCalibration=False."
                    ),
                ),
            ],
            unique=[("name",)],
        ),
        dataset=ddl.TableSpec(
            fields=[
                ddl.FieldSpec(
                    name="id",
                    dtype=dtype,
                    autoincrement=autoincrement,
                    primaryKey=True,
                    doc="A unique field used as the primary key of the dataset table.",
                ),
                ddl.FieldSpec(
                    name="dataset_type_id",
                    dtype=sqlalchemy.BigInteger,
                    nullable=False,
                    doc=(
                        "Reference to the associated entry in the dataset_type "
                        "table."
                    ),
                ),
                ddl.FieldSpec(
                    name="ingest_date",
                    dtype=sqlalchemy.TIMESTAMP,
                    default=sqlalchemy.sql.func.now(),
                    nullable=False,
                    doc="Time of dataset ingestion.",
                ),
                # Foreign key field/constraint to run added below.
            ],
            foreignKeys=[
                ddl.ForeignKeySpec("dataset_type", source=("dataset_type_id",), target=("id",)),
            ]
        ),
    )
    # Add foreign key fields programmatically.
    collections.addRunForeignKey(specs.dataset, onDelete="CASCADE", nullable=False)
    return specs
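

# A hypothetical construction sketch (comment only; MyCollectionManager is an
# assumed concrete CollectionManager subclass and ``universe`` an existing
# DimensionUniverse):
#
#     specs = makeStaticTableSpecs(MyCollectionManager, universe,
#                                  dtype=sqlalchemy.BigInteger,
#                                  autoincrement=True)
#     # specs.dataset_type and specs.dataset are ddl.TableSpec objects that a
#     # database layer can turn into actual tables.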


def makeTagTableName(datasetType: DatasetType, dimensionsKey: int) -> str:
    """Construct the name for a dynamic (DatasetType-dependent) tag table used
    by the classes in this package.

    Parameters
    ----------
    datasetType : `DatasetType`
        Dataset type to construct a name for. Multiple dataset types may
        share the same table.
    dimensionsKey : `int`
        Integer key used to save ``datasetType.dimensions`` to the database.

    Returns
    -------
    name : `str`
        Name for the table.
    """
    return f"dataset_tags_{dimensionsKey:08d}"
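

# For example (hypothetical key value): makeTagTableName(datasetType, 42)
# returns "dataset_tags_00000042". The name depends only on the dimensions
# key, which is how dataset types with the same dimensions share one table.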


def makeCalibTableName(datasetType: DatasetType, dimensionsKey: int) -> str:
    """Construct the name for a dynamic (DatasetType-dependent) tag + validity
    range table used by the classes in this package.

    Parameters
    ----------
    datasetType : `DatasetType`
        Dataset type to construct a name for. Multiple dataset types may
        share the same table.
    dimensionsKey : `int`
        Integer key used to save ``datasetType.dimensions`` to the database.

    Returns
    -------
    name : `str`
        Name for the table.
    """
    assert datasetType.isCalibration()
    return f"dataset_calibs_{dimensionsKey:08d}"
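

# Analogously (hypothetical key value): makeCalibTableName(calibType, 42)
# returns "dataset_calibs_00000042", but only for dataset types with
# isCalibration() == True; the assert above guards that invariant.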


def makeTagTableSpec(datasetType: DatasetType, collections: Type[CollectionManager],
                     dtype: type, *, constraints: bool = True) -> ddl.TableSpec:
    """Construct the specification for a dynamic (DatasetType-dependent) tag
    table used by the classes in this package.

    Parameters
    ----------
    datasetType : `DatasetType`
        Dataset type to construct a spec for. Multiple dataset types may
        share the same table.
    collections : `type` [ `CollectionManager` ]
        `CollectionManager` subclass that can be used to construct foreign
        keys to the run and/or collection tables.
    dtype : `type`
        Type of the FK column, same as the column type of the PK column of
        the referenced table (``dataset.id``).
    constraints : `bool`, optional
        If `False` (`True` is default), do not define foreign key constraints.

    Returns
    -------
    spec : `ddl.TableSpec`
        Specification for the table.
    """
    tableSpec = ddl.TableSpec(
        fields=[
            # Foreign key fields to dataset, collection, and usually dimension
            # tables added below.
            # The dataset_type_id field here would be redundant with the one
            # in the main monolithic dataset table, but we need it here for an
            # important unique constraint.
            ddl.FieldSpec("dataset_type_id", dtype=sqlalchemy.BigInteger, nullable=False),
        ]
    )
    if constraints:
        tableSpec.foreignKeys.append(
            ddl.ForeignKeySpec("dataset_type", source=("dataset_type_id",), target=("id",))
        )
    # We'll also have a unique constraint on dataset type, collection, and
    # data ID. We only include the required part of the data ID, as that's
    # sufficient and saves us from worrying about nulls in the constraint.
    constraint = ["dataset_type_id"]
    # Add foreign key fields to dataset table (part of the primary key).
    addDatasetForeignKey(tableSpec, dtype, primaryKey=True, onDelete="CASCADE", constraint=constraints)
    # Add foreign key fields to collection table (part of the primary key and
    # the data ID unique constraint).
    collectionFieldSpec = collections.addCollectionForeignKey(tableSpec, primaryKey=True, onDelete="CASCADE",
                                                              constraint=constraints)
    constraint.append(collectionFieldSpec.name)
    # Add foreign key constraint to the collection_summary_dataset_type table.
    if constraints:
        tableSpec.foreignKeys.append(
            ddl.ForeignKeySpec(
                "collection_summary_dataset_type",
                source=(collectionFieldSpec.name, "dataset_type_id"),
                target=(collectionFieldSpec.name, "dataset_type_id"),
            )
        )
    for dimension in datasetType.dimensions.required:
        fieldSpec = addDimensionForeignKey(tableSpec, dimension=dimension, nullable=False, primaryKey=False,
                                           constraint=constraints)
        constraint.append(fieldSpec.name)
        # If this is a governor dimension, add a foreign key constraint to the
        # collection_summary_<dimension> table.
        if isinstance(dimension, GovernorDimension) and constraints:
            tableSpec.foreignKeys.append(
                ddl.ForeignKeySpec(
                    f"collection_summary_{dimension.name}",
                    source=(collectionFieldSpec.name, fieldSpec.name),
                    target=(collectionFieldSpec.name, fieldSpec.name),
                )
            )
    # Actually add the unique constraint.
    tableSpec.unique.add(tuple(constraint))
    return tableSpec
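

# A hypothetical sketch, mirroring the static-table example above (the
# manager class name is assumed):
#
#     tagSpec = makeTagTableSpec(datasetType, MyCollectionManager,
#                                dtype=sqlalchemy.BigInteger)
#     # The resulting spec would back a table named by
#     # makeTagTableName(datasetType, dimensionsKey).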


def makeCalibTableSpec(datasetType: DatasetType, collections: Type[CollectionManager],
                       TimespanReprClass: Type[TimespanDatabaseRepresentation],
                       dtype: type) -> ddl.TableSpec:
    """Construct the specification for a dynamic (DatasetType-dependent) tag +
    validity range table used by the classes in this package.

    Parameters
    ----------
    datasetType : `DatasetType`
        Dataset type to construct a spec for. Multiple dataset types may
        share the same table.
    collections : `type` [ `CollectionManager` ]
        `CollectionManager` subclass that can be used to construct foreign
        keys to the run and/or collection tables.
    TimespanReprClass : `type` [ `TimespanDatabaseRepresentation` ]
        Class that encapsulates how timespans are represented in this
        database.
    dtype : `type`
        Type of the FK column, same as the column type of the PK column of
        the referenced table (``dataset.id``).

    Returns
    -------
    spec : `ddl.TableSpec`
        Specification for the table.
    """
    tableSpec = ddl.TableSpec(
        fields=[
            # This table has no natural primary key, compound or otherwise, so
            # we add an autoincrement key. We may use this field a bit
            # internally, but its presence is an implementation detail and it
            # shouldn't appear as a foreign key in any other tables.
            ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, autoincrement=True, primaryKey=True),
            # Foreign key fields to dataset, collection, and usually dimension
            # tables added below. The dataset_type_id field here is redundant
            # with the one in the main monolithic dataset table, but this bit
            # of denormalization lets us define what should be a much more
            # useful index.
            ddl.FieldSpec("dataset_type_id", dtype=sqlalchemy.BigInteger, nullable=False),
        ],
        foreignKeys=[
            ddl.ForeignKeySpec("dataset_type", source=("dataset_type_id",), target=("id",)),
        ]
    )
    # Record fields that should go in the temporal lookup index/constraint,
    # starting with the dataset type.
    index: List[Union[str, Type[TimespanDatabaseRepresentation]]] = ["dataset_type_id"]
    # Add foreign key fields to dataset table (not part of the temporal
    # lookup/constraint).
    addDatasetForeignKey(tableSpec, dtype, nullable=False, onDelete="CASCADE")
    # Add foreign key fields to collection table (part of the temporal lookup
    # index/constraint).
    collectionFieldSpec = collections.addCollectionForeignKey(tableSpec, nullable=False, onDelete="CASCADE")
    index.append(collectionFieldSpec.name)
    # Add foreign key constraint to the collection_summary_dataset_type table.
    tableSpec.foreignKeys.append(
        ddl.ForeignKeySpec(
            "collection_summary_dataset_type",
            source=(collectionFieldSpec.name, "dataset_type_id"),
            target=(collectionFieldSpec.name, "dataset_type_id"),
        )
    )
    # Add dimension fields (part of the temporal lookup index/constraint).
    for dimension in datasetType.dimensions.required:
        fieldSpec = addDimensionForeignKey(tableSpec, dimension=dimension, nullable=False, primaryKey=False)
        index.append(fieldSpec.name)
        # If this is a governor dimension, add a foreign key constraint to the
        # collection_summary_<dimension> table.
        if isinstance(dimension, GovernorDimension):
            tableSpec.foreignKeys.append(
                ddl.ForeignKeySpec(
                    f"collection_summary_{dimension.name}",
                    source=(collectionFieldSpec.name, fieldSpec.name),
                    target=(collectionFieldSpec.name, fieldSpec.name),
                )
            )
    # Add validity-range field(s) (part of the temporal lookup
    # index/constraint).
    tsFieldSpecs = TimespanReprClass.makeFieldSpecs(nullable=False)
    for fieldSpec in tsFieldSpecs:
        tableSpec.fields.add(fieldSpec)
    if TimespanReprClass.hasExclusionConstraint():
        # This database's timespan representation can define a database-level
        # constraint that prevents overlapping validity ranges for entries
        # with the same DatasetType, collection, and data ID.
        # This also creates an index.
        index.append(TimespanReprClass)
        tableSpec.exclusion.add(tuple(index))
    else:
        # No database-level constraint is possible. We'll have to simulate
        # that in our DatasetRecordStorage.certify() implementation, and just
        # create a regular index here in the hope that it helps with lookups.
        index.extend(fieldSpec.name for fieldSpec in tsFieldSpecs)
        tableSpec.indexes.add(tuple(index))  # type: ignore
    return tableSpec
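

# A hypothetical sketch (``timespanRepr`` stands in for whatever
# TimespanDatabaseRepresentation subclass the database layer actually uses):
#
#     calibSpec = makeCalibTableSpec(calibType, MyCollectionManager,
#                                    TimespanReprClass=timespanRepr,
#                                    dtype=sqlalchemy.BigInteger)
#     # Whether the spec carries an exclusion constraint or a plain index
#     # depends on TimespanReprClass.hasExclusionConstraint().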