Coverage for python/lsst/daf/butler/registry/datasets/byDimensions/tables.py: 96%
60 statements
coverage.py v6.5.0, created at 2022-12-01 19:54 +0000

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = (
    "addDatasetForeignKey",
    "makeCalibTableName",
    "makeCalibTableSpec",
    "makeStaticTableSpecs",
    "makeTagTableName",
    "makeTagTableSpec",
    "StaticDatasetTablesTuple",
)

from collections import namedtuple
from typing import (
    Any,
    List,
    Optional,
    Type,
    Union,
)

import sqlalchemy

from lsst.daf.butler import (
    DatasetType,
    DimensionUniverse,
    GovernorDimension,
    TimespanDatabaseRepresentation,
    addDimensionForeignKey,
    ddl,
)
from lsst.daf.butler.registry.interfaces import CollectionManager


DATASET_TYPE_NAME_LENGTH = 128


StaticDatasetTablesTuple = namedtuple(
    "StaticDatasetTablesTuple",
    [
        "dataset_type",
        "dataset",
    ],
)


def addDatasetForeignKey(tableSpec: ddl.TableSpec, dtype: type, *,
                         name: str = "dataset",
                         onDelete: Optional[str] = None,
                         constraint: bool = True,
                         **kwargs: Any) -> ddl.FieldSpec:
    """Add a foreign key column for datasets and (optionally) a constraint
    to a table.

    This is an internal interface for the ``byDimensions`` package; external
    code should use `DatasetRecordStorageManager.addDatasetForeignKey`
    instead.

    Parameters
    ----------
    tableSpec : `ddl.TableSpec`
        Specification for the table that should reference the dataset
        table.  Will be modified in place.
    dtype : `type`
        Type of the column; must be the same as the column type of the
        primary key column of the referenced table (``dataset.id``).
    name : `str`, optional
        Name to use as the prefix of the new field; the full field name is
        ``{name}_id``.
    onDelete : `str`, optional
        One of "CASCADE" or "SET NULL", indicating what should happen to
        the referencing row if the collection row is deleted.  `None`
        indicates that this should be an integrity error.
    constraint : `bool`, optional
        If `False` (`True` is the default), add a field that can be joined
        to the dataset primary key, but do not add a foreign key constraint.
    **kwargs
        Additional keyword arguments are forwarded to the `ddl.FieldSpec`
        constructor (only the ``name`` and ``dtype`` arguments are
        otherwise provided).

    Returns
    -------
    idSpec : `ddl.FieldSpec`
        Specification for the ID field.
    """
    idFieldSpec = ddl.FieldSpec(f"{name}_id", dtype=dtype, **kwargs)
    tableSpec.fields.add(idFieldSpec)
    if constraint:
        tableSpec.foreignKeys.append(ddl.ForeignKeySpec("dataset", source=(idFieldSpec.name,),
                                                        target=("id",), onDelete=onDelete))
    return idFieldSpec
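# Usage sketch (illustrative only: ``exampleSpec`` is a hypothetical table
# spec, but the call mirrors the ones makeTagTableSpec and makeCalibTableSpec
# make below):
#
#     exampleSpec = ddl.TableSpec(fields=[])
#     idSpec = addDatasetForeignKey(exampleSpec, sqlalchemy.BigInteger,
#                                   primaryKey=True, onDelete="CASCADE")
#     assert idSpec.name == "dataset_id"   # field name is "{name}_id"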


def makeStaticTableSpecs(collections: Type[CollectionManager],
                         universe: DimensionUniverse,
                         dtype: type,
                         autoincrement: bool,
                         ) -> StaticDatasetTablesTuple:
    """Construct all static tables used by the classes in this package.

    Static tables are those that are present in all Registries and do not
    depend on what DatasetTypes have been registered.

    Parameters
    ----------
    collections : `type` [ `CollectionManager` ]
        Manager class for the collections in this `Registry`.
    universe : `DimensionUniverse`
        Universe graph containing all dimensions known to this `Registry`.
    dtype : `type`
        Type of the dataset ID (primary key) column.
    autoincrement : `bool`
        If `True`, the dataset ID column will be auto-incrementing.

    Returns
    -------
    specs : `StaticDatasetTablesTuple`
        A named tuple containing `ddl.TableSpec` instances.
    """
    specs = StaticDatasetTablesTuple(
        dataset_type=ddl.TableSpec(
            fields=[
                ddl.FieldSpec(
                    name="id",
                    dtype=sqlalchemy.BigInteger,
                    autoincrement=True,
                    primaryKey=True,
                    doc=(
                        "Autoincrement ID that uniquely identifies a dataset "
                        "type in other tables.  Python code outside the "
                        "`Registry` class should never interact with this; "
                        "its existence is considered an implementation detail."
                    ),
                ),
                ddl.FieldSpec(
                    name="name",
                    dtype=sqlalchemy.String,
                    length=DATASET_TYPE_NAME_LENGTH,
                    nullable=False,
                    doc="String name that uniquely identifies a dataset type.",
                ),
                ddl.FieldSpec(
                    name="storage_class",
                    dtype=sqlalchemy.String,
                    length=64,
                    nullable=False,
                    doc=(
                        "Name of the storage class associated with all "
                        "datasets of this type.  Storage classes are "
                        "generally associated with a Python class, and are "
                        "enumerated in butler configuration."
                    ),
                ),
                ddl.FieldSpec(
                    name="dimensions_key",
                    dtype=sqlalchemy.BigInteger,
                    nullable=False,
                    doc=(
                        "Unique key for the set of dimensions that identifies "
                        "datasets of this type."
                    ),
                ),
                ddl.FieldSpec(
                    name="tag_association_table",
                    dtype=sqlalchemy.String,
                    length=128,
                    nullable=False,
                    doc=(
                        "Name of the table that holds associations between "
                        "datasets of this type and most types of collections."
                    ),
                ),
                ddl.FieldSpec(
                    name="calibration_association_table",
                    dtype=sqlalchemy.String,
                    length=128,
                    nullable=True,
                    doc=(
                        "Name of the table that holds associations between "
                        "datasets of this type and CALIBRATION collections.  "
                        "NULL values indicate dataset types with "
                        "isCalibration=False."
                    ),
                ),
            ],
            unique=[("name",)],
        ),
        dataset=ddl.TableSpec(
            fields=[
                ddl.FieldSpec(
                    name="id",
                    dtype=dtype,
                    autoincrement=autoincrement,
                    primaryKey=True,
                    doc="A unique field used as the primary key for dataset.",
                ),
                ddl.FieldSpec(
                    name="dataset_type_id",
                    dtype=sqlalchemy.BigInteger,
                    nullable=False,
                    doc=(
                        "Reference to the associated entry in the "
                        "dataset_type table."
                    ),
                ),
                ddl.FieldSpec(
                    name="ingest_date",
                    dtype=sqlalchemy.TIMESTAMP,
                    default=sqlalchemy.sql.func.now(),
                    nullable=False,
                    doc="Time of dataset ingestion.",
                ),
                # Foreign key field/constraint to run added below.
            ],
            foreignKeys=[
                ddl.ForeignKeySpec("dataset_type", source=("dataset_type_id",), target=("id",)),
            ],
        ),
    )
    # Add foreign key fields programmatically.
    collections.addRunForeignKey(specs.dataset, onDelete="CASCADE", nullable=False)
    return specs
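# Usage sketch (illustrative; NameKeyCollectionManager is one concrete
# manager class, and its import path is given here only as an assumption):
#
#     from lsst.daf.butler.registry.collections.nameKey import (
#         NameKeyCollectionManager,
#     )
#
#     specs = makeStaticTableSpecs(
#         NameKeyCollectionManager,
#         universe=DimensionUniverse(),
#         dtype=sqlalchemy.BigInteger,
#         autoincrement=True,
#     )
#     assert isinstance(specs.dataset, ddl.TableSpec)
#     assert isinstance(specs.dataset_type, ddl.TableSpec)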


def makeTagTableName(datasetType: DatasetType, dimensionsKey: int) -> str:
    """Construct the name for a dynamic (DatasetType-dependent) tag table
    used by the classes in this package.

    Parameters
    ----------
    datasetType : `DatasetType`
        Dataset type to construct a name for.  Multiple dataset types may
        share the same table.
    dimensionsKey : `int`
        Integer key used to save ``datasetType.dimensions`` to the database.

    Returns
    -------
    name : `str`
        Name for the table.
    """
    return f"dataset_tags_{dimensionsKey:08d}"


def makeCalibTableName(datasetType: DatasetType, dimensionsKey: int) -> str:
    """Construct the name for a dynamic (DatasetType-dependent) tag +
    validity range table used by the classes in this package.

    Parameters
    ----------
    datasetType : `DatasetType`
        Dataset type to construct a name for.  Multiple dataset types may
        share the same table.
    dimensionsKey : `int`
        Integer key used to save ``datasetType.dimensions`` to the database.

    Returns
    -------
    name : `str`
        Name for the table.
    """
    assert datasetType.isCalibration()
    return f"dataset_calibs_{dimensionsKey:08d}"


def makeTagTableSpec(datasetType: DatasetType, collections: Type[CollectionManager],
                     dtype: type) -> ddl.TableSpec:
    """Construct the specification for a dynamic (DatasetType-dependent) tag
    table used by the classes in this package.

    Parameters
    ----------
    datasetType : `DatasetType`
        Dataset type to construct a spec for.  Multiple dataset types may
        share the same table.
    collections : `type` [ `CollectionManager` ]
        `CollectionManager` subclass that can be used to construct foreign
        keys to the run and/or collection tables.
    dtype : `type`
        Type of the FK column; must be the same as the column type of the
        primary key column of the referenced table (``dataset.id``).

    Returns
    -------
    spec : `ddl.TableSpec`
        Specification for the table.
    """
    tableSpec = ddl.TableSpec(
        fields=[
            # Foreign key fields to dataset, collection, and usually dimension
            # tables added below.
            # The dataset_type_id field here would be redundant with the one
            # in the main monolithic dataset table, but we need it here for an
            # important unique constraint.
            ddl.FieldSpec("dataset_type_id", dtype=sqlalchemy.BigInteger, nullable=False),
        ],
        foreignKeys=[
            ddl.ForeignKeySpec("dataset_type", source=("dataset_type_id",), target=("id",)),
        ],
    )
    # We'll also have a unique constraint on dataset type, collection, and
    # data ID.  We only include the required part of the data ID, as that's
    # sufficient and saves us from worrying about nulls in the constraint.
    constraint = ["dataset_type_id"]
    # Add foreign key fields to dataset table (part of the primary key).
    addDatasetForeignKey(tableSpec, dtype, primaryKey=True, onDelete="CASCADE")
    # Add foreign key fields to collection table (part of the primary key and
    # the data ID unique constraint).
    collectionFieldSpec = collections.addCollectionForeignKey(tableSpec, primaryKey=True, onDelete="CASCADE")
    constraint.append(collectionFieldSpec.name)
    # Add foreign key constraint to the collection_summary_dataset_type table.
    tableSpec.foreignKeys.append(
        ddl.ForeignKeySpec(
            "collection_summary_dataset_type",
            source=(collectionFieldSpec.name, "dataset_type_id"),
            target=(collectionFieldSpec.name, "dataset_type_id"),
        )
    )
    for dimension in datasetType.dimensions.required:
        fieldSpec = addDimensionForeignKey(tableSpec, dimension=dimension, nullable=False, primaryKey=False)
        constraint.append(fieldSpec.name)
        # If this is a governor dimension, add a foreign key constraint to
        # the collection_summary_<dimension> table.
        if isinstance(dimension, GovernorDimension):
            tableSpec.foreignKeys.append(
                ddl.ForeignKeySpec(
                    f"collection_summary_{dimension.name}",
                    source=(collectionFieldSpec.name, fieldSpec.name),
                    target=(collectionFieldSpec.name, fieldSpec.name),
                )
            )
    # Actually add the unique constraint.
    tableSpec.unique.add(tuple(constraint))
    return tableSpec
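# Usage sketch (illustrative; ``myDatasetType`` and the manager class are
# hypothetical stand-ins):
#
#     spec = makeTagTableSpec(myDatasetType, NameKeyCollectionManager,
#                             dtype=sqlalchemy.BigInteger)
#     # spec.unique now contains one tuple:
#     # (dataset_type_id, <collection FK>, <required dimension fields>).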


def makeCalibTableSpec(datasetType: DatasetType, collections: Type[CollectionManager],
                       TimespanReprClass: Type[TimespanDatabaseRepresentation],
                       dtype: type) -> ddl.TableSpec:
    """Construct the specification for a dynamic (DatasetType-dependent) tag +
    validity range table used by the classes in this package.

    Parameters
    ----------
    datasetType : `DatasetType`
        Dataset type to construct a spec for.  Multiple dataset types may
        share the same table.
    collections : `type` [ `CollectionManager` ]
        `CollectionManager` subclass that can be used to construct foreign
        keys to the run and/or collection tables.
    TimespanReprClass : `type` [ `TimespanDatabaseRepresentation` ]
        Class that defines how timespans are represented in this database.
    dtype : `type`
        Type of the FK column; must be the same as the column type of the
        primary key column of the referenced table (``dataset.id``).

    Returns
    -------
    spec : `ddl.TableSpec`
        Specification for the table.
    """
    tableSpec = ddl.TableSpec(
        fields=[
            # This table has no natural primary key, compound or otherwise, so
            # we add an autoincrement key.  We may use this field a bit
            # internally, but its presence is an implementation detail and it
            # shouldn't appear as a foreign key in any other tables.
            ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, autoincrement=True, primaryKey=True),
            # Foreign key fields to dataset, collection, and usually dimension
            # tables added below.  The dataset_type_id field here is redundant
            # with the one in the main monolithic dataset table, but this bit
            # of denormalization lets us define what should be a much more
            # useful index.
            ddl.FieldSpec("dataset_type_id", dtype=sqlalchemy.BigInteger, nullable=False),
        ],
        foreignKeys=[
            ddl.ForeignKeySpec("dataset_type", source=("dataset_type_id",), target=("id",)),
        ],
    )
    # Record fields that should go in the temporal lookup index/constraint,
    # starting with the dataset type.
    index: List[Union[str, Type[TimespanDatabaseRepresentation]]] = ["dataset_type_id"]
    # Add foreign key fields to dataset table (not part of the temporal
    # lookup/constraint).
    addDatasetForeignKey(tableSpec, dtype, nullable=False, onDelete="CASCADE")
    # Add foreign key fields to collection table (part of the temporal lookup
    # index/constraint).
    collectionFieldSpec = collections.addCollectionForeignKey(tableSpec, nullable=False, onDelete="CASCADE")
    index.append(collectionFieldSpec.name)
    # Add foreign key constraint to the collection_summary_dataset_type table.
    tableSpec.foreignKeys.append(
        ddl.ForeignKeySpec(
            "collection_summary_dataset_type",
            source=(collectionFieldSpec.name, "dataset_type_id"),
            target=(collectionFieldSpec.name, "dataset_type_id"),
        )
    )
    # Add dimension fields (part of the temporal lookup index/constraint).
    for dimension in datasetType.dimensions.required:
        fieldSpec = addDimensionForeignKey(tableSpec, dimension=dimension, nullable=False, primaryKey=False)
        index.append(fieldSpec.name)
        # If this is a governor dimension, add a foreign key constraint to the
        # collection_summary_<dimension> table.
        if isinstance(dimension, GovernorDimension):
            tableSpec.foreignKeys.append(
                ddl.ForeignKeySpec(
                    f"collection_summary_{dimension.name}",
                    source=(collectionFieldSpec.name, fieldSpec.name),
                    target=(collectionFieldSpec.name, fieldSpec.name),
                )
            )
    # Add validity-range field(s) (part of the temporal lookup
    # index/constraint).
    tsFieldSpecs = TimespanReprClass.makeFieldSpecs(nullable=False)
    for fieldSpec in tsFieldSpecs:
        tableSpec.fields.add(fieldSpec)
    if TimespanReprClass.hasExclusionConstraint():
        # This database's timespan representation can define a database-level
        # constraint that prevents overlapping validity ranges for entries
        # with the same DatasetType, collection, and data ID.
        # This also creates an index.
        index.append(TimespanReprClass)
        tableSpec.exclusion.add(tuple(index))
    else:
        # No database-level constraint is possible.  We'll have to simulate
        # that in our DatasetRecordStorage.certify() implementation, and just
        # create a regular index here in the hope that helps with lookups.
        index.extend(fieldSpec.name for fieldSpec in tsFieldSpecs)
        tableSpec.indexes.add(tuple(index))  # type: ignore
    return tableSpec
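# Usage sketch (illustrative; ``myCalibType`` is hypothetical, and
# ``db.getTimespanRepresentation()`` is assumed as the way a caller would
# obtain the timespan representation class from its database object):
#
#     spec = makeCalibTableSpec(
#         myCalibType,
#         NameKeyCollectionManager,
#         TimespanReprClass=db.getTimespanRepresentation(),
#         dtype=sqlalchemy.BigInteger,
#     )
#     # Backends whose timespan representation supports exclusion
#     # constraints get a database-level overlap check; all others get a
#     # plain index, with overlaps checked in certify() instead.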