Coverage for python/lsst/daf/butler/registry/datasets/byDimensions/tables.py: 96%

62 statements  

coverage.py v6.5.0, created at 2022-10-07 02:46 -0700

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = (
    "addDatasetForeignKey",
    "makeCalibTableName",
    "makeCalibTableSpec",
    "makeStaticTableSpecs",
    "makeTagTableName",
    "makeTagTableSpec",
    "StaticDatasetTablesTuple",
)

from collections import namedtuple
from typing import Any

import sqlalchemy

from ....core import (
    DatasetType,
    DimensionUniverse,
    GovernorDimension,
    TimespanDatabaseRepresentation,
    addDimensionForeignKey,
    ddl,
)
from ...interfaces import CollectionManager

DATASET_TYPE_NAME_LENGTH = 128


StaticDatasetTablesTuple = namedtuple(
    "StaticDatasetTablesTuple",
    [
        "dataset_type",
        "dataset",
    ],
)


def addDatasetForeignKey(
    tableSpec: ddl.TableSpec,
    dtype: type,
    *,
    name: str = "dataset",
    onDelete: str | None = None,
    constraint: bool = True,
    **kwargs: Any,
) -> ddl.FieldSpec:
    """Add a foreign key column for datasets and (optionally) a constraint
    to a table.

    This is an internal interface for the ``byDimensions`` package; external
    code should use `DatasetRecordStorageManager.addDatasetForeignKey`
    instead.

    Parameters
    ----------
    tableSpec : `ddl.TableSpec`
        Specification for the table that should reference the dataset
        table. Will be modified in place.
    dtype : `type`
        Type of the column; the same as the column type of the primary key
        column of the referenced table (``dataset.id``).
    name : `str`, optional
        Name to use as the prefix of the new field; the full field name is
        ``{name}_id``.
    onDelete : `str`, optional
        One of "CASCADE" or "SET NULL", indicating what should happen to
        the referencing row if the collection row is deleted. `None`
        indicates that this should be an integrity error.
    constraint : `bool`, optional
        If `False` (`True` is default), add a field that can be joined to
        the dataset primary key, but do not add a foreign key constraint.
    **kwargs
        Additional keyword arguments are forwarded to the `ddl.FieldSpec`
        constructor (only the ``name`` and ``dtype`` arguments are
        otherwise provided).

    Returns
    -------
    idSpec : `ddl.FieldSpec`
        Specification for the ID field.
    """
    idFieldSpec = ddl.FieldSpec(f"{name}_id", dtype=dtype, **kwargs)
    tableSpec.fields.add(idFieldSpec)
    if constraint:
        tableSpec.foreignKeys.append(
            ddl.ForeignKeySpec("dataset", source=(idFieldSpec.name,), target=("id",), onDelete=onDelete)
        )
    return idFieldSpec
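

# A minimal usage sketch for the helper above (illustrative only; within this
# package it is normally invoked by makeTagTableSpec and makeCalibTableSpec
# below):
#
#     spec = ddl.TableSpec(fields=[])
#     idSpec = addDatasetForeignKey(spec, sqlalchemy.BigInteger, onDelete="CASCADE")
#     # spec now contains a "dataset_id" field (default name prefix "dataset"
#     # plus "_id") and a foreign key constraint referencing dataset.id;
#     # idSpec is the ddl.FieldSpec for the new column.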


def makeStaticTableSpecs(
    collections: type[CollectionManager],
    universe: DimensionUniverse,
    dtype: type,
    autoincrement: bool,
) -> StaticDatasetTablesTuple:
    """Construct all static tables used by the classes in this package.

    Static tables are those that are present in all Registries and do not
    depend on what DatasetTypes have been registered.

    Parameters
    ----------
    collections : `type` [ `CollectionManager` ]
        Manager class for the collections in this `Registry`.
    universe : `DimensionUniverse`
        Universe graph containing all dimensions known to this `Registry`.
    dtype : `type`
        Type of the dataset ID (primary key) column.
    autoincrement : `bool`
        If `True`, the dataset ID column will be auto-incrementing.

    Returns
    -------
    specs : `StaticDatasetTablesTuple`
        A named tuple containing `ddl.TableSpec` instances.
    """

    specs = StaticDatasetTablesTuple(
        dataset_type=ddl.TableSpec(
            fields=[
                ddl.FieldSpec(
                    name="id",
                    dtype=sqlalchemy.BigInteger,
                    autoincrement=True,
                    primaryKey=True,
                    doc=(
                        "Autoincrement ID that uniquely identifies a dataset "
                        "type in other tables. Python code outside the "
                        "`Registry` class should never interact with this; "
                        "its existence is considered an implementation detail."
                    ),
                ),
                ddl.FieldSpec(
                    name="name",
                    dtype=sqlalchemy.String,
                    length=DATASET_TYPE_NAME_LENGTH,
                    nullable=False,
                    doc="String name that uniquely identifies a dataset type.",
                ),
                ddl.FieldSpec(
                    name="storage_class",
                    dtype=sqlalchemy.String,
                    length=64,
                    nullable=False,
                    doc=(
                        "Name of the storage class associated with all "
                        "datasets of this type. Storage classes are "
                        "generally associated with a Python class, and are "
                        "enumerated in butler configuration."
                    ),
                ),
                ddl.FieldSpec(
                    name="dimensions_key",
                    dtype=sqlalchemy.BigInteger,
                    nullable=False,
                    doc="Unique key for the set of dimensions that identifies datasets of this type.",
                ),
                ddl.FieldSpec(
                    name="tag_association_table",
                    dtype=sqlalchemy.String,
                    length=128,
                    nullable=False,
                    doc=(
                        "Name of the table that holds associations between "
                        "datasets of this type and most types of collections."
                    ),
                ),
                ddl.FieldSpec(
                    name="calibration_association_table",
                    dtype=sqlalchemy.String,
                    length=128,
                    nullable=True,
                    doc=(
                        "Name of the table that holds associations between "
                        "datasets of this type and CALIBRATION collections. "
                        "NULL values indicate dataset types with "
                        "isCalibration=False."
                    ),
                ),
            ],
            unique=[("name",)],
        ),
        dataset=ddl.TableSpec(
            fields=[
                ddl.FieldSpec(
                    name="id",
                    dtype=dtype,
                    autoincrement=autoincrement,
                    primaryKey=True,
                    doc="A unique field used as the primary key for dataset.",
                ),
                ddl.FieldSpec(
                    name="dataset_type_id",
                    dtype=sqlalchemy.BigInteger,
                    nullable=False,
                    doc="Reference to the associated entry in the dataset_type table.",
                ),
                ddl.FieldSpec(
                    name="ingest_date",
                    dtype=sqlalchemy.TIMESTAMP,
                    default=sqlalchemy.sql.func.now(),
                    nullable=False,
                    doc="Time of dataset ingestion.",
                ),
                # Foreign key field/constraint to run added below.
            ],
            foreignKeys=[
                ddl.ForeignKeySpec("dataset_type", source=("dataset_type_id",), target=("id",)),
            ],
        ),
    )
    # Add foreign key fields programmatically.
    collections.addRunForeignKey(specs.dataset, onDelete="CASCADE", nullable=False)
    return specs
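

# Sketch of a typical call (illustrative; the manager class and universe come
# from the registry layer, and ``db.ensureTableExists`` is an assumption about
# the database layer, not part of this module):
#
#     specs = makeStaticTableSpecs(
#         collections=MyCollectionManager,  # hypothetical manager subclass
#         universe=universe,
#         dtype=sqlalchemy.BigInteger,
#         autoincrement=True,
#     )
#     dataset_type_table = db.ensureTableExists("dataset_type", specs.dataset_type)
#     dataset_table = db.ensureTableExists("dataset", specs.dataset)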


def makeTagTableName(datasetType: DatasetType, dimensionsKey: int) -> str:
    """Construct the name for a dynamic (DatasetType-dependent) tag table used
    by the classes in this package.

    Parameters
    ----------
    datasetType : `DatasetType`
        Dataset type to construct a name for. Multiple dataset types may
        share the same table.
    dimensionsKey : `int`
        Integer key used to save ``datasetType.dimensions`` to the database.

    Returns
    -------
    name : `str`
        Name for the table.
    """
    return f"dataset_tags_{dimensionsKey:08d}"
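

# For example, with an arbitrary dimensions key of 42 the generated name is
# "dataset_tags_00000042"; the zero-padded key keeps names fixed-width, and
# every dataset type whose dimensions were stored under that key shares the
# same tag table.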


def makeCalibTableName(datasetType: DatasetType, dimensionsKey: int) -> str:
    """Construct the name for a dynamic (DatasetType-dependent) tag + validity
    range table used by the classes in this package.

    Parameters
    ----------
    datasetType : `DatasetType`
        Dataset type to construct a name for. Multiple dataset types may
        share the same table.
    dimensionsKey : `int`
        Integer key used to save ``datasetType.dimensions`` to the database.

    Returns
    -------
    name : `str`
        Name for the table.
    """
    assert datasetType.isCalibration()
    return f"dataset_calibs_{dimensionsKey:08d}"
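

# Same naming pattern as the tag table, e.g. "dataset_calibs_00000042" for a
# key of 42; the assertion above means callers are expected to check
# datasetType.isCalibration() before asking for a calib table name.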


def makeTagTableSpec(
    datasetType: DatasetType, collections: type[CollectionManager], dtype: type, *, constraints: bool = True
) -> ddl.TableSpec:
    """Construct the specification for a dynamic (DatasetType-dependent) tag
    table used by the classes in this package.

    Parameters
    ----------
    datasetType : `DatasetType`
        Dataset type to construct a spec for. Multiple dataset types may
        share the same table.
    collections : `type` [ `CollectionManager` ]
        `CollectionManager` subclass that can be used to construct foreign
        keys to the run and/or collection tables.
    dtype : `type`
        Type of the foreign key column; the same as the column type of the
        primary key column of the referenced table (``dataset.id``).
    constraints : `bool`, optional
        If `False` (`True` is default), do not define foreign key constraints.

    Returns
    -------
    spec : `ddl.TableSpec`
        Specification for the table.
    """

    tableSpec = ddl.TableSpec(
        fields=[
            # Foreign key fields to dataset, collection, and usually dimension
            # tables added below.
            # The dataset_type_id field here would be redundant with the one
            # in the main monolithic dataset table, but we need it here for an
            # important unique constraint.
            ddl.FieldSpec("dataset_type_id", dtype=sqlalchemy.BigInteger, nullable=False),
        ]
    )
    if constraints:
        tableSpec.foreignKeys.append(
            ddl.ForeignKeySpec("dataset_type", source=("dataset_type_id",), target=("id",))
        )
    # We'll also have a unique constraint on dataset type, collection, and
    # data ID. We only include the required part of the data ID, as that's
    # sufficient and saves us from worrying about nulls in the constraint.
    constraint = ["dataset_type_id"]
    # Add foreign key fields to dataset table (part of the primary key).
    addDatasetForeignKey(tableSpec, dtype, primaryKey=True, onDelete="CASCADE", constraint=constraints)
    # Add foreign key fields to collection table (part of the primary key and
    # the data ID unique constraint).
    collectionFieldSpec = collections.addCollectionForeignKey(
        tableSpec, primaryKey=True, onDelete="CASCADE", constraint=constraints
    )
    constraint.append(collectionFieldSpec.name)
    # Add foreign key constraint to the collection_summary_dataset_type table.
    if constraints:
        tableSpec.foreignKeys.append(
            ddl.ForeignKeySpec(
                "collection_summary_dataset_type",
                source=(collectionFieldSpec.name, "dataset_type_id"),
                target=(collectionFieldSpec.name, "dataset_type_id"),
            )
        )
    for dimension in datasetType.dimensions.required:
        fieldSpec = addDimensionForeignKey(
            tableSpec, dimension=dimension, nullable=False, primaryKey=False, constraint=constraints
        )
        constraint.append(fieldSpec.name)
        # If this is a governor dimension, add a foreign key constraint to the
        # collection_summary_<dimension> table.
        if isinstance(dimension, GovernorDimension) and constraints:
            tableSpec.foreignKeys.append(
                ddl.ForeignKeySpec(
                    f"collection_summary_{dimension.name}",
                    source=(collectionFieldSpec.name, fieldSpec.name),
                    target=(collectionFieldSpec.name, fieldSpec.name),
                )
            )
    # Actually add the unique constraint.
    tableSpec.unique.add(tuple(constraint))
    return tableSpec
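

# Rough shape of the resulting table for a hypothetical dataset type whose
# required dimensions are (instrument, detector); the actual collection column
# name is chosen by the CollectionManager subclass:
#
#     dataset_type_id | dataset_id | <collection FK> | instrument | detector
#
# with the primary key on (dataset_id, <collection FK>) and the unique
# constraint on (dataset_type_id, <collection FK>, instrument, detector).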


def makeCalibTableSpec(
    datasetType: DatasetType,
    collections: type[CollectionManager],
    TimespanReprClass: type[TimespanDatabaseRepresentation],
    dtype: type,
) -> ddl.TableSpec:
    """Construct the specification for a dynamic (DatasetType-dependent) tag +
    validity range table used by the classes in this package.

    Parameters
    ----------
    datasetType : `DatasetType`
        Dataset type to construct a spec for. Multiple dataset types may
        share the same table.
    collections : `type` [ `CollectionManager` ]
        `CollectionManager` subclass that can be used to construct foreign
        keys to the run and/or collection tables.
    TimespanReprClass : `type` [ `TimespanDatabaseRepresentation` ]
        Class that encapsulates how timespans are represented in this
        database.
    dtype : `type`
        Type of the foreign key column; the same as the column type of the
        primary key column of the referenced table (``dataset.id``).

    Returns
    -------
    spec : `ddl.TableSpec`
        Specification for the table.
    """

    tableSpec = ddl.TableSpec(
        fields=[
            # This table has no natural primary key, compound or otherwise, so
            # we add an autoincrement key. We may use this field a bit
            # internally, but its presence is an implementation detail and it
            # shouldn't appear as a foreign key in any other tables.
            ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, autoincrement=True, primaryKey=True),
            # Foreign key fields to dataset, collection, and usually dimension
            # tables added below. The dataset_type_id field here is redundant
            # with the one in the main monolithic dataset table, but this bit
            # of denormalization lets us define what should be a much more
            # useful index.
            ddl.FieldSpec("dataset_type_id", dtype=sqlalchemy.BigInteger, nullable=False),
        ],
        foreignKeys=[
            ddl.ForeignKeySpec("dataset_type", source=("dataset_type_id",), target=("id",)),
        ],
    )
    # Record fields that should go in the temporal lookup index/constraint,
    # starting with the dataset type.
    index: list[str | type[TimespanDatabaseRepresentation]] = ["dataset_type_id"]
    # Add foreign key fields to dataset table (not part of the temporal
    # lookup/constraint).
    addDatasetForeignKey(tableSpec, dtype, nullable=False, onDelete="CASCADE")
    # Add foreign key fields to collection table (part of the temporal lookup
    # index/constraint).
    collectionFieldSpec = collections.addCollectionForeignKey(tableSpec, nullable=False, onDelete="CASCADE")
    index.append(collectionFieldSpec.name)
    # Add foreign key constraint to the collection_summary_dataset_type table.
    tableSpec.foreignKeys.append(
        ddl.ForeignKeySpec(
            "collection_summary_dataset_type",
            source=(collectionFieldSpec.name, "dataset_type_id"),
            target=(collectionFieldSpec.name, "dataset_type_id"),
        )
    )
    # Add dimension fields (part of the temporal lookup index/constraint).
    for dimension in datasetType.dimensions.required:
        fieldSpec = addDimensionForeignKey(tableSpec, dimension=dimension, nullable=False, primaryKey=False)
        index.append(fieldSpec.name)
        # If this is a governor dimension, add a foreign key constraint to the
        # collection_summary_<dimension> table.
        if isinstance(dimension, GovernorDimension):
            tableSpec.foreignKeys.append(
                ddl.ForeignKeySpec(
                    f"collection_summary_{dimension.name}",
                    source=(collectionFieldSpec.name, fieldSpec.name),
                    target=(collectionFieldSpec.name, fieldSpec.name),
                )
            )
    # Add validity-range field(s) (part of the temporal lookup
    # index/constraint).
    tsFieldSpecs = TimespanReprClass.makeFieldSpecs(nullable=False)
    for fieldSpec in tsFieldSpecs:
        tableSpec.fields.add(fieldSpec)
    if TimespanReprClass.hasExclusionConstraint():
        # This database's timespan representation can define a database-level
        # constraint that prevents overlapping validity ranges for entries
        # with the same DatasetType, collection, and data ID. This also
        # creates an index.
        index.append(TimespanReprClass)
        tableSpec.exclusion.add(tuple(index))
    else:
        # No database-level constraint is possible. We'll have to simulate
        # that in our DatasetRecordStorage.certify() implementation, and just
        # create a regular index here in the hope that it helps with lookups.
        index.extend(fieldSpec.name for fieldSpec in tsFieldSpecs)
        tableSpec.indexes.add(tuple(index))  # type: ignore
    return tableSpec
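

# Usage sketch (illustrative; ``db.getTimespanRepresentation()`` is an
# assumption about how the database layer exposes its
# TimespanDatabaseRepresentation subclass, and MyCollectionManager is
# hypothetical):
#
#     spec = makeCalibTableSpec(
#         datasetType, MyCollectionManager, db.getTimespanRepresentation(),
#         sqlalchemy.BigInteger,
#     )
#
# Depending on TimespanReprClass, the temporal lookup columns end up either in
# a database-level exclusion constraint (which also serves as the index) or in
# a plain index, with overlap enforcement deferred to certify().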