Coverage for python/lsst/daf/butler/registry/datasets/byDimensions/tables.py: 96%

62 statements

coverage.py v6.4.4, created at 2022-08-31 10:07 +0000

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = (
    "addDatasetForeignKey",
    "makeCalibTableName",
    "makeCalibTableSpec",
    "makeStaticTableSpecs",
    "makeTagTableName",
    "makeTagTableSpec",
    "StaticDatasetTablesTuple",
)

from collections import namedtuple
from typing import Any, List, Optional, Type, Union

import sqlalchemy
from lsst.daf.butler import (
    DatasetType,
    DimensionUniverse,
    GovernorDimension,
    TimespanDatabaseRepresentation,
    addDimensionForeignKey,
    ddl,
)
from lsst.daf.butler.registry.interfaces import CollectionManager

DATASET_TYPE_NAME_LENGTH = 128


StaticDatasetTablesTuple = namedtuple(
    "StaticDatasetTablesTuple",
    [
        "dataset_type",
        "dataset",
    ],
)


def addDatasetForeignKey(
    tableSpec: ddl.TableSpec,
    dtype: type,
    *,
    name: str = "dataset",
    onDelete: Optional[str] = None,
    constraint: bool = True,
    **kwargs: Any,
) -> ddl.FieldSpec:
    """Add a foreign key column for datasets and (optionally) a constraint to
    a table.

    This is an internal interface for the ``byDimensions`` package; external
    code should use `DatasetRecordStorageManager.addDatasetForeignKey`
    instead.

    Parameters
    ----------
    tableSpec : `ddl.TableSpec`
        Specification for the table that should reference the dataset
        table. Will be modified in place.
    dtype : `type`
        Type of the column; must match the column type of the primary key
        column of the referenced table (``dataset.id``).
    name : `str`, optional
        Prefix for the name of the new field; the full field name is
        ``{name}_id``.
    onDelete : `str`, optional
        One of "CASCADE" or "SET NULL", indicating what should happen to
        the referencing row if the referenced dataset row is deleted.
        `None` indicates that this should be an integrity error.
    constraint : `bool`, optional
        If `False` (`True` is default), add a field that can be joined to
        the dataset primary key, but do not add a foreign key constraint.
    **kwargs
        Additional keyword arguments are forwarded to the `ddl.FieldSpec`
        constructor (only the ``name`` and ``dtype`` arguments are
        otherwise provided).

    Returns
    -------
    idSpec : `ddl.FieldSpec`
        Specification for the ID field.
    """
    idFieldSpec = ddl.FieldSpec(f"{name}_id", dtype=dtype, **kwargs)
    tableSpec.fields.add(idFieldSpec)
    if constraint:
        tableSpec.foreignKeys.append(
            ddl.ForeignKeySpec("dataset", source=(idFieldSpec.name,), target=("id",), onDelete=onDelete)
        )
    return idFieldSpec
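
# Usage sketch (editor's illustration, not part of the original module): add
# an integer FK column pointing at ``dataset.id`` to a new table spec.
#
#     tableSpec = ddl.TableSpec(fields=[])
#     idSpec = addDatasetForeignKey(tableSpec, sqlalchemy.BigInteger, onDelete="CASCADE")
#     assert idSpec.name == "dataset_id"
#
# Passing ``constraint=False`` would still add the joinable ``dataset_id``
# field but skip the foreign key constraint.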


def makeStaticTableSpecs(
    collections: Type[CollectionManager],
    universe: DimensionUniverse,
    dtype: type,
    autoincrement: bool,
) -> StaticDatasetTablesTuple:
    """Construct all static tables used by the classes in this package.

    Static tables are those that are present in all Registries and do not
    depend on what DatasetTypes have been registered.

    Parameters
    ----------
    collections : `type` [ `CollectionManager` ]
        Manager class for the collections in this `Registry`.
    universe : `DimensionUniverse`
        Universe graph containing all dimensions known to this `Registry`.
    dtype : `type`
        Type of the dataset ID (primary key) column.
    autoincrement : `bool`
        If `True`, the dataset ID column will be auto-incrementing.

    Returns
    -------
    specs : `StaticDatasetTablesTuple`
        A named tuple containing `ddl.TableSpec` instances.
    """
    specs = StaticDatasetTablesTuple(
        dataset_type=ddl.TableSpec(
            fields=[
                ddl.FieldSpec(
                    name="id",
                    dtype=sqlalchemy.BigInteger,
                    autoincrement=True,
                    primaryKey=True,
                    doc=(
                        "Autoincrement ID that uniquely identifies a dataset "
                        "type in other tables. Python code outside the "
                        "`Registry` class should never interact with this; "
                        "its existence is considered an implementation detail."
                    ),
                ),
                ddl.FieldSpec(
                    name="name",
                    dtype=sqlalchemy.String,
                    length=DATASET_TYPE_NAME_LENGTH,
                    nullable=False,
                    doc="String name that uniquely identifies a dataset type.",
                ),
                ddl.FieldSpec(
                    name="storage_class",
                    dtype=sqlalchemy.String,
                    length=64,
                    nullable=False,
                    doc=(
                        "Name of the storage class associated with all "
                        "datasets of this type. Storage classes are "
                        "generally associated with a Python class, and are "
                        "enumerated in butler configuration."
                    ),
                ),
                ddl.FieldSpec(
                    name="dimensions_key",
                    dtype=sqlalchemy.BigInteger,
                    nullable=False,
                    doc="Unique key for the set of dimensions that identifies datasets of this type.",
                ),
                ddl.FieldSpec(
                    name="tag_association_table",
                    dtype=sqlalchemy.String,
                    length=128,
                    nullable=False,
                    doc=(
                        "Name of the table that holds associations between "
                        "datasets of this type and most types of collections."
                    ),
                ),
                ddl.FieldSpec(
                    name="calibration_association_table",
                    dtype=sqlalchemy.String,
                    length=128,
                    nullable=True,
                    doc=(
                        "Name of the table that holds associations between "
                        "datasets of this type and CALIBRATION collections. "
                        "NULL values indicate dataset types with "
                        "isCalibration=False."
                    ),
                ),
            ],
            unique=[("name",)],
        ),
        dataset=ddl.TableSpec(
            fields=[
                ddl.FieldSpec(
                    name="id",
                    dtype=dtype,
                    autoincrement=autoincrement,
                    primaryKey=True,
                    doc="A unique field used as the primary key for dataset.",
                ),
                ddl.FieldSpec(
                    name="dataset_type_id",
                    dtype=sqlalchemy.BigInteger,
                    nullable=False,
                    doc="Reference to the associated entry in the dataset_type table.",
                ),
                ddl.FieldSpec(
                    name="ingest_date",
                    dtype=sqlalchemy.TIMESTAMP,
                    default=sqlalchemy.sql.func.now(),
                    nullable=False,
                    doc="Time of dataset ingestion.",
                ),
                # Foreign key field/constraint to run added below.
            ],
            foreignKeys=[
                ddl.ForeignKeySpec("dataset_type", source=("dataset_type_id",), target=("id",)),
            ],
        ),
    )
    # Add foreign key fields programmatically.
    collections.addRunForeignKey(specs.dataset, onDelete="CASCADE", nullable=False)
    return specs
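
# Usage sketch (editor's illustration, not part of the original module):
# build both static specs and register them through a Database instance.
# ``MyCollectionManager`` and ``db`` are hypothetical stand-ins.
#
#     specs = makeStaticTableSpecs(
#         MyCollectionManager,
#         universe=registry.dimensions,
#         dtype=sqlalchemy.BigInteger,
#         autoincrement=True,
#     )
#     dataset_type_table = db.ensureTableExists("dataset_type", specs.dataset_type)
#     dataset_table = db.ensureTableExists("dataset", specs.dataset)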


def makeTagTableName(datasetType: DatasetType, dimensionsKey: int) -> str:
    """Construct the name for a dynamic (DatasetType-dependent) tag table used
    by the classes in this package.

    Parameters
    ----------
    datasetType : `DatasetType`
        Dataset type to construct a name for. Multiple dataset types may
        share the same table.
    dimensionsKey : `int`
        Integer key used to save ``datasetType.dimensions`` to the database.

    Returns
    -------
    name : `str`
        Name for the table.
    """
    return f"dataset_tags_{dimensionsKey:08d}"
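
# Illustration (editor's addition): the key is zero-padded to eight digits,
# so ``makeTagTableName(datasetType, 42)`` returns "dataset_tags_00000042";
# all dataset types whose dimensions are stored under key 42 share it.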


def makeCalibTableName(datasetType: DatasetType, dimensionsKey: int) -> str:
    """Construct the name for a dynamic (DatasetType-dependent) tag + validity
    range table used by the classes in this package.

    Parameters
    ----------
    datasetType : `DatasetType`
        Dataset type to construct a name for. Multiple dataset types may
        share the same table.
    dimensionsKey : `int`
        Integer key used to save ``datasetType.dimensions`` to the database.

    Returns
    -------
    name : `str`
        Name for the table.
    """
    assert datasetType.isCalibration()
    return f"dataset_calibs_{dimensionsKey:08d}"
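
# Illustration (editor's addition): the same key yields
# "dataset_calibs_00000042"; the assertion above guards against requesting
# a calibration table for a dataset type with ``isCalibration() == False``.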


def makeTagTableSpec(
    datasetType: DatasetType, collections: Type[CollectionManager], dtype: type, *, constraints: bool = True
) -> ddl.TableSpec:
    """Construct the specification for a dynamic (DatasetType-dependent) tag
    table used by the classes in this package.

    Parameters
    ----------
    datasetType : `DatasetType`
        Dataset type to construct a spec for. Multiple dataset types may
        share the same table.
    collections : `type` [ `CollectionManager` ]
        `CollectionManager` subclass that can be used to construct foreign
        keys to the run and/or collection tables.
    dtype : `type`
        Type of the FK column; must match the column type of the primary key
        column of the referenced table (``dataset.id``).
    constraints : `bool`, optional
        If `False` (`True` is default), do not define foreign key constraints.

    Returns
    -------
    spec : `ddl.TableSpec`
        Specification for the table.
    """
    tableSpec = ddl.TableSpec(
        fields=[
            # Foreign key fields to dataset, collection, and usually dimension
            # tables added below.
            # The dataset_type_id field here would be redundant with the one
            # in the main monolithic dataset table, but we need it here for an
            # important unique constraint.
            ddl.FieldSpec("dataset_type_id", dtype=sqlalchemy.BigInteger, nullable=False),
        ]
    )
    if constraints:
        tableSpec.foreignKeys.append(
            ddl.ForeignKeySpec("dataset_type", source=("dataset_type_id",), target=("id",))
        )
    # We'll also have a unique constraint on dataset type, collection, and
    # data ID. We only include the required part of the data ID, as that's
    # sufficient and saves us from worrying about nulls in the constraint.
    constraint = ["dataset_type_id"]
    # Add foreign key fields to dataset table (part of the primary key).
    addDatasetForeignKey(tableSpec, dtype, primaryKey=True, onDelete="CASCADE", constraint=constraints)
    # Add foreign key fields to collection table (part of the primary key and
    # the data ID unique constraint).
    collectionFieldSpec = collections.addCollectionForeignKey(
        tableSpec, primaryKey=True, onDelete="CASCADE", constraint=constraints
    )
    constraint.append(collectionFieldSpec.name)
    # Add foreign key constraint to the collection_summary_dataset_type table.
    if constraints:
        tableSpec.foreignKeys.append(
            ddl.ForeignKeySpec(
                "collection_summary_dataset_type",
                source=(collectionFieldSpec.name, "dataset_type_id"),
                target=(collectionFieldSpec.name, "dataset_type_id"),
            )
        )
    for dimension in datasetType.dimensions.required:
        fieldSpec = addDimensionForeignKey(
            tableSpec, dimension=dimension, nullable=False, primaryKey=False, constraint=constraints
        )
        constraint.append(fieldSpec.name)
        # If this is a governor dimension, add a foreign key constraint to the
        # collection_summary_<dimension> table.
        if isinstance(dimension, GovernorDimension) and constraints:
            tableSpec.foreignKeys.append(
                ddl.ForeignKeySpec(
                    f"collection_summary_{dimension.name}",
                    source=(collectionFieldSpec.name, fieldSpec.name),
                    target=(collectionFieldSpec.name, fieldSpec.name),
                )
            )
    # Actually add the unique constraint.
    tableSpec.unique.add(tuple(constraint))
    return tableSpec
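
# Usage sketch (editor's illustration, not part of the original module):
# build and create the tag table for one dataset type (``db`` and
# ``MyCollectionManager`` are hypothetical stand-ins).
#
#     spec = makeTagTableSpec(datasetType, MyCollectionManager, sqlalchemy.BigInteger)
#     table = db.ensureTableExists(makeTagTableName(datasetType, dimensionsKey), spec)
#
# The unique constraint spans dataset_type_id, the collection key, and the
# required dimension columns, so a given data ID can be associated with at
# most one dataset per (dataset type, collection).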


def makeCalibTableSpec(
    datasetType: DatasetType,
    collections: Type[CollectionManager],
    TimespanReprClass: Type[TimespanDatabaseRepresentation],
    dtype: type,
) -> ddl.TableSpec:
    """Construct the specification for a dynamic (DatasetType-dependent) tag +
    validity range table used by the classes in this package.

    Parameters
    ----------
    datasetType : `DatasetType`
        Dataset type to construct a spec for. Multiple dataset types may
        share the same table.
    collections : `type` [ `CollectionManager` ]
        `CollectionManager` subclass that can be used to construct foreign
        keys to the run and/or collection tables.
    TimespanReprClass : `type` [ `TimespanDatabaseRepresentation` ]
        Class that encapsulates how timespans are represented in this
        database.
    dtype : `type`
        Type of the FK column; must match the column type of the primary key
        column of the referenced table (``dataset.id``).

    Returns
    -------
    spec : `ddl.TableSpec`
        Specification for the table.
    """
    tableSpec = ddl.TableSpec(
        fields=[
            # This table has no natural primary key, compound or otherwise, so
            # we add an autoincrement key. We may use this field a bit
            # internally, but its presence is an implementation detail and it
            # shouldn't appear as a foreign key in any other tables.
            ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, autoincrement=True, primaryKey=True),
            # Foreign key fields to dataset, collection, and usually dimension
            # tables added below. The dataset_type_id field here is redundant
            # with the one in the main monolithic dataset table, but this bit
            # of denormalization lets us define what should be a much more
            # useful index.
            ddl.FieldSpec("dataset_type_id", dtype=sqlalchemy.BigInteger, nullable=False),
        ],
        foreignKeys=[
            ddl.ForeignKeySpec("dataset_type", source=("dataset_type_id",), target=("id",)),
        ],
    )
    # Record fields that should go in the temporal lookup index/constraint,
    # starting with the dataset type.
    index: List[Union[str, Type[TimespanDatabaseRepresentation]]] = ["dataset_type_id"]
    # Add foreign key fields to dataset table (not part of the temporal
    # lookup/constraint).
    addDatasetForeignKey(tableSpec, dtype, nullable=False, onDelete="CASCADE")
    # Add foreign key fields to collection table (part of the temporal lookup
    # index/constraint).
    collectionFieldSpec = collections.addCollectionForeignKey(tableSpec, nullable=False, onDelete="CASCADE")
    index.append(collectionFieldSpec.name)
    # Add foreign key constraint to the collection_summary_dataset_type table.
    tableSpec.foreignKeys.append(
        ddl.ForeignKeySpec(
            "collection_summary_dataset_type",
            source=(collectionFieldSpec.name, "dataset_type_id"),
            target=(collectionFieldSpec.name, "dataset_type_id"),
        )
    )
    # Add dimension fields (part of the temporal lookup index/constraint).
    for dimension in datasetType.dimensions.required:
        fieldSpec = addDimensionForeignKey(tableSpec, dimension=dimension, nullable=False, primaryKey=False)
        index.append(fieldSpec.name)
        # If this is a governor dimension, add a foreign key constraint to the
        # collection_summary_<dimension> table.
        if isinstance(dimension, GovernorDimension):
            tableSpec.foreignKeys.append(
                ddl.ForeignKeySpec(
                    f"collection_summary_{dimension.name}",
                    source=(collectionFieldSpec.name, fieldSpec.name),
                    target=(collectionFieldSpec.name, fieldSpec.name),
                )
            )
    # Add validity-range field(s) (part of the temporal lookup
    # index/constraint).
    tsFieldSpecs = TimespanReprClass.makeFieldSpecs(nullable=False)
    for fieldSpec in tsFieldSpecs:
        tableSpec.fields.add(fieldSpec)
    if TimespanReprClass.hasExclusionConstraint():  # coverage: branch never taken in the measured run
        # This database's timespan representation can define a database-level
        # constraint that prevents overlapping validity ranges for entries
        # with the same DatasetType, collection, and data ID.
        # This also creates an index.
        index.append(TimespanReprClass)
        tableSpec.exclusion.add(tuple(index))
    else:
        # No database-level constraint is possible. We'll have to simulate
        # that in our DatasetRecordStorage.certify() implementation, and just
        # create a regular index here in the hope that it helps with lookups.
        index.extend(fieldSpec.name for fieldSpec in tsFieldSpecs)
        tableSpec.indexes.add(tuple(index))  # type: ignore
    return tableSpec
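
# Usage sketch (editor's illustration, not part of the original module):
# build the calibration-association spec for a calibration dataset type;
# ``MyCollectionManager`` and ``db`` are hypothetical stand-ins, and the
# compound timespan representation is assumed here.
#
#     spec = makeCalibTableSpec(
#         datasetType,                              # isCalibration() must be True
#         MyCollectionManager,
#         TimespanDatabaseRepresentation.Compound,  # no exclusion constraint
#         sqlalchemy.BigInteger,
#     )
#     table = db.ensureTableExists(makeCalibTableName(datasetType, dimensionsKey), spec)
#
# With no database-level exclusion constraint, overlap checking falls to
# ``DatasetRecordStorage.certify()``, and the spec carries a plain index
# over (dataset_type_id, collection, dimensions, timespan columns).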