Coverage for python/lsst/daf/butler/registry/datasets/byDimensions/tables.py: 97%

67 statements  


# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.


from __future__ import annotations

__all__ = (
    "addDatasetForeignKey",
    "makeCalibTableName",
    "makeCalibTableSpec",
    "makeStaticTableSpecs",
    "makeTagTableName",
    "makeTagTableSpec",
    "StaticDatasetTablesTuple",
)

from collections import namedtuple
from typing import Any

import sqlalchemy

from ....core import (
    DatasetType,
    DimensionUniverse,
    GovernorDimension,
    TimespanDatabaseRepresentation,
    addDimensionForeignKey,
    ddl,
)
from ...interfaces import CollectionManager, VersionTuple

DATASET_TYPE_NAME_LENGTH = 128


StaticDatasetTablesTuple = namedtuple(
    "StaticDatasetTablesTuple",
    [
        "dataset_type",
        "dataset",
    ],
)


def addDatasetForeignKey(
    tableSpec: ddl.TableSpec,
    dtype: type,
    *,
    name: str = "dataset",
    onDelete: str | None = None,
    constraint: bool = True,
    **kwargs: Any,
) -> ddl.FieldSpec:

70 """Add a foreign key column for datasets and (optionally) a constraint to 

71 a table. 

72 

73 This is an internal interface for the ``byDimensions`` package; external 

74 code should use `DatasetRecordStorageManager.addDatasetForeignKey` instead. 

75 

76 Parameters 

77 ---------- 

78 tableSpec : `ddl.TableSpec` 

79 Specification for the table that should reference the dataset 

80 table. Will be modified in place. 

81 dtype: `type` 

82 Type of the column, same as the column type of the PK column of 

83 a referenced table (``dataset.id``). 

84 name: `str`, optional 

85 A name to use for the prefix of the new field; the full name is 

86 ``{name}_id``. 

87 onDelete: `str`, optional 

88 One of "CASCADE" or "SET NULL", indicating what should happen to 

89 the referencing row if the collection row is deleted. `None` 

90 indicates that this should be an integrity error. 

91 constraint: `bool`, optional 

92 If `False` (`True` is default), add a field that can be joined to 

93 the dataset primary key, but do not add a foreign key constraint. 

94 **kwargs 

95 Additional keyword arguments are forwarded to the `ddl.FieldSpec` 

96 constructor (only the ``name`` and ``dtype`` arguments are 

97 otherwise provided). 

98 

99 Returns 

100 ------- 

101 idSpec : `ddl.FieldSpec` 

102 Specification for the ID field. 

103 """ 

    idFieldSpec = ddl.FieldSpec(f"{name}_id", dtype=dtype, **kwargs)
    tableSpec.fields.add(idFieldSpec)
    if constraint:
        tableSpec.foreignKeys.append(
            ddl.ForeignKeySpec("dataset", source=(idFieldSpec.name,), target=("id",), onDelete=onDelete)
        )
    return idFieldSpec
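
# Editor's usage sketch (illustration only, not part of the original module):
# ``addDatasetForeignKey`` appends a ``{name}_id`` field to the given spec
# and, unless ``constraint=False``, a foreign key to the ``dataset`` table.
# ``_example_spec`` is a hypothetical spec invented for this sketch:
#
#     _example_spec = ddl.TableSpec(
#         fields=[ddl.FieldSpec("note", dtype=sqlalchemy.String, length=32)],
#     )
#     _id_field = addDatasetForeignKey(_example_spec, sqlalchemy.BigInteger, onDelete="CASCADE")
#     assert _id_field.name == "dataset_id"      # field name is "{name}_id"
#     assert len(_example_spec.foreignKeys) == 1  # constraint added by default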



def makeStaticTableSpecs(
    collections: type[CollectionManager],
    universe: DimensionUniverse,
    dtype: type,
    autoincrement: bool,
    schema_version: VersionTuple,
) -> StaticDatasetTablesTuple:

120 """Construct all static tables used by the classes in this package. 

121 

122 Static tables are those that are present in all Registries and do not 

123 depend on what DatasetTypes have been registered. 

124 

125 Parameters 

126 ---------- 

127 collections: `CollectionManager` 

128 Manager object for the collections in this `Registry`. 

129 universe : `DimensionUniverse` 

130 Universe graph containing all dimensions known to this `Registry`. 

131 dtype: `type` 

132 Type of the dataset ID (primary key) column. 

133 autoincrement: `bool` 

134 If `True` then dataset ID column will be auto-incrementing. 

135 

136 Returns 

137 ------- 

138 specs : `StaticDatasetTablesTuple` 

139 A named tuple containing `ddl.TableSpec` instances. 

140 """ 

    ingest_date_type: type
    ingest_date_default: Any = None
    if schema_version.major > 1:
        ingest_date_type = ddl.AstropyTimeNsecTai
    else:
        ingest_date_type = sqlalchemy.TIMESTAMP
        # New code provides explicit values for ingest_date, but we keep the
        # default just to be consistent with the existing schema.
        ingest_date_default = sqlalchemy.sql.func.now()


    specs = StaticDatasetTablesTuple(
        dataset_type=ddl.TableSpec(
            fields=[
                ddl.FieldSpec(
                    name="id",
                    dtype=sqlalchemy.BigInteger,
                    autoincrement=True,
                    primaryKey=True,
                    doc=(
                        "Autoincrement ID that uniquely identifies a dataset "
                        "type in other tables. Python code outside the "
                        "`Registry` class should never interact with this; "
                        "its existence is considered an implementation detail."
                    ),
                ),
                ddl.FieldSpec(
                    name="name",
                    dtype=sqlalchemy.String,
                    length=DATASET_TYPE_NAME_LENGTH,
                    nullable=False,
                    doc="String name that uniquely identifies a dataset type.",
                ),
                ddl.FieldSpec(
                    name="storage_class",
                    dtype=sqlalchemy.String,
                    length=64,
                    nullable=False,
                    doc=(
                        "Name of the storage class associated with all "
                        "datasets of this type. Storage classes are "
                        "generally associated with a Python class, and are "
                        "enumerated in butler configuration."
                    ),
                ),
                ddl.FieldSpec(
                    name="dimensions_key",
                    dtype=sqlalchemy.BigInteger,
                    nullable=False,
                    doc="Unique key for the set of dimensions that identifies datasets of this type.",
                ),
                ddl.FieldSpec(
                    name="tag_association_table",
                    dtype=sqlalchemy.String,
                    length=128,
                    nullable=False,
                    doc=(
                        "Name of the table that holds associations between "
                        "datasets of this type and most types of collections."
                    ),
                ),
                ddl.FieldSpec(
                    name="calibration_association_table",
                    dtype=sqlalchemy.String,
                    length=128,
                    nullable=True,
                    doc=(
                        "Name of the table that holds associations between "
                        "datasets of this type and CALIBRATION collections. "
                        "NULL values indicate dataset types with "
                        "isCalibration=False."
                    ),
                ),
            ],
            unique=[("name",)],
        ),
        dataset=ddl.TableSpec(
            fields=[
                ddl.FieldSpec(
                    name="id",
                    dtype=dtype,
                    autoincrement=autoincrement,
                    primaryKey=True,
                    doc="A unique field used as the primary key for the dataset table.",
                ),
                ddl.FieldSpec(
                    name="dataset_type_id",
                    dtype=sqlalchemy.BigInteger,
                    nullable=False,
                    doc="Reference to the associated entry in the dataset_type table.",
                ),
                ddl.FieldSpec(
                    name="ingest_date",
                    dtype=ingest_date_type,
                    default=ingest_date_default,
                    nullable=False,
                    doc="Time of dataset ingestion.",
                ),
                # Foreign key field/constraint to run added below.
            ],
            foreignKeys=[
                ddl.ForeignKeySpec("dataset_type", source=("dataset_type_id",), target=("id",)),
            ],
        ),
    )
    # Add foreign key fields programmatically.
    collections.addRunForeignKey(specs.dataset, onDelete="CASCADE", nullable=False)
    return specs
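
# Hedged sketch (editor's illustration) of how a registry might build the
# static specs. ``MyCollectionManager`` is a hypothetical CollectionManager
# subclass, ``butler_universe`` an assumed DimensionUniverse instance,
# ``ddl.GUID`` an assumed UUID column type, and the version is arbitrary:
#
#     static_specs = makeStaticTableSpecs(
#         MyCollectionManager,
#         universe=butler_universe,
#         dtype=ddl.GUID,
#         autoincrement=False,
#         schema_version=VersionTuple(2, 0, 0),
#     )
#     # The result is a namedtuple; specs are retrieved by field name:
#     dataset_spec = static_specs.dataset
#     dataset_type_spec = static_specs.dataset_type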



def makeTagTableName(datasetType: DatasetType, dimensionsKey: int) -> str:
    """Construct the name for a dynamic (DatasetType-dependent) tag table used
    by the classes in this package.

    Parameters
    ----------
    datasetType : `DatasetType`
        Dataset type to construct a name for. Multiple dataset types may
        share the same table.
    dimensionsKey : `int`
        Integer key used to save ``datasetType.dimensions`` to the database.

    Returns
    -------
    name : `str`
        Name for the table.
    """
    return f"dataset_tags_{dimensionsKey:08d}"



def makeCalibTableName(datasetType: DatasetType, dimensionsKey: int) -> str:
    """Construct the name for a dynamic (DatasetType-dependent) tag + validity
    range table used by the classes in this package.

    Parameters
    ----------
    datasetType : `DatasetType`
        Dataset type to construct a name for. Multiple dataset types may
        share the same table.
    dimensionsKey : `int`
        Integer key used to save ``datasetType.dimensions`` to the database.

    Returns
    -------
    name : `str`
        Name for the table.
    """
    assert datasetType.isCalibration()
    return f"dataset_calibs_{dimensionsKey:08d}"



def makeTagTableSpec(
    datasetType: DatasetType, collections: type[CollectionManager], dtype: type, *, constraints: bool = True
) -> ddl.TableSpec:
    """Construct the specification for a dynamic (DatasetType-dependent) tag
    table used by the classes in this package.

    Parameters
    ----------
    datasetType : `DatasetType`
        Dataset type to construct a spec for. Multiple dataset types may
        share the same table.
    collections : `type` [ `CollectionManager` ]
        `CollectionManager` subclass that can be used to construct foreign
        keys to the run and/or collection tables.
    dtype : `type`
        Type of the FK column; the same as the column type of the PK column
        of the referenced table (``dataset.id``).
    constraints : `bool`, optional
        If `False` (`True` is default), do not define foreign key constraints.

    Returns
    -------
    spec : `ddl.TableSpec`
        Specification for the table.
    """

    tableSpec = ddl.TableSpec(
        fields=[
            # Foreign key fields to dataset, collection, and usually dimension
            # tables added below.
            # The dataset_type_id field here would be redundant with the one
            # in the main monolithic dataset table, but we need it here for an
            # important unique constraint.
            ddl.FieldSpec("dataset_type_id", dtype=sqlalchemy.BigInteger, nullable=False),
        ]
    )
    if constraints:
        tableSpec.foreignKeys.append(
            ddl.ForeignKeySpec("dataset_type", source=("dataset_type_id",), target=("id",))
        )
    # We'll also have a unique constraint on dataset type, collection, and
    # data ID. We only include the required part of the data ID, as that's
    # sufficient and saves us from worrying about nulls in the constraint.
    constraint = ["dataset_type_id"]
    # Add foreign key fields to dataset table (part of the primary key).
    addDatasetForeignKey(tableSpec, dtype, primaryKey=True, onDelete="CASCADE", constraint=constraints)
    # Add foreign key fields to collection table (part of the primary key and
    # the data ID unique constraint).
    collectionFieldSpec = collections.addCollectionForeignKey(
        tableSpec, primaryKey=True, onDelete="CASCADE", constraint=constraints
    )
    constraint.append(collectionFieldSpec.name)
    # Add foreign key constraint to the collection_summary_dataset_type table.
    if constraints:
        tableSpec.foreignKeys.append(
            ddl.ForeignKeySpec(
                "collection_summary_dataset_type",
                source=(collectionFieldSpec.name, "dataset_type_id"),
                target=(collectionFieldSpec.name, "dataset_type_id"),
            )
        )
    for dimension in datasetType.dimensions.required:
        fieldSpec = addDimensionForeignKey(
            tableSpec, dimension=dimension, nullable=False, primaryKey=False, constraint=constraints
        )
        constraint.append(fieldSpec.name)
        # If this is a governor dimension, add a foreign key constraint to the
        # collection_summary_<dimension> table.
        if isinstance(dimension, GovernorDimension) and constraints:
            tableSpec.foreignKeys.append(
                ddl.ForeignKeySpec(
                    f"collection_summary_{dimension.name}",
                    source=(collectionFieldSpec.name, fieldSpec.name),
                    target=(collectionFieldSpec.name, fieldSpec.name),
                )
            )
    # Actually add the unique constraint.
    tableSpec.unique.add(tuple(constraint))
    return tableSpec
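
# Hedged sketch (editor's illustration) of pairing the name and spec helpers
# when a new dataset type is registered. ``my_dataset_type``,
# ``dimensions_key``, ``MyCollectionManager``, and ``db`` are hypothetical
# stand-ins, not objects defined by this module:
#
#     tag_table_name = makeTagTableName(my_dataset_type, dimensions_key)
#     tag_table_spec = makeTagTableSpec(
#         my_dataset_type,
#         MyCollectionManager,
#         sqlalchemy.BigInteger,  # must match the dataset.id column type
#     )
#     # db.ensureTableExists(tag_table_name, tag_table_spec)  # manager-side step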



def makeCalibTableSpec(
    datasetType: DatasetType,
    collections: type[CollectionManager],
    TimespanReprClass: type[TimespanDatabaseRepresentation],
    dtype: type,
) -> ddl.TableSpec:
    """Construct the specification for a dynamic (DatasetType-dependent) tag +
    validity range table used by the classes in this package.

    Parameters
    ----------
    datasetType : `DatasetType`
        Dataset type to construct a spec for. Multiple dataset types may
        share the same table.
    collections : `type` [ `CollectionManager` ]
        `CollectionManager` subclass that can be used to construct foreign
        keys to the run and/or collection tables.
    TimespanReprClass : `type` [ `TimespanDatabaseRepresentation` ]
        Class that encapsulates how timespans are represented in this
        database.
    dtype : `type`
        Type of the FK column; the same as the column type of the PK column
        of the referenced table (``dataset.id``).

    Returns
    -------
    spec : `ddl.TableSpec`
        Specification for the table.
    """

    tableSpec = ddl.TableSpec(
        fields=[
            # This table has no natural primary key, compound or otherwise, so
            # we add an autoincrement key. We may use this field a bit
            # internally, but its presence is an implementation detail and it
            # shouldn't appear as a foreign key in any other tables.
            ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, autoincrement=True, primaryKey=True),
            # Foreign key fields to dataset, collection, and usually dimension
            # tables added below. The dataset_type_id field here is redundant
            # with the one in the main monolithic dataset table, but this bit
            # of denormalization lets us define what should be a much more
            # useful index.
            ddl.FieldSpec("dataset_type_id", dtype=sqlalchemy.BigInteger, nullable=False),
        ],
        foreignKeys=[
            ddl.ForeignKeySpec("dataset_type", source=("dataset_type_id",), target=("id",)),
        ],
    )
    # Record fields that should go in the temporal lookup index/constraint,
    # starting with the dataset type.
    index: list[str | type[TimespanDatabaseRepresentation]] = ["dataset_type_id"]
    # Add foreign key fields to dataset table (not part of the temporal
    # lookup/constraint).
    addDatasetForeignKey(tableSpec, dtype, nullable=False, onDelete="CASCADE")
    # Add foreign key fields to collection table (part of the temporal lookup
    # index/constraint).
    collectionFieldSpec = collections.addCollectionForeignKey(tableSpec, nullable=False, onDelete="CASCADE")
    index.append(collectionFieldSpec.name)
    # Add foreign key constraint to the collection_summary_dataset_type table.
    tableSpec.foreignKeys.append(
        ddl.ForeignKeySpec(
            "collection_summary_dataset_type",
            source=(collectionFieldSpec.name, "dataset_type_id"),
            target=(collectionFieldSpec.name, "dataset_type_id"),
        )
    )

    # Add dimension fields (part of the temporal lookup index/constraint).
    for dimension in datasetType.dimensions.required:
        fieldSpec = addDimensionForeignKey(tableSpec, dimension=dimension, nullable=False, primaryKey=False)
        index.append(fieldSpec.name)
        # If this is a governor dimension, add a foreign key constraint to the
        # collection_summary_<dimension> table.
        if isinstance(dimension, GovernorDimension):
            tableSpec.foreignKeys.append(
                ddl.ForeignKeySpec(
                    f"collection_summary_{dimension.name}",
                    source=(collectionFieldSpec.name, fieldSpec.name),
                    target=(collectionFieldSpec.name, fieldSpec.name),
                )
            )
    # Add validity-range field(s) (part of the temporal lookup
    # index/constraint).
    tsFieldSpecs = TimespanReprClass.makeFieldSpecs(nullable=False)
    for fieldSpec in tsFieldSpecs:
        tableSpec.fields.add(fieldSpec)
    if TimespanReprClass.hasExclusionConstraint():
        # This database's timespan representation can define a database-level
        # constraint that prevents overlapping validity ranges for entries
        # with the same DatasetType, collection, and data ID.
        # This also creates an index.
        index.append(TimespanReprClass)
        tableSpec.exclusion.add(tuple(index))
    else:
        # No database-level constraint is possible. We'll have to simulate
        # that in our DatasetRecordStorage.certify() implementation, and just
        # create a regular index here in the hope that it helps with lookups.
        index.extend(fieldSpec.name for fieldSpec in tsFieldSpecs)
        tableSpec.indexes.add(ddl.IndexSpec(*index))  # type: ignore

    return tableSpec
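
# Hedged sketch (editor's illustration) for the calibration analogue. The
# timespan representation class comes from the database layer;
# ``db.getTimespanRepresentation()`` is an assumed accessor, and
# ``my_dataset_type``, ``dimensions_key``, ``MyCollectionManager``, and
# ``db`` are hypothetical stand-ins as above:
#
#     calib_table_name = makeCalibTableName(my_dataset_type, dimensions_key)
#     calib_table_spec = makeCalibTableSpec(
#         my_dataset_type,
#         MyCollectionManager,
#         TimespanReprClass=db.getTimespanRepresentation(),
#         dtype=sqlalchemy.BigInteger,
#     )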