Coverage for python/lsst/daf/butler/registry/datasets/byDimensions/tables.py: 97% (72 statements)


# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

from .... import ddl

__all__ = (
    "addDatasetForeignKey",
    "makeCalibTableName",
    "makeCalibTableSpec",
    "makeStaticTableSpecs",
    "makeTagTableName",
    "makeTagTableSpec",
    "StaticDatasetTablesTuple",
)

from collections import namedtuple
from typing import Any

import sqlalchemy

from ...._dataset_type import DatasetType
from ....dimensions import DimensionUniverse, GovernorDimension, addDimensionForeignKey
from ....timespan_database_representation import TimespanDatabaseRepresentation
from ...interfaces import CollectionManager, VersionTuple

DATASET_TYPE_NAME_LENGTH = 128


StaticDatasetTablesTuple = namedtuple(
    "StaticDatasetTablesTuple",
    [
        "dataset_type",
        "dataset",
    ],
)


def addDatasetForeignKey(
    tableSpec: ddl.TableSpec,
    dtype: type,
    *,
    name: str = "dataset",
    onDelete: str | None = None,
    constraint: bool = True,
    **kwargs: Any,
) -> ddl.FieldSpec:
    """Add a foreign key column for datasets and (optionally) a constraint to
    a table.

    This is an internal interface for the ``byDimensions`` package; external
    code should use `DatasetRecordStorageManager.addDatasetForeignKey`
    instead.

    Parameters
    ----------
    tableSpec : `ddl.TableSpec`
        Specification for the table that should reference the dataset
        table. Will be modified in place.
    dtype : `type`
        Type of the column; the same as the column type of the primary key
        column of the referenced table (``dataset.id``).
    name : `str`, optional
        Name to use as the prefix of the new field; the full name is
        ``{name}_id``.
    onDelete : `str`, optional
        One of "CASCADE" or "SET NULL", indicating what should happen to
        the referencing row if the referenced dataset row is deleted.
        `None` indicates that this should be an integrity error.
    constraint : `bool`, optional
        If `False` (`True` is the default), add a field that can be joined
        to the dataset primary key, but do not add a foreign key constraint.
    **kwargs
        Additional keyword arguments are forwarded to the `ddl.FieldSpec`
        constructor (only the ``name`` and ``dtype`` arguments are
        otherwise provided).

    Returns
    -------
    idSpec : `ddl.FieldSpec`
        Specification for the ID field.
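
    Examples
    --------
    A minimal sketch, not taken from the original source (it assumes an
    otherwise empty table spec and an integer dataset ID)::

        spec = ddl.TableSpec(fields=[])
        addDatasetForeignKey(spec, sqlalchemy.BigInteger, onDelete="CASCADE")

    After this call ``spec`` has a ``dataset_id`` field and, because
    ``constraint`` defaulted to `True`, a foreign key to ``dataset.id``.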

    """
    idFieldSpec = ddl.FieldSpec(f"{name}_id", dtype=dtype, **kwargs)
    tableSpec.fields.add(idFieldSpec)
    if constraint:
        tableSpec.foreignKeys.append(
            ddl.ForeignKeySpec("dataset", source=(idFieldSpec.name,), target=("id",), onDelete=onDelete)
        )
    return idFieldSpec


def makeStaticTableSpecs(
    collections: type[CollectionManager],
    universe: DimensionUniverse,
    dtype: type,
    autoincrement: bool,
    schema_version: VersionTuple,
) -> StaticDatasetTablesTuple:
    """Construct all static tables used by the classes in this package.

    Static tables are those that are present in all Registries and do not
    depend on what DatasetTypes have been registered.

    Parameters
    ----------
    collections : `type` [ `CollectionManager` ]
        `CollectionManager` subclass that manages the collections in this
        `Registry`.
    universe : `DimensionUniverse`
        Universe graph containing all dimensions known to this `Registry`.
    dtype : `type`
        Type of the dataset ID (primary key) column.
    autoincrement : `bool`
        If `True`, the dataset ID column will be auto-incrementing.
    schema_version : `VersionTuple`
        The version of this schema.

    Returns
    -------
    specs : `StaticDatasetTablesTuple`
        A named tuple containing `ddl.TableSpec` instances.
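
    Notes
    -----
    The type of the ``ingest_date`` column depends on ``schema_version``:
    for major versions greater than 1 it is stored as nanoseconds of TAI
    (`ddl.AstropyTimeNsecTai`) with no database-side default, while version
    1 uses a ``TIMESTAMP`` column whose default is the database's current
    time.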

    """
    ingest_date_type: type
    ingest_date_default: Any = None
    if schema_version.major > 1:
        ingest_date_type = ddl.AstropyTimeNsecTai
    else:
        ingest_date_type = sqlalchemy.TIMESTAMP
        # New code provides explicit values for ingest_date, but we keep the
        # default just to be consistent with the existing schema.
        ingest_date_default = sqlalchemy.sql.func.now()

    specs = StaticDatasetTablesTuple(
        dataset_type=ddl.TableSpec(
            fields=[
                ddl.FieldSpec(
                    name="id",
                    dtype=sqlalchemy.BigInteger,
                    autoincrement=True,
                    primaryKey=True,
                    doc=(
                        "Autoincrement ID that uniquely identifies a dataset "
                        "type in other tables. Python code outside the "
                        "`Registry` class should never interact with this; "
                        "its existence is considered an implementation detail."
                    ),
                ),
                ddl.FieldSpec(
                    name="name",
                    dtype=sqlalchemy.String,
                    length=DATASET_TYPE_NAME_LENGTH,
                    nullable=False,
                    doc="String name that uniquely identifies a dataset type.",
                ),
                ddl.FieldSpec(
                    name="storage_class",
                    dtype=sqlalchemy.String,
                    length=64,
                    nullable=False,
                    doc=(
                        "Name of the storage class associated with all "
                        "datasets of this type. Storage classes are "
                        "generally associated with a Python class, and are "
                        "enumerated in butler configuration."
                    ),
                ),
                ddl.FieldSpec(
                    name="dimensions_key",
                    dtype=sqlalchemy.BigInteger,
                    nullable=False,
                    doc="Unique key for the set of dimensions that identifies datasets of this type.",
                ),
                ddl.FieldSpec(
                    name="tag_association_table",
                    dtype=sqlalchemy.String,
                    length=128,
                    nullable=False,
                    doc=(
                        "Name of the table that holds associations between "
                        "datasets of this type and most types of collections."
                    ),
                ),
                ddl.FieldSpec(
                    name="calibration_association_table",
                    dtype=sqlalchemy.String,
                    length=128,
                    nullable=True,
                    doc=(
                        "Name of the table that holds associations between "
                        "datasets of this type and CALIBRATION collections. "
                        "NULL values indicate dataset types with "
                        "isCalibration=False."
                    ),
                ),
            ],
            unique=[("name",)],
        ),
        dataset=ddl.TableSpec(
            fields=[
                ddl.FieldSpec(
                    name="id",
                    dtype=dtype,
                    autoincrement=autoincrement,
                    primaryKey=True,
                    doc="A unique field used as the primary key for dataset.",
                ),
                ddl.FieldSpec(
                    name="dataset_type_id",
                    dtype=sqlalchemy.BigInteger,
                    nullable=False,
                    doc="Reference to the associated entry in the dataset_type table.",
                ),
                ddl.FieldSpec(
                    name="ingest_date",
                    dtype=ingest_date_type,
                    default=ingest_date_default,
                    nullable=False,
                    doc="Time of dataset ingestion.",
                ),
                # Foreign key field/constraint to run added below.
            ],
            foreignKeys=[
                ddl.ForeignKeySpec("dataset_type", source=("dataset_type_id",), target=("id",)),
            ],
        ),
    )
    # Add foreign key fields programmatically.
    collections.addRunForeignKey(specs.dataset, onDelete="CASCADE", nullable=False)
    return specs


def makeTagTableName(datasetType: DatasetType, dimensionsKey: int) -> str:
    """Construct the name for a dynamic (DatasetType-dependent) tag table used
    by the classes in this package.

    Parameters
    ----------
    datasetType : `DatasetType`
        Dataset type to construct a name for. Multiple dataset types may
        share the same table.
    dimensionsKey : `int`
        Integer key used to save ``datasetType.dimensions`` to the database.

    Returns
    -------
    name : `str`
        Name for the table.
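
    Examples
    --------
    The name is derived only from ``dimensionsKey``; the ``datasetType``
    argument does not affect it (``datasetType`` below stands for any
    `DatasetType` instance)::

        >>> makeTagTableName(datasetType, 42)
        'dataset_tags_00000042'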

    """
    return f"dataset_tags_{dimensionsKey:08d}"


def makeCalibTableName(datasetType: DatasetType, dimensionsKey: int) -> str:
    """Construct the name for a dynamic (DatasetType-dependent) tag + validity
    range table used by the classes in this package.

    Parameters
    ----------
    datasetType : `DatasetType`
        Dataset type to construct a name for. Multiple dataset types may
        share the same table.
    dimensionsKey : `int`
        Integer key used to save ``datasetType.dimensions`` to the database.

    Returns
    -------
    name : `str`
        Name for the table.
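
    Examples
    --------
    As with `makeTagTableName`, the name is derived only from
    ``dimensionsKey``, but ``datasetType`` must satisfy
    ``datasetType.isCalibration()`` (``calibType`` below stands for any
    such `DatasetType`)::

        >>> makeCalibTableName(calibType, 42)
        'dataset_calibs_00000042'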

    """
    assert datasetType.isCalibration()
    return f"dataset_calibs_{dimensionsKey:08d}"


def makeTagTableSpec(
    datasetType: DatasetType, collections: type[CollectionManager], dtype: type, *, constraints: bool = True
) -> ddl.TableSpec:
    """Construct the specification for a dynamic (DatasetType-dependent) tag
    table used by the classes in this package.

    Parameters
    ----------
    datasetType : `DatasetType`
        Dataset type to construct a spec for. Multiple dataset types may
        share the same table.
    collections : `type` [ `CollectionManager` ]
        `CollectionManager` subclass that can be used to construct foreign
        keys to the run and/or collection tables.
    dtype : `type`
        Type of the FK column; the same as the column type of the primary
        key column of the referenced table (``dataset.id``).
    constraints : `bool`, optional
        If `False` (`True` is the default), do not define foreign key
        constraints.

    Returns
    -------
    spec : `ddl.TableSpec`
        Specification for the table.
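
    Notes
    -----
    The returned spec has a compound primary key made up of the dataset FK
    and the collection FK, a ``dataset_type_id`` column, and one column per
    required dimension of ``datasetType``; a unique constraint covers the
    dataset type, collection, and required data ID columns. All foreign key
    constraints are omitted when ``constraints`` is `False`.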

    """
    tableSpec = ddl.TableSpec(
        fields=[
            # Foreign key fields to dataset, collection, and usually dimension
            # tables added below.
            # The dataset_type_id field here would be redundant with the one
            # in the main monolithic dataset table, but we need it here for an
            # important unique constraint.
            ddl.FieldSpec("dataset_type_id", dtype=sqlalchemy.BigInteger, nullable=False),
        ]
    )
    if constraints:
        tableSpec.foreignKeys.append(
            ddl.ForeignKeySpec("dataset_type", source=("dataset_type_id",), target=("id",))
        )
    # We'll also have a unique constraint on dataset type, collection, and
    # data ID. We only include the required part of the data ID, as that's
    # sufficient and saves us from worrying about nulls in the constraint.
    constraint = ["dataset_type_id"]
    # Add foreign key fields to dataset table (part of the primary key).
    addDatasetForeignKey(tableSpec, dtype, primaryKey=True, onDelete="CASCADE", constraint=constraints)
    # Add foreign key fields to collection table (part of the primary key and
    # the data ID unique constraint).
    collectionFieldSpec = collections.addCollectionForeignKey(
        tableSpec, primaryKey=True, onDelete="CASCADE", constraint=constraints
    )
    constraint.append(collectionFieldSpec.name)
    # Add foreign key constraint to the collection_summary_dataset_type table.
    if constraints:
        tableSpec.foreignKeys.append(
            ddl.ForeignKeySpec(
                "collection_summary_dataset_type",
                source=(collectionFieldSpec.name, "dataset_type_id"),
                target=(collectionFieldSpec.name, "dataset_type_id"),
            )
        )
    for dimension_name in datasetType.dimensions.required.names:
        dimension = datasetType.dimensions.universe.dimensions[dimension_name]
        fieldSpec = addDimensionForeignKey(
            tableSpec, dimension=dimension, nullable=False, primaryKey=False, constraint=constraints
        )
        constraint.append(fieldSpec.name)
        # If this is a governor dimension, add a foreign key constraint to the
        # collection_summary_<dimension> table.
        if isinstance(dimension, GovernorDimension) and constraints:
            tableSpec.foreignKeys.append(
                ddl.ForeignKeySpec(
                    f"collection_summary_{dimension.name}",
                    source=(collectionFieldSpec.name, fieldSpec.name),
                    target=(collectionFieldSpec.name, fieldSpec.name),
                )
            )
    # Actually add the unique constraint.
    tableSpec.unique.add(tuple(constraint))
    return tableSpec


def makeCalibTableSpec(
    datasetType: DatasetType,
    collections: type[CollectionManager],
    TimespanReprClass: type[TimespanDatabaseRepresentation],
    dtype: type,
) -> ddl.TableSpec:
    """Construct the specification for a dynamic (DatasetType-dependent) tag +
    validity range table used by the classes in this package.

    Parameters
    ----------
    datasetType : `DatasetType`
        Dataset type to construct a spec for. Multiple dataset types may
        share the same table.
    collections : `type` [ `CollectionManager` ]
        `CollectionManager` subclass that can be used to construct foreign
        keys to the run and/or collection tables.
    TimespanReprClass : `type` [ `TimespanDatabaseRepresentation` ]
        The Python type to use to represent a timespan.
    dtype : `type`
        Type of the FK column; the same as the column type of the primary
        key column of the referenced table (``dataset.id``).

    Returns
    -------
    spec : `ddl.TableSpec`
        Specification for the table.
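
    Notes
    -----
    Unlike the tag table, this table has an autoincrement surrogate primary
    key, because validity ranges leave it with no natural compound key.
    Whether overlapping validity ranges are rejected by the database itself
    depends on ``TimespanReprClass.hasExclusionConstraint()``: if `True`,
    an exclusion constraint is added; otherwise only a regular index is
    created, and overlaps must be prevented by
    `DatasetRecordStorage.certify`.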

    """
    tableSpec = ddl.TableSpec(
        fields=[
            # This table has no natural primary key, compound or otherwise, so
            # we add an autoincrement key. We may use this field a bit
            # internally, but its presence is an implementation detail and it
            # shouldn't appear as a foreign key in any other tables.
            ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, autoincrement=True, primaryKey=True),
            # Foreign key fields to dataset, collection, and usually dimension
            # tables added below. The dataset_type_id field here is redundant
            # with the one in the main monolithic dataset table, but this bit
            # of denormalization lets us define what should be a much more
            # useful index.
            ddl.FieldSpec("dataset_type_id", dtype=sqlalchemy.BigInteger, nullable=False),
        ],
        foreignKeys=[
            ddl.ForeignKeySpec("dataset_type", source=("dataset_type_id",), target=("id",)),
        ],
    )
    # Record fields that should go in the temporal lookup index/constraint,
    # starting with the dataset type.
    index: list[str | type[TimespanDatabaseRepresentation]] = ["dataset_type_id"]
    # Add foreign key fields to dataset table (not part of the temporal
    # lookup/constraint).
    addDatasetForeignKey(tableSpec, dtype, nullable=False, onDelete="CASCADE")
    # Add foreign key fields to collection table (part of the temporal lookup
    # index/constraint).
    collectionFieldSpec = collections.addCollectionForeignKey(tableSpec, nullable=False, onDelete="CASCADE")
    index.append(collectionFieldSpec.name)
    # Add foreign key constraint to the collection_summary_dataset_type table.
    tableSpec.foreignKeys.append(
        ddl.ForeignKeySpec(
            "collection_summary_dataset_type",
            source=(collectionFieldSpec.name, "dataset_type_id"),
            target=(collectionFieldSpec.name, "dataset_type_id"),
        )
    )
    # Add dimension fields (part of the temporal lookup index/constraint).
    for dimension_name in datasetType.dimensions.required.names:
        dimension = datasetType.dimensions.universe.dimensions[dimension_name]
        fieldSpec = addDimensionForeignKey(tableSpec, dimension=dimension, nullable=False, primaryKey=False)
        index.append(fieldSpec.name)
        # If this is a governor dimension, add a foreign key constraint to the
        # collection_summary_<dimension> table.
        if isinstance(dimension, GovernorDimension):
            tableSpec.foreignKeys.append(
                ddl.ForeignKeySpec(
                    f"collection_summary_{dimension.name}",
                    source=(collectionFieldSpec.name, fieldSpec.name),
                    target=(collectionFieldSpec.name, fieldSpec.name),
                )
            )
    # Add validity-range field(s) (part of the temporal lookup
    # index/constraint).
    tsFieldSpecs = TimespanReprClass.makeFieldSpecs(nullable=False)
    for fieldSpec in tsFieldSpecs:
        tableSpec.fields.add(fieldSpec)
    if TimespanReprClass.hasExclusionConstraint():
        # This database's timespan representation can define a database-level
        # constraint that prevents overlapping validity ranges for entries
        # with the same DatasetType, collection, and data ID.
        # This also creates an index.
        index.append(TimespanReprClass)
        tableSpec.exclusion.add(tuple(index))
    else:
        # No database-level constraint possible. We'll have to simulate that
        # in our DatasetRecordStorage.certify() implementation, and just
        # create a regular index here in the hope that it helps with lookups.
        index.extend(fieldSpec.name for fieldSpec in tsFieldSpecs)
        tableSpec.indexes.add(ddl.IndexSpec(*index))  # type: ignore
    return tableSpec