Coverage for python/lsst/daf/butler/registry/datasets/byDimensions/tables.py: 97%

67 statements  

coverage.py v7.3.1, created at 2023-10-02 07:59 +0000

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = (
    "addDatasetForeignKey",
    "makeCalibTableName",
    "makeCalibTableSpec",
    "makeStaticTableSpecs",
    "makeTagTableName",
    "makeTagTableSpec",
    "StaticDatasetTablesTuple",
)

from collections import namedtuple
from typing import Any

import sqlalchemy

from ....core import (
    DatasetType,
    DimensionUniverse,
    GovernorDimension,
    TimespanDatabaseRepresentation,
    addDimensionForeignKey,
    ddl,
)
from ...interfaces import CollectionManager, VersionTuple

DATASET_TYPE_NAME_LENGTH = 128


StaticDatasetTablesTuple = namedtuple(
    "StaticDatasetTablesTuple",
    [
        "dataset_type",
        "dataset",
    ],
)


def addDatasetForeignKey(
    tableSpec: ddl.TableSpec,
    dtype: type,
    *,
    name: str = "dataset",
    onDelete: str | None = None,
    constraint: bool = True,
    **kwargs: Any,
) -> ddl.FieldSpec:
    """Add a foreign key column for datasets and (optionally) a constraint to
    a table.

    This is an internal interface for the ``byDimensions`` package; external
    code should use `DatasetRecordStorageManager.addDatasetForeignKey` instead.

    Parameters
    ----------
    tableSpec : `ddl.TableSpec`
        Specification for the table that should reference the dataset
        table. Will be modified in place.
    dtype : `type`
        Type of the column; must match the column type of the primary key
        column of the referenced table (``dataset.id``).
    name : `str`, optional
        Name to use as the prefix of the new field; the full field name is
        ``{name}_id``.
    onDelete : `str`, optional
        One of "CASCADE" or "SET NULL", indicating what should happen to
        the referencing row if the referenced dataset row is deleted. `None`
        indicates that this should be an integrity error.
    constraint : `bool`, optional
        If `False` (`True` is default), add a field that can be joined to
        the dataset primary key, but do not add a foreign key constraint.
    **kwargs
        Additional keyword arguments are forwarded to the `ddl.FieldSpec`
        constructor (only the ``name`` and ``dtype`` arguments are
        otherwise provided).

    Returns
    -------
    idSpec : `ddl.FieldSpec`
        Specification for the ID field.
    """
    idFieldSpec = ddl.FieldSpec(f"{name}_id", dtype=dtype, **kwargs)
    tableSpec.fields.add(idFieldSpec)
    if constraint:
        tableSpec.foreignKeys.append(
            ddl.ForeignKeySpec("dataset", source=(idFieldSpec.name,), target=("id",), onDelete=onDelete)
        )
    return idFieldSpec
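

# Example (an illustrative sketch in comments only; ``spec`` is a
# hypothetical table spec under construction that should reference the
# ``dataset`` table):
#
#     spec = ddl.TableSpec(fields=[])
#     addDatasetForeignKey(spec, sqlalchemy.BigInteger, onDelete="CASCADE")
#     # ``spec`` now has a ``dataset_id`` column and, because ``constraint``
#     # defaults to `True`, a foreign key constraint on ``dataset.id``.
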

def makeStaticTableSpecs(
    collections: type[CollectionManager],
    universe: DimensionUniverse,
    dtype: type,
    autoincrement: bool,
    schema_version: VersionTuple,
) -> StaticDatasetTablesTuple:
    """Construct all static tables used by the classes in this package.

    Static tables are those that are present in all Registries and do not
    depend on what DatasetTypes have been registered.

    Parameters
    ----------
    collections : `type` [ `CollectionManager` ]
        Manager class for the collections in this `Registry`.
    universe : `DimensionUniverse`
        Universe graph containing all dimensions known to this `Registry`.
    dtype : `type`
        Type of the dataset ID (primary key) column.
    autoincrement : `bool`
        If `True`, the dataset ID column will be auto-incrementing.
    schema_version : `VersionTuple`
        Schema version of this dataset manager; used to select the column
        type and default for ``ingest_date``.

    Returns
    -------
    specs : `StaticDatasetTablesTuple`
        A named tuple containing `ddl.TableSpec` instances.
    """
    ingest_date_type: type
    ingest_date_default: Any = None
    if schema_version.major > 1:
        ingest_date_type = ddl.AstropyTimeNsecTai
    else:
        ingest_date_type = sqlalchemy.TIMESTAMP
        # New code provides explicit values for ingest_date, but we keep the
        # default just to be consistent with the existing schema.
        ingest_date_default = sqlalchemy.sql.func.now()

    specs = StaticDatasetTablesTuple(
        dataset_type=ddl.TableSpec(
            fields=[
                ddl.FieldSpec(
                    name="id",
                    dtype=sqlalchemy.BigInteger,
                    autoincrement=True,
                    primaryKey=True,
                    doc=(
                        "Autoincrement ID that uniquely identifies a dataset "
                        "type in other tables. Python code outside the "
                        "`Registry` class should never interact with this; "
                        "its existence is considered an implementation detail."
                    ),
                ),
                ddl.FieldSpec(
                    name="name",
                    dtype=sqlalchemy.String,
                    length=DATASET_TYPE_NAME_LENGTH,
                    nullable=False,
                    doc="String name that uniquely identifies a dataset type.",
                ),
                ddl.FieldSpec(
                    name="storage_class",
                    dtype=sqlalchemy.String,
                    length=64,
                    nullable=False,
                    doc=(
                        "Name of the storage class associated with all "
                        "datasets of this type. Storage classes are "
                        "generally associated with a Python class, and are "
                        "enumerated in butler configuration."
                    ),
                ),
                ddl.FieldSpec(
                    name="dimensions_key",
                    dtype=sqlalchemy.BigInteger,
                    nullable=False,
                    doc="Unique key for the set of dimensions that identifies datasets of this type.",
                ),
                ddl.FieldSpec(
                    name="tag_association_table",
                    dtype=sqlalchemy.String,
                    length=128,
                    nullable=False,
                    doc=(
                        "Name of the table that holds associations between "
                        "datasets of this type and most types of collections."
                    ),
                ),
                ddl.FieldSpec(
                    name="calibration_association_table",
                    dtype=sqlalchemy.String,
                    length=128,
                    nullable=True,
                    doc=(
                        "Name of the table that holds associations between "
                        "datasets of this type and CALIBRATION collections. "
                        "NULL values indicate dataset types with "
                        "isCalibration=False."
                    ),
                ),
            ],
            unique=[("name",)],
        ),
        dataset=ddl.TableSpec(
            fields=[
                ddl.FieldSpec(
                    name="id",
                    dtype=dtype,
                    autoincrement=autoincrement,
                    primaryKey=True,
                    doc="A unique field used as the primary key for dataset.",
                ),
                ddl.FieldSpec(
                    name="dataset_type_id",
                    dtype=sqlalchemy.BigInteger,
                    nullable=False,
                    doc="Reference to the associated entry in the dataset_type table.",
                ),
                ddl.FieldSpec(
                    name="ingest_date",
                    dtype=ingest_date_type,
                    default=ingest_date_default,
                    nullable=False,
                    doc="Time of dataset ingestion.",
                ),
                # Foreign key field/constraint to run added below.
            ],
            foreignKeys=[
                ddl.ForeignKeySpec("dataset_type", source=("dataset_type_id",), target=("id",)),
            ],
        ),
    )
    # Add foreign key fields programmatically.
    collections.addRunForeignKey(specs.dataset, onDelete="CASCADE", nullable=False)
    return specs
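

# Example (illustrative sketch; ``manager_cls`` stands in for a concrete
# `CollectionManager` subclass and ``universe`` for the registry's
# `DimensionUniverse`, both supplied by the enclosing registry):
#
#     specs = makeStaticTableSpecs(
#         manager_cls,
#         universe,
#         dtype=sqlalchemy.BigInteger,
#         autoincrement=True,
#         schema_version=VersionTuple(1, 0, 0),
#     )
#     specs.dataset_type  # `ddl.TableSpec` for the static dataset_type table
#     specs.dataset  # `ddl.TableSpec` for the static dataset table
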

def makeTagTableName(datasetType: DatasetType, dimensionsKey: int) -> str:
    """Construct the name for a dynamic (DatasetType-dependent) tag table used
    by the classes in this package.

    Parameters
    ----------
    datasetType : `DatasetType`
        Dataset type to construct a name for. Multiple dataset types may
        share the same table.
    dimensionsKey : `int`
        Integer key used to save ``datasetType.dimensions`` to the database.

    Returns
    -------
    name : `str`
        Name for the table.
    """
    return f"dataset_tags_{dimensionsKey:08d}"
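

# Example (illustrative): the table name depends only on the dimensions key,
# zero-padded to eight digits, which is why dataset types with the same
# dimensions can share one table:
#
#     makeTagTableName(datasetType, 42)  # -> "dataset_tags_00000042"
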

def makeCalibTableName(datasetType: DatasetType, dimensionsKey: int) -> str:
    """Construct the name for a dynamic (DatasetType-dependent) tag + validity
    range table used by the classes in this package.

    Parameters
    ----------
    datasetType : `DatasetType`
        Dataset type to construct a name for. Multiple dataset types may
        share the same table.
    dimensionsKey : `int`
        Integer key used to save ``datasetType.dimensions`` to the database.

    Returns
    -------
    name : `str`
        Name for the table.
    """
    assert datasetType.isCalibration()
    return f"dataset_calibs_{dimensionsKey:08d}"
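

# Example (illustrative; the dataset type must satisfy ``isCalibration()``,
# or the assertion above fires):
#
#     makeCalibTableName(datasetType, 42)  # -> "dataset_calibs_00000042"
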

def makeTagTableSpec(
    datasetType: DatasetType, collections: type[CollectionManager], dtype: type, *, constraints: bool = True
) -> ddl.TableSpec:
    """Construct the specification for a dynamic (DatasetType-dependent) tag
    table used by the classes in this package.

    Parameters
    ----------
    datasetType : `DatasetType`
        Dataset type to construct a spec for. Multiple dataset types may
        share the same table.
    collections : `type` [ `CollectionManager` ]
        `CollectionManager` subclass that can be used to construct foreign keys
        to the run and/or collection tables.
    dtype : `type`
        Type of the FK column, same as the column type of the PK column of
        a referenced table (``dataset.id``).
    constraints : `bool`, optional
        If `False` (`True` is default), do not define foreign key constraints.

    Returns
    -------
    spec : `ddl.TableSpec`
        Specification for the table.
    """
    tableSpec = ddl.TableSpec(
        fields=[
            # Foreign key fields to dataset, collection, and usually dimension
            # tables added below.
            # The dataset_type_id field here would be redundant with the one
            # in the main monolithic dataset table, but we need it here for an
            # important unique constraint.
            ddl.FieldSpec("dataset_type_id", dtype=sqlalchemy.BigInteger, nullable=False),
        ]
    )
    if constraints:
        tableSpec.foreignKeys.append(
            ddl.ForeignKeySpec("dataset_type", source=("dataset_type_id",), target=("id",))
        )
    # We'll also have a unique constraint on dataset type, collection, and
    # data ID. We only include the required part of the data ID, as that's
    # sufficient and saves us from worrying about nulls in the constraint.
    constraint = ["dataset_type_id"]
    # Add foreign key fields to dataset table (part of the primary key).
    addDatasetForeignKey(tableSpec, dtype, primaryKey=True, onDelete="CASCADE", constraint=constraints)
    # Add foreign key fields to collection table (part of the primary key and
    # the data ID unique constraint).
    collectionFieldSpec = collections.addCollectionForeignKey(
        tableSpec, primaryKey=True, onDelete="CASCADE", constraint=constraints
    )
    constraint.append(collectionFieldSpec.name)
    # Add foreign key constraint to the collection_summary_dataset_type table.
    if constraints:
        tableSpec.foreignKeys.append(
            ddl.ForeignKeySpec(
                "collection_summary_dataset_type",
                source=(collectionFieldSpec.name, "dataset_type_id"),
                target=(collectionFieldSpec.name, "dataset_type_id"),
            )
        )
    for dimension in datasetType.dimensions.required:
        fieldSpec = addDimensionForeignKey(
            tableSpec, dimension=dimension, nullable=False, primaryKey=False, constraint=constraints
        )
        constraint.append(fieldSpec.name)
        # If this is a governor dimension, add a foreign key constraint to the
        # collection_summary_<dimension> table.
        if isinstance(dimension, GovernorDimension) and constraints:
            tableSpec.foreignKeys.append(
                ddl.ForeignKeySpec(
                    f"collection_summary_{dimension.name}",
                    source=(collectionFieldSpec.name, fieldSpec.name),
                    target=(collectionFieldSpec.name, fieldSpec.name),
                )
            )
    # Actually add the unique constraint.
    tableSpec.unique.add(tuple(constraint))
    return tableSpec
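

# Example (illustrative sketch; ``datasetType`` and the ``manager_cls``
# `CollectionManager` subclass are assumed to be supplied by the caller):
#
#     spec = makeTagTableSpec(datasetType, manager_cls, sqlalchemy.BigInteger)
#     # ``spec`` has a compound primary key over the dataset and collection
#     # foreign keys, plus a unique constraint over (dataset_type_id,
#     # collection, required data ID), ready to be created under the name
#     # returned by makeTagTableName().
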

def makeCalibTableSpec(
    datasetType: DatasetType,
    collections: type[CollectionManager],
    TimespanReprClass: type[TimespanDatabaseRepresentation],
    dtype: type,
) -> ddl.TableSpec:
    """Construct the specification for a dynamic (DatasetType-dependent) tag +
    validity range table used by the classes in this package.

    Parameters
    ----------
    datasetType : `DatasetType`
        Dataset type to construct a spec for. Multiple dataset types may
        share the same table.
    collections : `type` [ `CollectionManager` ]
        `CollectionManager` subclass that can be used to construct foreign keys
        to the run and/or collection tables.
    TimespanReprClass : `type` [ `TimespanDatabaseRepresentation` ]
        Class that encapsulates how validity-range timespans are represented
        in this database.
    dtype : `type`
        Type of the FK column, same as the column type of the PK column of
        a referenced table (``dataset.id``).

    Returns
    -------
    spec : `ddl.TableSpec`
        Specification for the table.
    """
    tableSpec = ddl.TableSpec(
        fields=[
            # This table has no natural primary key, compound or otherwise, so
            # we add an autoincrement key. We may use this field a bit
            # internally, but its presence is an implementation detail and it
            # shouldn't appear as a foreign key in any other tables.
            ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, autoincrement=True, primaryKey=True),
            # Foreign key fields to dataset, collection, and usually dimension
            # tables added below. The dataset_type_id field here is redundant
            # with the one in the main monolithic dataset table, but this bit
            # of denormalization lets us define what should be a much more
            # useful index.
            ddl.FieldSpec("dataset_type_id", dtype=sqlalchemy.BigInteger, nullable=False),
        ],
        foreignKeys=[
            ddl.ForeignKeySpec("dataset_type", source=("dataset_type_id",), target=("id",)),
        ],
    )
    # Record fields that should go in the temporal lookup index/constraint,
    # starting with the dataset type.
    index: list[str | type[TimespanDatabaseRepresentation]] = ["dataset_type_id"]
    # Add foreign key fields to dataset table (not part of the temporal
    # lookup/constraint).
    addDatasetForeignKey(tableSpec, dtype, nullable=False, onDelete="CASCADE")
    # Add foreign key fields to collection table (part of the temporal lookup
    # index/constraint).
    collectionFieldSpec = collections.addCollectionForeignKey(tableSpec, nullable=False, onDelete="CASCADE")
    index.append(collectionFieldSpec.name)
    # Add foreign key constraint to the collection_summary_dataset_type table.
    tableSpec.foreignKeys.append(
        ddl.ForeignKeySpec(
            "collection_summary_dataset_type",
            source=(collectionFieldSpec.name, "dataset_type_id"),
            target=(collectionFieldSpec.name, "dataset_type_id"),
        )
    )
    # Add dimension fields (part of the temporal lookup index/constraint).
    for dimension in datasetType.dimensions.required:
        fieldSpec = addDimensionForeignKey(tableSpec, dimension=dimension, nullable=False, primaryKey=False)
        index.append(fieldSpec.name)
        # If this is a governor dimension, add a foreign key constraint to the
        # collection_summary_<dimension> table.
        if isinstance(dimension, GovernorDimension):
            tableSpec.foreignKeys.append(
                ddl.ForeignKeySpec(
                    f"collection_summary_{dimension.name}",
                    source=(collectionFieldSpec.name, fieldSpec.name),
                    target=(collectionFieldSpec.name, fieldSpec.name),
                )
            )
    # Add validity-range field(s) (part of the temporal lookup
    # index/constraint).
    tsFieldSpecs = TimespanReprClass.makeFieldSpecs(nullable=False)
    for fieldSpec in tsFieldSpecs:
        tableSpec.fields.add(fieldSpec)
    if TimespanReprClass.hasExclusionConstraint():
        # This database's timespan representation can define a database-level
        # constraint that prevents overlapping validity ranges for entries
        # with the same DatasetType, collection, and data ID.
        # This also creates an index.
        index.append(TimespanReprClass)
        tableSpec.exclusion.add(tuple(index))
    else:
        # No database-level constraint is possible. We'll have to simulate
        # that in our DatasetRecordStorage.certify() implementation, and just
        # create a regular index here in the hope that it helps with lookups.
        index.extend(fieldSpec.name for fieldSpec in tsFieldSpecs)
        tableSpec.indexes.add(ddl.IndexSpec(*index))  # type: ignore
    return tableSpec
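

# Example (illustrative sketch; ``datasetType``, ``manager_cls``, and
# ``TimespanReprClass`` are assumed to come from the enclosing registry and
# database configuration):
#
#     spec = makeCalibTableSpec(
#         datasetType,
#         manager_cls,
#         TimespanReprClass,
#         sqlalchemy.BigInteger,
#     )
#     # Depending on TimespanReprClass.hasExclusionConstraint(), ``spec``
#     # carries either a database-level exclusion constraint or a plain
#     # index over (dataset_type_id, collection, data ID, validity range).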