Coverage for python/lsst/daf/butler/registry/datasets/byDimensions/tables.py: 97%

72 statements  

coverage.py v7.3.2, created at 2023-12-06 10:52 +0000

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

from .... import ddl

__all__ = (
    "addDatasetForeignKey",
    "makeCalibTableName",
    "makeCalibTableSpec",
    "makeStaticTableSpecs",
    "makeTagTableName",
    "makeTagTableSpec",
    "StaticDatasetTablesTuple",
)

from collections import namedtuple
from typing import Any

import sqlalchemy

from ...._dataset_type import DatasetType
from ...._timespan import TimespanDatabaseRepresentation
from ....dimensions import DimensionUniverse, GovernorDimension, addDimensionForeignKey
from ...interfaces import CollectionManager, VersionTuple

DATASET_TYPE_NAME_LENGTH = 128


StaticDatasetTablesTuple = namedtuple(
    "StaticDatasetTablesTuple",
    [
        "dataset_type",
        "dataset",
    ],
)


def addDatasetForeignKey(
    tableSpec: ddl.TableSpec,
    dtype: type,
    *,
    name: str = "dataset",
    onDelete: str | None = None,
    constraint: bool = True,
    **kwargs: Any,
) -> ddl.FieldSpec:
    """Add a foreign key column for datasets and (optionally) a constraint to
    a table.

    This is an internal interface for the ``byDimensions`` package; external
    code should use `DatasetRecordStorageManager.addDatasetForeignKey` instead.

    Parameters
    ----------
    tableSpec : `ddl.TableSpec`
        Specification for the table that should reference the dataset
        table.  Will be modified in place.
    dtype : `type`
        Type of the column; must match the column type of the primary key
        column of the referenced table (``dataset.id``).
    name : `str`, optional
        Prefix for the name of the new field; the full name is
        ``{name}_id``.
    onDelete : `str`, optional
        One of "CASCADE" or "SET NULL", indicating what should happen to
        the referencing row if the referenced dataset row is deleted.
        `None` indicates that this should be an integrity error.
    constraint : `bool`, optional
        If `False` (`True` is default), add a field that can be joined to
        the dataset primary key, but do not add a foreign key constraint.
    **kwargs
        Additional keyword arguments are forwarded to the `ddl.FieldSpec`
        constructor (only the ``name`` and ``dtype`` arguments are
        otherwise provided).

    Returns
    -------
    idSpec : `ddl.FieldSpec`
        Specification for the ID field.
    """
    idFieldSpec = ddl.FieldSpec(f"{name}_id", dtype=dtype, **kwargs)
    tableSpec.fields.add(idFieldSpec)
    if constraint:
        tableSpec.foreignKeys.append(
            ddl.ForeignKeySpec("dataset", source=(idFieldSpec.name,), target=("id",), onDelete=onDelete)
        )
    return idFieldSpec

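# Illustrative usage (not part of the original module): a minimal sketch of
# how addDatasetForeignKey extends a table specification.  The table layout
# below is hypothetical.
#
#     spec = ddl.TableSpec(
#         fields=[ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, primaryKey=True)],
#     )
#     # Adds a "dataset_id" column to ``spec`` and (because constraint=True
#     # by default) a foreign key constraint pointing at ``dataset.id``.
#     idSpec = addDatasetForeignKey(spec, sqlalchemy.BigInteger, onDelete="CASCADE")
#     assert idSpec.name == "dataset_id"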

def makeStaticTableSpecs(
    collections: type[CollectionManager],
    universe: DimensionUniverse,
    dtype: type,
    autoincrement: bool,
    schema_version: VersionTuple,
) -> StaticDatasetTablesTuple:
    """Construct all static tables used by the classes in this package.

    Static tables are those that are present in all Registries and do not
    depend on what DatasetTypes have been registered.

    Parameters
    ----------
    collections : `type` [ `CollectionManager` ]
        Manager class for the collections in this `Registry`.
    universe : `DimensionUniverse`
        Universe graph containing all dimensions known to this `Registry`.
    dtype : `type`
        Type of the dataset ID (primary key) column.
    autoincrement : `bool`
        If `True` then the dataset ID column will be auto-incrementing.
    schema_version : `VersionTuple`
        Schema version of this manager; used to select the type and default
        of the ``ingest_date`` column.

    Returns
    -------
    specs : `StaticDatasetTablesTuple`
        A named tuple containing `ddl.TableSpec` instances.
    """
    ingest_date_type: type
    ingest_date_default: Any = None
    if schema_version.major > 1:
        ingest_date_type = ddl.AstropyTimeNsecTai
    else:
        ingest_date_type = sqlalchemy.TIMESTAMP
        # New code provides explicit values for ingest_date, but we keep the
        # default just to be consistent with the existing schema.
        ingest_date_default = sqlalchemy.sql.func.now()

    specs = StaticDatasetTablesTuple(
        dataset_type=ddl.TableSpec(
            fields=[
                ddl.FieldSpec(
                    name="id",
                    dtype=sqlalchemy.BigInteger,
                    autoincrement=True,
                    primaryKey=True,
                    doc=(
                        "Autoincrement ID that uniquely identifies a dataset "
                        "type in other tables. Python code outside the "
                        "`Registry` class should never interact with this; "
                        "its existence is considered an implementation detail."
                    ),
                ),
                ddl.FieldSpec(
                    name="name",
                    dtype=sqlalchemy.String,
                    length=DATASET_TYPE_NAME_LENGTH,
                    nullable=False,
                    doc="String name that uniquely identifies a dataset type.",
                ),
                ddl.FieldSpec(
                    name="storage_class",
                    dtype=sqlalchemy.String,
                    length=64,
                    nullable=False,
                    doc=(
                        "Name of the storage class associated with all "
                        "datasets of this type. Storage classes are "
                        "generally associated with a Python class, and are "
                        "enumerated in butler configuration."
                    ),
                ),
                ddl.FieldSpec(
                    name="dimensions_key",
                    dtype=sqlalchemy.BigInteger,
                    nullable=False,
                    doc="Unique key for the set of dimensions that identifies datasets of this type.",
                ),
                ddl.FieldSpec(
                    name="tag_association_table",
                    dtype=sqlalchemy.String,
                    length=128,
                    nullable=False,
                    doc=(
                        "Name of the table that holds associations between "
                        "datasets of this type and most types of collections."
                    ),
                ),
                ddl.FieldSpec(
                    name="calibration_association_table",
                    dtype=sqlalchemy.String,
                    length=128,
                    nullable=True,
                    doc=(
                        "Name of the table that holds associations between "
                        "datasets of this type and CALIBRATION collections. "
                        "NULL values indicate dataset types with "
                        "isCalibration=False."
                    ),
                ),
            ],
            unique=[("name",)],
        ),
        dataset=ddl.TableSpec(
            fields=[
                ddl.FieldSpec(
                    name="id",
                    dtype=dtype,
                    autoincrement=autoincrement,
                    primaryKey=True,
                    doc="A unique field used as the primary key for dataset.",
                ),
                ddl.FieldSpec(
                    name="dataset_type_id",
                    dtype=sqlalchemy.BigInteger,
                    nullable=False,
                    doc="Reference to the associated entry in the dataset_type table.",
                ),
                ddl.FieldSpec(
                    name="ingest_date",
                    dtype=ingest_date_type,
                    default=ingest_date_default,
                    nullable=False,
                    doc="Time of dataset ingestion.",
                ),
                # Foreign key field/constraint to run added below.
            ],
            foreignKeys=[
                ddl.ForeignKeySpec("dataset_type", source=("dataset_type_id",), target=("id",)),
            ],
        ),
    )
    # Add foreign key fields programmatically.
    collections.addRunForeignKey(specs.dataset, onDelete="CASCADE", nullable=False)
    return specs

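# Illustrative (not part of the original module): a sketch of how the schema
# version steers the ingest_date column.  MyCollectionManager, universe, and
# the use of ddl.GUID here are assumptions for the example.
#
#     specs = makeStaticTableSpecs(
#         MyCollectionManager,          # hypothetical CollectionManager subclass
#         universe,                     # a DimensionUniverse
#         dtype=ddl.GUID,               # e.g. UUID dataset IDs
#         autoincrement=False,
#         schema_version=VersionTuple(2, 0, 0),
#     )
#     # With major version > 1, the ingest_date field of specs.dataset is
#     # typed as ddl.AstropyTimeNsecTai and has no server-side default; with
#     # major version 1 it is a TIMESTAMP defaulting to
#     # sqlalchemy.sql.func.now().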

def makeTagTableName(datasetType: DatasetType, dimensionsKey: int) -> str:
    """Construct the name for a dynamic (DatasetType-dependent) tag table used
    by the classes in this package.

    Parameters
    ----------
    datasetType : `DatasetType`
        Dataset type to construct a name for.  Multiple dataset types may
        share the same table.
    dimensionsKey : `int`
        Integer key used to save ``datasetType.dimensions`` to the database.

    Returns
    -------
    name : `str`
        Name for the table.
    """
    return f"dataset_tags_{dimensionsKey:08d}"


def makeCalibTableName(datasetType: DatasetType, dimensionsKey: int) -> str:
    """Construct the name for a dynamic (DatasetType-dependent) tag + validity
    range table used by the classes in this package.

    Parameters
    ----------
    datasetType : `DatasetType`
        Dataset type to construct a name for.  Multiple dataset types may
        share the same table.
    dimensionsKey : `int`
        Integer key used to save ``datasetType.dimensions`` to the database.

    Returns
    -------
    name : `str`
        Name for the table.
    """
    assert datasetType.isCalibration()
    return f"dataset_calibs_{dimensionsKey:08d}"

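# Illustrative (not part of the original module): the generated names depend
# only on the zero-padded dimensions key, so dataset types with identical
# dimensions share a table.  The key value below is arbitrary.
#
#     makeTagTableName(datasetType, 42)      # -> "dataset_tags_00000042"
#     makeCalibTableName(calibType, 42)      # -> "dataset_calibs_00000042"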

def makeTagTableSpec(
    datasetType: DatasetType, collections: type[CollectionManager], dtype: type, *, constraints: bool = True
) -> ddl.TableSpec:
    """Construct the specification for a dynamic (DatasetType-dependent) tag
    table used by the classes in this package.

    Parameters
    ----------
    datasetType : `DatasetType`
        Dataset type to construct a spec for.  Multiple dataset types may
        share the same table.
    collections : `type` [ `CollectionManager` ]
        `CollectionManager` subclass that can be used to construct foreign
        keys to the run and/or collection tables.
    dtype : `type`
        Type of the FK column; must match the column type of the primary key
        column of the referenced table (``dataset.id``).
    constraints : `bool`, optional
        If `False` (`True` is default), do not define foreign key constraints.

    Returns
    -------
    spec : `ddl.TableSpec`
        Specification for the table.
    """
    tableSpec = ddl.TableSpec(
        fields=[
            # Foreign key fields to dataset, collection, and usually dimension
            # tables added below.
            # The dataset_type_id field here would be redundant with the one
            # in the main monolithic dataset table, but we need it here for an
            # important unique constraint.
            ddl.FieldSpec("dataset_type_id", dtype=sqlalchemy.BigInteger, nullable=False),
        ]
    )
    if constraints:
        tableSpec.foreignKeys.append(
            ddl.ForeignKeySpec("dataset_type", source=("dataset_type_id",), target=("id",))
        )
    # We'll also have a unique constraint on dataset type, collection, and
    # data ID.  We only include the required part of the data ID, as that's
    # sufficient and saves us from worrying about nulls in the constraint.
    constraint = ["dataset_type_id"]
    # Add foreign key fields to dataset table (part of the primary key).
    addDatasetForeignKey(tableSpec, dtype, primaryKey=True, onDelete="CASCADE", constraint=constraints)
    # Add foreign key fields to collection table (part of the primary key and
    # the data ID unique constraint).
    collectionFieldSpec = collections.addCollectionForeignKey(
        tableSpec, primaryKey=True, onDelete="CASCADE", constraint=constraints
    )
    constraint.append(collectionFieldSpec.name)
    # Add foreign key constraint to the collection_summary_dataset_type table.
    if constraints:
        tableSpec.foreignKeys.append(
            ddl.ForeignKeySpec(
                "collection_summary_dataset_type",
                source=(collectionFieldSpec.name, "dataset_type_id"),
                target=(collectionFieldSpec.name, "dataset_type_id"),
            )
        )
    for dimension_name in datasetType.dimensions.required.names:
        dimension = datasetType.dimensions.universe.dimensions[dimension_name]
        fieldSpec = addDimensionForeignKey(
            tableSpec, dimension=dimension, nullable=False, primaryKey=False, constraint=constraints
        )
        constraint.append(fieldSpec.name)
        # If this is a governor dimension, add a foreign key constraint to the
        # collection_summary_<dimension> table.
        if isinstance(dimension, GovernorDimension) and constraints:
            tableSpec.foreignKeys.append(
                ddl.ForeignKeySpec(
                    f"collection_summary_{dimension.name}",
                    source=(collectionFieldSpec.name, fieldSpec.name),
                    target=(collectionFieldSpec.name, fieldSpec.name),
                )
            )
    # Actually add the unique constraint.
    tableSpec.unique.add(tuple(constraint))
    return tableSpec

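# Illustrative (not part of the original module): for a hypothetical dataset
# type whose required dimensions are {instrument, detector}, the resulting
# tag-table spec contains roughly:
#
#     dataset_type_id  BIGINT, NOT NULL, FK -> dataset_type.id
#     dataset_id       <dtype>, part of PK, FK -> dataset.id (ON DELETE CASCADE)
#     <collection FK>  part of PK (name chosen by the CollectionManager)
#     instrument       dimension FK
#     detector         dimension FK
#     UNIQUE (dataset_type_id, <collection FK>, instrument, detector)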

def makeCalibTableSpec(
    datasetType: DatasetType,
    collections: type[CollectionManager],
    TimespanReprClass: type[TimespanDatabaseRepresentation],
    dtype: type,
) -> ddl.TableSpec:
    """Construct the specification for a dynamic (DatasetType-dependent) tag +
    validity range table used by the classes in this package.

    Parameters
    ----------
    datasetType : `DatasetType`
        Dataset type to construct a spec for.  Multiple dataset types may
        share the same table.
    collections : `type` [ `CollectionManager` ]
        `CollectionManager` subclass that can be used to construct foreign
        keys to the run and/or collection tables.
    TimespanReprClass : `type` [ `TimespanDatabaseRepresentation` ]
        Class that defines how validity-range timespans are represented in
        the database.
    dtype : `type`
        Type of the FK column; must match the column type of the primary key
        column of the referenced table (``dataset.id``).

    Returns
    -------
    spec : `ddl.TableSpec`
        Specification for the table.
    """
    tableSpec = ddl.TableSpec(
        fields=[
            # This table has no natural primary key, compound or otherwise, so
            # we add an autoincrement key.  We may use this field a bit
            # internally, but its presence is an implementation detail and it
            # shouldn't appear as a foreign key in any other tables.
            ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, autoincrement=True, primaryKey=True),
            # Foreign key fields to dataset, collection, and usually dimension
            # tables added below.  The dataset_type_id field here is redundant
            # with the one in the main monolithic dataset table, but this bit
            # of denormalization lets us define what should be a much more
            # useful index.
            ddl.FieldSpec("dataset_type_id", dtype=sqlalchemy.BigInteger, nullable=False),
        ],
        foreignKeys=[
            ddl.ForeignKeySpec("dataset_type", source=("dataset_type_id",), target=("id",)),
        ],
    )
    # Record fields that should go in the temporal lookup index/constraint,
    # starting with the dataset type.
    index: list[str | type[TimespanDatabaseRepresentation]] = ["dataset_type_id"]
    # Add foreign key fields to dataset table (not part of the temporal
    # lookup/constraint).
    addDatasetForeignKey(tableSpec, dtype, nullable=False, onDelete="CASCADE")
    # Add foreign key fields to collection table (part of the temporal lookup
    # index/constraint).
    collectionFieldSpec = collections.addCollectionForeignKey(tableSpec, nullable=False, onDelete="CASCADE")
    index.append(collectionFieldSpec.name)
    # Add foreign key constraint to the collection_summary_dataset_type table.
    tableSpec.foreignKeys.append(
        ddl.ForeignKeySpec(
            "collection_summary_dataset_type",
            source=(collectionFieldSpec.name, "dataset_type_id"),
            target=(collectionFieldSpec.name, "dataset_type_id"),
        )
    )
    # Add dimension fields (part of the temporal lookup index/constraint).
    for dimension_name in datasetType.dimensions.required.names:
        dimension = datasetType.dimensions.universe.dimensions[dimension_name]
        fieldSpec = addDimensionForeignKey(tableSpec, dimension=dimension, nullable=False, primaryKey=False)
        index.append(fieldSpec.name)
        # If this is a governor dimension, add a foreign key constraint to the
        # collection_summary_<dimension> table.
        if isinstance(dimension, GovernorDimension):
            tableSpec.foreignKeys.append(
                ddl.ForeignKeySpec(
                    f"collection_summary_{dimension.name}",
                    source=(collectionFieldSpec.name, fieldSpec.name),
                    target=(collectionFieldSpec.name, fieldSpec.name),
                )
            )
    # Add validity-range field(s) (part of the temporal lookup
    # index/constraint).
    tsFieldSpecs = TimespanReprClass.makeFieldSpecs(nullable=False)
    for fieldSpec in tsFieldSpecs:
        tableSpec.fields.add(fieldSpec)
    if TimespanReprClass.hasExclusionConstraint():
        # This database's timespan representation can define a database-level
        # constraint that prevents overlapping validity ranges for entries
        # with the same DatasetType, collection, and data ID.  This also
        # creates an index.
        index.append(TimespanReprClass)
        tableSpec.exclusion.add(tuple(index))
    else:
        # No database-level constraint is possible.  We have to simulate that
        # in our DatasetRecordStorage.certify() implementation, and just
        # create a regular index here in the hope that it helps with lookups.
        index.extend(fieldSpec.name for fieldSpec in tsFieldSpecs)
        tableSpec.indexes.add(ddl.IndexSpec(*index))  # type: ignore
    return tableSpec
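
# Illustrative (not part of the original module): on a database whose
# timespan representation supports exclusion constraints (e.g. PostgreSQL
# ranges), the exclusion branch yields something like
#
#     EXCLUDE USING gist (dataset_type_id WITH =, <collection> WITH =,
#                         <dimensions...> WITH =, <timespan> WITH &&)
#
# which rejects overlapping validity ranges at the database level.  Elsewhere
# only a plain index on the same columns is created, and the overlap check is
# simulated in Python by DatasetRecordStorage.certify().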