Coverage for python/lsst/dax/apdb/apdbSqlSchema.py: 20%

211 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-10-12 09:46 +0000

1# This file is part of dax_apdb. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22"""Module responsible for APDB schema operations. 

23""" 

24 

25from __future__ import annotations 

26 

27__all__ = ["ApdbSqlSchema", "ExtraTables"] 

28 

29import enum 

30import logging 

31import uuid 

32from typing import Any, Dict, List, Mapping, Optional, Type 

33 

34import felis.types 

35import sqlalchemy 

36from felis import simple 

37from sqlalchemy import ( 

38 DDL, 

39 Column, 

40 ForeignKeyConstraint, 

41 Index, 

42 MetaData, 

43 PrimaryKeyConstraint, 

44 Table, 

45 UniqueConstraint, 

46 event, 

47 inspect, 

48) 

49from sqlalchemy.dialects.postgresql import UUID 

50 

51from .apdbSchema import ApdbSchema, ApdbTables 

52 

53_LOG = logging.getLogger(__name__) 

54 

55 

#
# Copied from daf_butler.
#
class GUID(sqlalchemy.TypeDecorator):
    """Platform-independent GUID type.

    Uses PostgreSQL's UUID type, otherwise uses CHAR(32), storing as
    stringified hex values.
    """

    impl = sqlalchemy.CHAR

    cache_ok = True

    def load_dialect_impl(self, dialect: sqlalchemy.engine.Dialect) -> sqlalchemy.types.TypeEngine:
        # Native UUID on PostgreSQL, 32-character hex string elsewhere.
        if dialect.name == "postgresql":
            return dialect.type_descriptor(UUID())
        return dialect.type_descriptor(sqlalchemy.CHAR(32))

    def process_bind_param(self, value: Any, dialect: sqlalchemy.engine.Dialect) -> Optional[str]:
        if value is None:
            return None

        # Coerce input to UUID type, in general having UUID on input is the
        # only thing that we want but there is code right now that uses ints.
        if not isinstance(value, uuid.UUID):
            if isinstance(value, int):
                value = uuid.UUID(int=value)
            elif isinstance(value, bytes):
                value = uuid.UUID(bytes=value)
            elif isinstance(value, str):
                # hexstring
                value = uuid.UUID(hex=value)
            else:
                raise TypeError(f"Unexpected type of a bind value: {type(value)}")

        if dialect.name == "postgresql":
            return str(value)
        return "%.32x" % value.int

    def process_result_value(
        self, value: str | uuid.UUID | None, dialect: sqlalchemy.engine.Dialect
    ) -> Optional[uuid.UUID]:
        if value is None or isinstance(value, uuid.UUID):
            # sqlalchemy 2 converts to UUID internally; None passes through.
            return value
        return uuid.UUID(hex=value)

107 

108 

@enum.unique
class ExtraTables(enum.Enum):
    """Names of the tables used for tracking insert IDs."""

    DiaInsertId = "DiaInsertId"
    """Name of the table for insert ID records."""

    DiaObjectInsertId = "DiaObjectInsertId"
    """Name of the table for DIAObject insert ID records."""

    DiaSourceInsertId = "DiaSourceInsertId"
    """Name of the table for DIASource insert ID records."""

    DiaForcedSourceInsertId = "DiaFSourceInsertId"
    """Name of the table for DIAForcedSource insert ID records."""

    def table_name(self, prefix: str = "") -> str:
        """Return full table name, with optional ``prefix`` prepended."""
        return f"{prefix}{self.value}"

    @classmethod
    def insert_id_tables(cls) -> Mapping[ExtraTables, ApdbTables]:
        """Return mapping of tables used for insert ID tracking to their
        corresponding regular tables.

        Note that the parent ``DiaInsertId`` table has no regular counterpart
        and is not part of this mapping.
        """
        mapping = {
            cls.DiaObjectInsertId: ApdbTables.DiaObject,
            cls.DiaSourceInsertId: ApdbTables.DiaSource,
            cls.DiaForcedSourceInsertId: ApdbTables.DiaForcedSource,
        }
        return mapping

139 

140 

class ApdbSqlSchema(ApdbSchema):
    """Class for management of APDB schema.

    Attributes
    ----------
    objects : `sqlalchemy.Table`
        DiaObject table instance
    objects_last : `sqlalchemy.Table`
        DiaObjectLast table instance, may be None
    sources : `sqlalchemy.Table`
        DiaSource table instance
    forcedSources : `sqlalchemy.Table`
        DiaForcedSource table instance
    has_insert_id : `bool`
        If true then schema has tables for insert ID tracking.

    Parameters
    ----------
    engine : `sqlalchemy.engine.Engine`
        SQLAlchemy engine instance
    dia_object_index : `str`
        Indexing mode for DiaObject table, see `ApdbSqlConfig.dia_object_index`
        for details.
    htm_index_column : `str`
        Name of a HTM index column for DiaObject and DiaSource tables.
    schema_file : `str`
        Name of the YAML schema file.
    schema_name : `str`, optional
        Name of the schema in YAML files.
    prefix : `str`, optional
        Prefix to add to all schema elements.
    namespace : `str`, optional
        Namespace (or schema name) to use for all APDB tables.
    use_insert_id : `bool`, optional
        If `True` then define additional tables used for tracking insert IDs
        (see `ExtraTables`); when `False` those tables are not generated.
    """

    pixel_id_tables = (ApdbTables.DiaObject, ApdbTables.DiaObjectLast, ApdbTables.DiaSource)
    """Tables that need pixelId column for spatial indexing."""

    def __init__(
        self,
        engine: sqlalchemy.engine.Engine,
        dia_object_index: str,
        htm_index_column: str,
        schema_file: str,
        schema_name: str = "ApdbSchema",
        prefix: str = "",
        namespace: str | None = None,
        use_insert_id: bool = False,
    ):
        super().__init__(schema_file, schema_name)

        self._engine = engine
        self._dia_object_index = dia_object_index
        self._htm_index_column = htm_index_column
        self._prefix = prefix
        self._use_insert_id = use_insert_id

        self._metadata = MetaData(schema=namespace)

        # map YAML column types to SQLAlchemy
        self._type_map = {
            felis.types.Double: self._getDoubleType(engine),
            felis.types.Float: sqlalchemy.types.Float,
            felis.types.Timestamp: sqlalchemy.types.TIMESTAMP,
            felis.types.Long: sqlalchemy.types.BigInteger,
            felis.types.Int: sqlalchemy.types.Integer,
            felis.types.Short: sqlalchemy.types.Integer,
            felis.types.Byte: sqlalchemy.types.Integer,
            felis.types.Binary: sqlalchemy.types.LargeBinary,
            felis.types.Text: sqlalchemy.types.CHAR,
            felis.types.String: sqlalchemy.types.CHAR,
            felis.types.Char: sqlalchemy.types.CHAR,
            felis.types.Unicode: sqlalchemy.types.CHAR,
            felis.types.Boolean: sqlalchemy.types.Boolean,
        }

        # Add pixelId column and index to tables that need it
        for table in self.pixel_id_tables:
            tableDef = self.tableSchemas.get(table)
            if not tableDef:
                continue
            column = simple.Column(
                id=f"#{htm_index_column}",
                name=htm_index_column,
                datatype=felis.types.Long,
                nullable=False,
                value=None,
                description="Pixelization index column.",
                table=tableDef,
            )
            tableDef.columns.append(column)

            # Adjust index if needed
            if table == ApdbTables.DiaObject and self._dia_object_index == "pix_id_iov":
                tableDef.primary_key.insert(0, column)

            if table is ApdbTables.DiaObjectLast:
                # use it as a leading PK column
                tableDef.primary_key.insert(0, column)
            else:
                # make a regular index
                name = f"IDX_{tableDef.name}_{htm_index_column}"
                index = simple.Index(id=f"#{name}", name=name, columns=[column])
                tableDef.indexes.append(index)

        # generate schema for all tables, must be called last
        self._apdb_tables = self._make_apdb_tables()
        self._extra_tables = self._make_extra_tables(self._apdb_tables)

        # Lazily-computed cache for `has_insert_id`; reset by `makeSchema`.
        self._has_insert_id: bool | None = None

    def makeSchema(self, drop: bool = False) -> None:
        """Create or re-create all tables.

        Parameters
        ----------
        drop : `bool`, optional
            If True then drop tables before creating new ones.
        """
        # Create namespace if it does not exist yet, for now this only makes
        # sense for postgres.
        if self._metadata.schema:
            dialect = self._engine.dialect
            quoted_schema = dialect.preparer(dialect).quote_schema(self._metadata.schema)
            create_schema = DDL(
                "CREATE SCHEMA IF NOT EXISTS %(schema)s", context={"schema": quoted_schema}
            ).execute_if(dialect="postgresql")
            event.listen(self._metadata, "before_create", create_schema)

        # create all tables (optionally drop first)
        if drop:
            _LOG.info("dropping all tables")
            self._metadata.drop_all(self._engine)
        _LOG.info("creating all tables")
        self._metadata.create_all(self._engine)

        # Reset possibly cached value.
        self._has_insert_id = None

    def get_table(self, table_enum: ApdbTables | ExtraTables) -> Table:
        """Return SQLAlchemy table instance for a specified table type/enum.

        Parameters
        ----------
        table_enum : `ApdbTables` or `ExtraTables`
            Type of table to return.

        Returns
        -------
        table : `sqlalchemy.schema.Table`
            Table instance.

        Raises
        ------
        ValueError
            Raised if ``table_enum`` is not valid for this database.
        """
        try:
            if isinstance(table_enum, ApdbTables):
                return self._apdb_tables[table_enum]
            else:
                return self._extra_tables[table_enum]
        except LookupError:
            raise ValueError(f"Table type {table_enum} does not exist in the schema") from None

    def get_apdb_columns(self, table_enum: ApdbTables | ExtraTables) -> list[Column]:
        """Return list of columns defined for a table in APDB schema.

        Returned list excludes columns that are implementation-specific, e.g.
        ``pixelId`` column is not included in the returned list.

        Parameters
        ----------
        table_enum : `ApdbTables` or `ExtraTables`
            Type of table.

        Returns
        -------
        table : `list` [`sqlalchemy.schema.Column`]
            Table instance.

        Raises
        ------
        ValueError
            Raised if ``table_enum`` is not valid for this database.
        """
        table = self.get_table(table_enum)
        exclude_columns = set()
        if table_enum in self.pixel_id_tables:
            exclude_columns.add(self._htm_index_column)
        return [column for column in table.columns if column.name not in exclude_columns]

    @property
    def has_insert_id(self) -> bool:
        """Whether insert ID tables are to be used (`bool`)."""
        # Result is cached; `makeSchema` resets the cache after DDL changes.
        if self._has_insert_id is None:
            self._has_insert_id = self._use_insert_id and self._check_insert_id()
        return self._has_insert_id

    def _check_insert_id(self) -> bool:
        """Check whether database has tables for tracking insert IDs."""
        inspector = inspect(self._engine)
        db_tables = set(inspector.get_table_names(schema=self._metadata.schema))
        return ExtraTables.DiaInsertId.table_name(self._prefix) in db_tables

    def _make_apdb_tables(self, mysql_engine: str = "InnoDB") -> Mapping[ApdbTables, Table]:
        """Generate schema for regular tables.

        Parameters
        ----------
        mysql_engine : `str`, optional
            MySQL engine type to use for new tables.

        Returns
        -------
        tables : `Mapping` [`ApdbTables`, `sqlalchemy.schema.Table`]
            Mapping of table types to table instances.
        """
        tables = {}
        for table_enum in ApdbTables:
            # DiaObjectLast is only defined for the "last_object_table" mode.
            if table_enum is ApdbTables.DiaObjectLast and self._dia_object_index != "last_object_table":
                continue

            columns = self._tableColumns(table_enum)
            constraints = self._tableIndices(table_enum)
            table = Table(
                table_enum.table_name(self._prefix),
                self._metadata,
                *columns,
                *constraints,
                mysql_engine=mysql_engine,
            )
            tables[table_enum] = table

        return tables

    def _make_extra_tables(
        self, apdb_tables: Mapping[ApdbTables, Table], mysql_engine: str = "InnoDB"
    ) -> Mapping[ExtraTables, Table]:
        """Generate schema for insert ID tables.

        Parameters
        ----------
        apdb_tables : `Mapping` [`ApdbTables`, `sqlalchemy.schema.Table`]
            Regular tables, used as foreign key targets.
        mysql_engine : `str`, optional
            MySQL engine type to use for new tables.

        Returns
        -------
        tables : `Mapping` [`ExtraTables`, `sqlalchemy.schema.Table`]
            Mapping of table types to table instances; empty when insert IDs
            are not used.
        """
        tables: dict[ExtraTables, Table] = {}
        if not self._use_insert_id:
            return tables

        # Parent table needs to be defined first
        column_defs = [
            Column("insert_id", GUID, primary_key=True),
            Column("insert_time", sqlalchemy.types.TIMESTAMP, nullable=False),
        ]
        parent_table = Table(
            ExtraTables.DiaInsertId.table_name(self._prefix),
            self._metadata,
            *column_defs,
            mysql_engine=mysql_engine,
        )
        tables[ExtraTables.DiaInsertId] = parent_table

        for table_enum, apdb_enum in ExtraTables.insert_id_tables().items():
            apdb_table = apdb_tables[apdb_enum]
            columns = self._insertIdColumns(table_enum)
            constraints = self._insertIdIndices(table_enum, apdb_table, parent_table)
            table = Table(
                table_enum.table_name(self._prefix),
                self._metadata,
                *columns,
                *constraints,
                mysql_engine=mysql_engine,
            )
            tables[table_enum] = table

        return tables

    def _tableColumns(self, table_name: ApdbTables) -> list[Column]:
        """Return set of columns in a table

        Parameters
        ----------
        table_name : `ApdbTables`
            Name of the table.

        Returns
        -------
        column_defs : `list`
            List of `Column` objects.
        """
        # get the list of columns in primary key, they are treated somewhat
        # specially below
        table_schema = self.tableSchemas[table_name]

        # convert all column dicts into alchemy Columns
        column_defs: list[Column] = []
        for column in table_schema.columns:
            kwargs: dict[str, Any] = dict(nullable=column.nullable)
            if column.value is not None:
                kwargs.update(server_default=str(column.value))
            if column in table_schema.primary_key:
                kwargs.update(autoincrement=False)
            ctype = self._type_map[column.datatype]
            column_defs.append(Column(column.name, ctype, **kwargs))

        return column_defs

    def _tableIndices(self, table_name: ApdbTables) -> list[sqlalchemy.schema.SchemaItem]:
        """Return set of constraints/indices in a table

        Parameters
        ----------
        table_name : `ApdbTables`
            Name of the table.

        Returns
        -------
        index_defs : `list`
            List of SQLAlchemy index/constraint objects.
        """
        table_schema = self.tableSchemas[table_name]

        # convert all index dicts into alchemy Columns
        index_defs: list[sqlalchemy.schema.SchemaItem] = []
        if table_schema.primary_key:
            index_defs.append(PrimaryKeyConstraint(*[column.name for column in table_schema.primary_key]))
        for index in table_schema.indexes:
            name = self._prefix + index.name if index.name else ""
            index_defs.append(Index(name, *[column.name for column in index.columns]))
        for constraint in table_schema.constraints:
            constr_name: str | None = None
            if constraint.name:
                constr_name = self._prefix + constraint.name
            if isinstance(constraint, simple.UniqueConstraint):
                index_defs.append(
                    UniqueConstraint(*[column.name for column in constraint.columns], name=constr_name)
                )

        return index_defs

    def _insertIdColumns(self, table_enum: ExtraTables) -> list[Column]:
        """Return list of columns for insert ID tables.

        Raises
        ------
        ValueError
            Raised if ``table_enum`` has no corresponding regular table.
        """
        column_defs: list[Column] = [Column("insert_id", GUID, nullable=False)]
        insert_id_tables = ExtraTables.insert_id_tables()
        if table_enum in insert_id_tables:
            column_defs += self._tablePkColumns(insert_id_tables[table_enum])
        else:
            # Raise instead of assert so the check survives `python -O`.
            raise ValueError(f"Unexpected table type: {table_enum}")
        return column_defs

    def _tablePkColumns(self, table_enum: ApdbTables) -> list[Column]:
        """Return a list of columns for table PK."""
        table_schema = self.tableSchemas[table_enum]
        column_defs: list[Column] = []
        for column in table_schema.primary_key:
            ctype = self._type_map[column.datatype]
            column_defs.append(Column(column.name, ctype, nullable=False, autoincrement=False))
        return column_defs

    def _insertIdIndices(
        self,
        table_enum: ExtraTables,
        apdb_table: sqlalchemy.schema.Table,
        parent_table: sqlalchemy.schema.Table,
    ) -> list[sqlalchemy.schema.SchemaItem]:
        """Return set of constraints/indices for insert ID tables.

        Raises
        ------
        ValueError
            Raised if ``table_enum`` has no corresponding regular table.
        """
        index_defs: list[sqlalchemy.schema.SchemaItem] = []

        # Special case for insert ID tables that are not in felis schema.
        insert_id_tables = ExtraTables.insert_id_tables()
        if table_enum in insert_id_tables:
            # PK is the same as for original table
            pk_names = [column.name for column in self._tablePkColumns(insert_id_tables[table_enum])]
            index_defs.append(PrimaryKeyConstraint(*pk_names))
            # Non-unique index on insert_id column.
            name = self._prefix + table_enum.name + "_idx"
            index_defs.append(Index(name, "insert_id"))
            # Foreign key to original table
            pk_columns = [apdb_table.columns[column] for column in pk_names]
            index_defs.append(
                ForeignKeyConstraint(pk_names, pk_columns, onupdate="CASCADE", ondelete="CASCADE")
            )
            # Foreign key to parent table
            index_defs.append(
                ForeignKeyConstraint(
                    ["insert_id"], [parent_table.columns["insert_id"]], onupdate="CASCADE", ondelete="CASCADE"
                )
            )
        else:
            # Raise instead of assert so the check survives `python -O`.
            raise ValueError(f"Unexpected table type: {table_enum}")
        return index_defs

    @classmethod
    def _getDoubleType(cls, engine: sqlalchemy.engine.Engine) -> type | sqlalchemy.types.TypeEngine:
        """DOUBLE type is database-specific, select one based on dialect.

        Parameters
        ----------
        engine : `sqlalchemy.engine.Engine`
            Database engine.

        Returns
        -------
        type_object : `object`
            Database-specific type definition.

        Raises
        ------
        TypeError
            Raised for an unsupported database dialect.
        """
        if engine.name == "mysql":
            from sqlalchemy.dialects.mysql import DOUBLE

            return DOUBLE(asdecimal=False)
        elif engine.name == "postgresql":
            from sqlalchemy.dialects.postgresql import DOUBLE_PRECISION

            return DOUBLE_PRECISION
        elif engine.name == "oracle":
            from sqlalchemy.dialects.oracle import DOUBLE_PRECISION

            return DOUBLE_PRECISION
        elif engine.name == "sqlite":
            # all floats in sqlite are 8-byte
            from sqlalchemy.dialects.sqlite import REAL

            return REAL
        else:
            raise TypeError("cannot determine DOUBLE type, unexpected dialect: " + engine.name)