Coverage for python/lsst/dax/apdb/apdbSqlSchema.py: 18%

239 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-04-06 04:04 -0700

1# This file is part of dax_apdb. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22"""Module responsible for APDB schema operations. 

23""" 

24 

25from __future__ import annotations 

26 

27__all__ = ["ApdbSqlSchema", "ExtraTables"] 

28 

29import enum 

30import logging 

31import uuid 

32from collections.abc import Mapping 

33from typing import Any 

34 

35import felis.types 

36import sqlalchemy 

37from felis import simple 

38from sqlalchemy import ( 

39 DDL, 

40 Column, 

41 ForeignKeyConstraint, 

42 Index, 

43 MetaData, 

44 PrimaryKeyConstraint, 

45 Table, 

46 UniqueConstraint, 

47 event, 

48 inspect, 

49) 

50from sqlalchemy.dialects.postgresql import UUID 

51 

52from .apdbSchema import ApdbSchema, ApdbTables 

53 

# Module-level logger used for schema-management progress messages.
_LOG = logging.getLogger(__name__)

55 

56 

57# 

58# Copied from daf_butler. 

59# 

class GUID(sqlalchemy.TypeDecorator):
    """Platform-independent GUID type.

    Uses PostgreSQL's UUID type, otherwise uses CHAR(32), storing as
    stringified hex values.
    """

    impl = sqlalchemy.CHAR

    cache_ok = True

    def load_dialect_impl(self, dialect: sqlalchemy.engine.Dialect) -> sqlalchemy.types.TypeEngine:
        # Native UUID column on PostgreSQL; 32-character hex string elsewhere.
        if dialect.name != "postgresql":
            return dialect.type_descriptor(sqlalchemy.CHAR(32))
        return dialect.type_descriptor(UUID())

    def process_bind_param(self, value: Any, dialect: sqlalchemy.engine.Dialect) -> str | None:
        if value is None:
            return None

        # Coerce input to UUID type, in general having UUID on input is the
        # only thing that we want but there is code right now that uses ints.
        if not isinstance(value, uuid.UUID):
            if isinstance(value, int):
                value = uuid.UUID(int=value)
            elif isinstance(value, bytes):
                value = uuid.UUID(bytes=value)
            elif isinstance(value, str):
                # hexstring
                value = uuid.UUID(hex=value)
            else:
                raise TypeError(f"Unexpected type of a bind value: {type(value)}")

        # PostgreSQL accepts the canonical string form; other backends store
        # the zero-padded 32-digit hex representation.
        if dialect.name == "postgresql":
            return str(value)
        return "%.32x" % value.int

    def process_result_value(
        self, value: str | uuid.UUID | None, dialect: sqlalchemy.engine.Dialect
    ) -> uuid.UUID | None:
        if value is None or isinstance(value, uuid.UUID):
            # sqlalchemy 2 converts to UUID internally
            return value
        return uuid.UUID(hex=value)

108 

109 

class InconsistentSchemaError(RuntimeError):
    """Exception raised when schema state is inconsistent.

    For example, `ApdbSqlSchema.empty` raises this when only a subset of the
    required APDB tables exists in the database.
    """

112 

113 

@enum.unique
class ExtraTables(enum.Enum):
    """Names of the tables used for tracking insert IDs."""

    DiaInsertId = "DiaInsertId"
    """Name of the table for insert ID records."""

    DiaObjectInsertId = "DiaObjectInsertId"
    """Name of the table for DIAObject insert ID records."""

    DiaSourceInsertId = "DiaSourceInsertId"
    """Name of the table for DIASource insert ID records."""

    DiaForcedSourceInsertId = "DiaFSourceInsertId"
    """Name of the table for DIAForcedSource insert ID records."""

    def table_name(self, prefix: str = "") -> str:
        """Return full table name."""
        return f"{prefix}{self.value}"

    @classmethod
    def insert_id_tables(cls) -> Mapping[ExtraTables, ApdbTables]:
        """Return mapping of tables used for insert ID tracking to their
        corresponding regular tables.
        """
        # DiaInsertId itself is the parent table and has no regular
        # counterpart, so it does not appear here.
        mapping = {
            cls.DiaObjectInsertId: ApdbTables.DiaObject,
            cls.DiaSourceInsertId: ApdbTables.DiaSource,
            cls.DiaForcedSourceInsertId: ApdbTables.DiaForcedSource,
        }
        return mapping

144 

145 

class ApdbSqlSchema(ApdbSchema):
    """Class for management of APDB schema.

    Attributes
    ----------
    objects : `sqlalchemy.Table`
        DiaObject table instance
    objects_last : `sqlalchemy.Table`
        DiaObjectLast table instance, may be None
    sources : `sqlalchemy.Table`
        DiaSource table instance
    forcedSources : `sqlalchemy.Table`
        DiaForcedSource table instance
    has_insert_id : `bool`
        If true then schema has tables for insert ID tracking.

    Parameters
    ----------
    engine : `sqlalchemy.engine.Engine`
        SQLAlchemy engine instance
    dia_object_index : `str`
        Indexing mode for DiaObject table, see `ApdbSqlConfig.dia_object_index`
        for details.
    htm_index_column : `str`
        Name of a HTM index column for DiaObject and DiaSource tables.
    schema_file : `str`
        Name of the YAML schema file.
    schema_name : `str`, optional
        Name of the schema in YAML files.
    prefix : `str`, optional
        Prefix to add to all schema elements.
    namespace : `str`, optional
        Namespace (or schema name) to use for all APDB tables.
    use_insert_id : `bool`, optional
        If `True` then generate tables for insert ID tracking in addition to
        the regular APDB tables.
    """

    pixel_id_tables = (ApdbTables.DiaObject, ApdbTables.DiaObjectLast, ApdbTables.DiaSource)
    """Tables that need pixelId column for spatial indexing."""

    def __init__(
        self,
        engine: sqlalchemy.engine.Engine,
        dia_object_index: str,
        htm_index_column: str,
        schema_file: str,
        schema_name: str = "ApdbSchema",
        prefix: str = "",
        namespace: str | None = None,
        use_insert_id: bool = False,
    ):
        super().__init__(schema_file, schema_name)

        self._engine = engine
        self._dia_object_index = dia_object_index
        self._htm_index_column = htm_index_column
        self._prefix = prefix
        self._use_insert_id = use_insert_id

        self._metadata = MetaData(schema=namespace)

        # map YAML column types to SQLAlchemy
        self._type_map = {
            felis.types.Double: self._getDoubleType(engine),
            felis.types.Float: sqlalchemy.types.Float,
            felis.types.Timestamp: sqlalchemy.types.TIMESTAMP,
            felis.types.Long: sqlalchemy.types.BigInteger,
            felis.types.Int: sqlalchemy.types.Integer,
            felis.types.Short: sqlalchemy.types.Integer,
            felis.types.Byte: sqlalchemy.types.Integer,
            felis.types.Binary: sqlalchemy.types.LargeBinary,
            felis.types.Text: sqlalchemy.types.Text,
            felis.types.String: sqlalchemy.types.CHAR,
            felis.types.Char: sqlalchemy.types.CHAR,
            felis.types.Unicode: sqlalchemy.types.CHAR,
            felis.types.Boolean: sqlalchemy.types.Boolean,
        }

        # Add pixelId column and index to tables that need it.  Note that this
        # mutates the felis table definitions in place before any SQLAlchemy
        # Table objects are generated from them.
        for table in self.pixel_id_tables:
            tableDef = self.tableSchemas.get(table)
            if not tableDef:
                continue
            column = simple.Column(
                id=f"#{htm_index_column}",
                name=htm_index_column,
                datatype=felis.types.Long,
                nullable=False,
                value=None,
                description="Pixelization index column.",
                table=tableDef,
            )
            tableDef.columns.append(column)

            # Adjust index if needed
            if table == ApdbTables.DiaObject and self._dia_object_index == "pix_id_iov":
                tableDef.primary_key.insert(0, column)

            if table is ApdbTables.DiaObjectLast:
                # use it as a leading PK column
                tableDef.primary_key.insert(0, column)
            else:
                # make a regular index
                name = f"IDX_{tableDef.name}_{htm_index_column}"
                index = simple.Index(id=f"#{name}", name=name, columns=[column])
                tableDef.indexes.append(index)

        # generate schema for all tables, must be called last
        self._apdb_tables = self._make_apdb_tables()
        self._extra_tables = self._make_extra_tables(self._apdb_tables)

        # Lazily-computed caches of database probes; None means "not checked
        # yet" (see `has_insert_id` and `get_table`).
        self._has_insert_id: bool | None = None
        self._metadata_check: bool | None = None

    def empty(self) -> bool:
        """Return True if database schema is empty.

        Returns
        -------
        empty : `bool`
            `True` if none of the required APDB tables exist in the database,
            `False` if all required tables exist.

        Raises
        ------
        InconsistentSchemaError
            Raised when some of the required tables exist but not all.
        """
        inspector = inspect(self._engine)
        table_names = set(inspector.get_table_names(self._metadata.schema))

        # Partition the expected tables into those present and those missing.
        existing_tables = []
        missing_tables = []
        for table_enum in self._apdb_tables:
            table_name = table_enum.table_name(self._prefix)
            if table_name in table_names:
                existing_tables.append(table_name)
            else:
                missing_tables.append(table_name)

        if not missing_tables:
            return False
        elif not existing_tables:
            return True
        else:
            raise InconsistentSchemaError(
                f"Only some required APDB tables exist: {existing_tables}, missing tables: {missing_tables}"
            )

    def makeSchema(self, drop: bool = False) -> None:
        """Create or re-create all tables.

        Parameters
        ----------
        drop : `bool`, optional
            If True then drop tables before creating new ones.
        """
        # Create namespace if it does not exist yet, for now this only makes
        # sense for postgres.
        if self._metadata.schema:
            dialect = self._engine.dialect
            quoted_schema = dialect.preparer(dialect).quote_schema(self._metadata.schema)
            create_schema = DDL(
                "CREATE SCHEMA IF NOT EXISTS %(schema)s", context={"schema": quoted_schema}
            ).execute_if(dialect="postgresql")
            event.listen(self._metadata, "before_create", create_schema)

        # create all tables (optionally drop first)
        if drop:
            _LOG.info("dropping all tables")
            self._metadata.drop_all(self._engine)
        _LOG.info("creating all tables")
        self._metadata.create_all(self._engine)

        # Reset possibly cached value.
        self._has_insert_id = None
        self._metadata_check = None

    def get_table(self, table_enum: ApdbTables | ExtraTables) -> Table:
        """Return SQLAlchemy table instance for a specified table type/enum.

        Parameters
        ----------
        table_enum : `ApdbTables` or `ExtraTables`
            Type of table to return.

        Returns
        -------
        table : `sqlalchemy.schema.Table`
            Table instance.

        Raises
        ------
        ValueError
            Raised if ``table_enum`` is not valid for this database.
        """
        try:
            if isinstance(table_enum, ApdbTables):
                if table_enum is ApdbTables.metadata:
                    # There may be cases when schema is configured with the
                    # metadata table but database is still missing it. Check
                    # that table actually exists in the database. Note that
                    # this may interact with `makeSchema`.
                    if self._metadata_check is None:
                        inspector = inspect(self._engine)
                        table_name = table_enum.table_name(self._prefix)
                        self._metadata_check = inspector.has_table(table_name, schema=self._metadata.schema)
                    if not self._metadata_check:
                        # this will be caught below
                        raise LookupError("metadata table is missing")
                return self._apdb_tables[table_enum]
            else:
                return self._extra_tables[table_enum]
        except LookupError:
            raise ValueError(f"Table type {table_enum} does not exist in the schema") from None

    def get_apdb_columns(self, table_enum: ApdbTables | ExtraTables) -> list[Column]:
        """Return list of columns defined for a table in APDB schema.

        Returned list excludes columns that are implementation-specific, e.g.
        ``pixelId`` column is not included in the returned list.

        Parameters
        ----------
        table_enum : `ApdbTables` or `ExtraTables`
            Type of table.

        Returns
        -------
        columns : `list` [`sqlalchemy.schema.Column`]
            List of table columns.

        Raises
        ------
        ValueError
            Raised if ``table_enum`` is not valid for this database.
        """
        table = self.get_table(table_enum)
        exclude_columns = set()
        # The HTM index column is added by this class, not by the APDB schema.
        if table_enum in self.pixel_id_tables:
            exclude_columns.add(self._htm_index_column)
        return [column for column in table.columns if column.name not in exclude_columns]

    @property
    def has_insert_id(self) -> bool:
        """Whether insert ID tables are to be used (`bool`)."""
        # Both the configuration flag and the actual presence of the tables in
        # the database are required; the result is cached.
        if self._has_insert_id is None:
            self._has_insert_id = self._use_insert_id and self._check_insert_id()
        return self._has_insert_id

    def _check_insert_id(self) -> bool:
        """Check whether database has tables for tracking insert IDs."""
        # Presence of the parent table is used as the indicator for the whole
        # set of insert ID tables.
        inspector = inspect(self._engine)
        db_tables = set(inspector.get_table_names(schema=self._metadata.schema))
        return ExtraTables.DiaInsertId.table_name(self._prefix) in db_tables

    def _make_apdb_tables(self, mysql_engine: str = "InnoDB") -> Mapping[ApdbTables, Table]:
        """Generate schema for regular tables.

        Parameters
        ----------
        mysql_engine : `str`, optional
            MySQL engine type to use for new tables.

        Returns
        -------
        tables : `Mapping` [`ApdbTables`, `sqlalchemy.schema.Table`]
            Mapping from table enum to the generated table instance.
        """
        tables = {}
        for table_enum in ApdbTables:
            # DiaObjectLast is only used with the "last_object_table" index
            # mode.
            if table_enum is ApdbTables.DiaObjectLast and self._dia_object_index != "last_object_table":
                continue
            if table_enum is ApdbTables.metadata and table_enum not in self.tableSchemas:
                # Schema does not define metadata.
                continue

            columns = self._tableColumns(table_enum)
            constraints = self._tableIndices(table_enum)
            table = Table(
                table_enum.table_name(self._prefix),
                self._metadata,
                *columns,
                *constraints,
                mysql_engine=mysql_engine,
            )
            tables[table_enum] = table

        return tables

    def _make_extra_tables(
        self, apdb_tables: Mapping[ApdbTables, Table], mysql_engine: str = "InnoDB"
    ) -> Mapping[ExtraTables, Table]:
        """Generate schema for insert ID tables.

        Returns an empty mapping when insert ID tracking is disabled.
        """
        tables: dict[ExtraTables, Table] = {}
        if not self._use_insert_id:
            return tables

        # Parent table needs to be defined first
        column_defs = [
            Column("insert_id", GUID, primary_key=True),
            Column("insert_time", sqlalchemy.types.TIMESTAMP, nullable=False),
        ]
        parent_table = Table(
            ExtraTables.DiaInsertId.table_name(self._prefix),
            self._metadata,
            *column_defs,
            mysql_engine=mysql_engine,
        )
        tables[ExtraTables.DiaInsertId] = parent_table

        # Per-table insert ID tables reference both the parent table and the
        # corresponding regular APDB table.
        for table_enum, apdb_enum in ExtraTables.insert_id_tables().items():
            apdb_table = apdb_tables[apdb_enum]
            columns = self._insertIdColumns(table_enum)
            constraints = self._insertIdIndices(table_enum, apdb_table, parent_table)
            table = Table(
                table_enum.table_name(self._prefix),
                self._metadata,
                *columns,
                *constraints,
                mysql_engine=mysql_engine,
            )
            tables[table_enum] = table

        return tables

    def _tableColumns(self, table_name: ApdbTables) -> list[Column]:
        """Return set of columns in a table

        Parameters
        ----------
        table_name : `ApdbTables`
            Name of the table.

        Returns
        -------
        column_defs : `list`
            List of `Column` objects.
        """
        # get the list of columns in primary key, they are treated somewhat
        # specially below
        table_schema = self.tableSchemas[table_name]

        # convert all column dicts into alchemy Columns
        column_defs: list[Column] = []
        for column in table_schema.columns:
            kwargs: dict[str, Any] = dict(nullable=column.nullable)
            if column.value is not None:
                kwargs.update(server_default=str(column.value))
            if column in table_schema.primary_key:
                # PK columns never auto-increment in APDB schema.
                kwargs.update(autoincrement=False)
            ctype = self._type_map[column.datatype]
            column_defs.append(Column(column.name, ctype, **kwargs))

        return column_defs

    def _tableIndices(self, table_name: ApdbTables) -> list[sqlalchemy.schema.SchemaItem]:
        """Return set of constraints/indices in a table

        Parameters
        ----------
        table_name : `ApdbTables`
            Name of the table.

        Returns
        -------
        index_defs : `list`
            List of SQLAlchemy index/constraint objects.
        """
        table_schema = self.tableSchemas[table_name]

        # convert all index dicts into alchemy Columns
        index_defs: list[sqlalchemy.schema.SchemaItem] = []
        if table_schema.primary_key:
            index_defs.append(PrimaryKeyConstraint(*[column.name for column in table_schema.primary_key]))
        for index in table_schema.indexes:
            name = self._prefix + index.name if index.name else ""
            index_defs.append(Index(name, *[column.name for column in index.columns]))
        for constraint in table_schema.constraints:
            constr_name: str | None = None
            if constraint.name:
                constr_name = self._prefix + constraint.name
            # Only unique constraints are supported here; other felis
            # constraint types are ignored.
            if isinstance(constraint, simple.UniqueConstraint):
                index_defs.append(
                    UniqueConstraint(*[column.name for column in constraint.columns], name=constr_name)
                )

        return index_defs

    def _insertIdColumns(self, table_enum: ExtraTables) -> list[Column]:
        """Return list of columns for insert ID tables."""
        # Every insert ID table carries insert_id plus the PK columns of the
        # regular table it mirrors.
        column_defs: list[Column] = [Column("insert_id", GUID, nullable=False)]
        insert_id_tables = ExtraTables.insert_id_tables()
        if table_enum in insert_id_tables:
            column_defs += self._tablePkColumns(insert_id_tables[table_enum])
        else:
            assert False, "Above branches have to cover all enum values"
        return column_defs

    def _tablePkColumns(self, table_enum: ApdbTables) -> list[Column]:
        """Return a list of columns for table PK."""
        table_schema = self.tableSchemas[table_enum]
        column_defs: list[Column] = []
        for column in table_schema.primary_key:
            ctype = self._type_map[column.datatype]
            column_defs.append(Column(column.name, ctype, nullable=False, autoincrement=False))
        return column_defs

    def _insertIdIndices(
        self,
        table_enum: ExtraTables,
        apdb_table: sqlalchemy.schema.Table,
        parent_table: sqlalchemy.schema.Table,
    ) -> list[sqlalchemy.schema.SchemaItem]:
        """Return set of constraints/indices for insert ID tables."""
        index_defs: list[sqlalchemy.schema.SchemaItem] = []

        # Special case for insert ID tables that are not in felis schema.
        insert_id_tables = ExtraTables.insert_id_tables()
        if table_enum in insert_id_tables:
            # PK is the same as for original table
            pk_names = [column.name for column in self._tablePkColumns(insert_id_tables[table_enum])]
            index_defs.append(PrimaryKeyConstraint(*pk_names))
            # Non-unique index on insert_id column.
            name = self._prefix + table_enum.name + "_idx"
            index_defs.append(Index(name, "insert_id"))
            # Foreign key to original table
            pk_columns = [apdb_table.columns[column] for column in pk_names]
            index_defs.append(
                ForeignKeyConstraint(pk_names, pk_columns, onupdate="CASCADE", ondelete="CASCADE")
            )
            # Foreign key to parent table
            index_defs.append(
                ForeignKeyConstraint(
                    ["insert_id"], [parent_table.columns["insert_id"]], onupdate="CASCADE", ondelete="CASCADE"
                )
            )
        else:
            assert False, "Above branches have to cover all enum values"
        return index_defs

    @classmethod
    def _getDoubleType(cls, engine: sqlalchemy.engine.Engine) -> type | sqlalchemy.types.TypeEngine:
        """DOUBLE type is database-specific, select one based on dialect.

        Parameters
        ----------
        engine : `sqlalchemy.engine.Engine`
            Database engine.

        Returns
        -------
        type_object : `object`
            Database-specific type definition.

        Raises
        ------
        TypeError
            Raised for an unsupported database dialect.
        """
        if engine.name == "mysql":
            from sqlalchemy.dialects.mysql import DOUBLE

            return DOUBLE(asdecimal=False)
        elif engine.name == "postgresql":
            from sqlalchemy.dialects.postgresql import DOUBLE_PRECISION

            return DOUBLE_PRECISION
        elif engine.name == "oracle":
            from sqlalchemy.dialects.oracle import DOUBLE_PRECISION

            return DOUBLE_PRECISION
        elif engine.name == "sqlite":
            # all floats in sqlite are 8-byte
            from sqlalchemy.dialects.sqlite import REAL

            return REAL
        else:
            raise TypeError("cannot determine DOUBLE type, unexpected dialect: " + engine.name)