Coverage for python/lsst/daf/butler/registry/interfaces/_database.py: 22%

412 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-10-25 15:14 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = [ 

24 "Database", 

25 "ReadOnlyDatabaseError", 

26 "DatabaseConflictError", 

27 "DatabaseInsertMode", 

28 "SchemaAlreadyDefinedError", 

29 "StaticTablesContext", 

30] 

31 

32import enum 

33import uuid 

34import warnings 

35from abc import ABC, abstractmethod 

36from collections import defaultdict 

37from collections.abc import Callable, Iterable, Iterator, Sequence 

38from contextlib import contextmanager 

39from typing import Any, cast, final 

40 

41import astropy.time 

42import sqlalchemy 

43 

44from ...core import TimespanDatabaseRepresentation, ddl, time_utils 

45from ...core.named import NamedValueAbstractSet 

46from .._exceptions import ConflictingDefinitionError 

47 

48 

class DatabaseInsertMode(enum.Enum):
    """Mode options available for inserting database records.

    The members differ only in how rows that already exist are treated; see
    the description attached to each member.
    """

    INSERT = enum.auto()
    """Insert records, failing if they already exist."""

    REPLACE = enum.auto()
    """Replace records, overwriting existing."""

    ENSURE = enum.auto()
    """Insert records, skipping any that already exist."""

60 

61 

62# TODO: method is called with list[ReflectedColumn] in SA 2, and 

63# ReflectedColumn does not exist in 1.4. 

64def _checkExistingTableDefinition(name: str, spec: ddl.TableSpec, inspection: list) -> None: 

65 """Test that the definition of a table in a `ddl.TableSpec` and from 

66 database introspection are consistent. 

67 

68 Parameters 

69 ---------- 

70 name : `str` 

71 Name of the table (only used in error messages). 

72 spec : `ddl.TableSpec` 

73 Specification of the table. 

74 inspection : `dict` 

75 Dictionary returned by 

76 `sqlalchemy.engine.reflection.Inspector.get_columns`. 

77 

78 Raises 

79 ------ 

80 DatabaseConflictError 

81 Raised if the definitions are inconsistent. 

82 """ 

83 columnNames = [c["name"] for c in inspection] 

84 if spec.fields.names != set(columnNames): 

85 raise DatabaseConflictError( 

86 f"Table '{name}' exists but is defined differently in the database; " 

87 f"specification has columns {list(spec.fields.names)}, while the " 

88 f"table in the database has {columnNames}." 

89 ) 

90 

91 

class ReadOnlyDatabaseError(RuntimeError):
    """Exception raised when a write operation is called on a read-only
    `Database`.

    Within this module it is raised by `Database.assertTableWriteable` and by
    `Database.declareStaticTables` when table creation is requested on a
    database that is not writeable.
    """

96 

97 

class DatabaseConflictError(ConflictingDefinitionError):
    """Exception raised when database content (row values or schema entities)
    is inconsistent with what this client expects.
    """

102 

103 

class SchemaAlreadyDefinedError(RuntimeError):
    """Exception raised when trying to initialize database schema when some
    tables already exist.

    Raised by `Database.declareStaticTables` when it is asked to create
    tables but the target namespace already contains at least one table.
    """

108 

109 

110class StaticTablesContext: 

111 """Helper class used to declare the static schema for a registry layer 

112 in a database. 

113 

114 An instance of this class is returned by `Database.declareStaticTables`, 

115 which should be the only way it should be constructed. 

116 """ 

117 

118 def __init__(self, db: Database, connection: sqlalchemy.engine.Connection): 

119 self._db = db 

120 self._foreignKeys: list[tuple[sqlalchemy.schema.Table, sqlalchemy.schema.ForeignKeyConstraint]] = [] 

121 self._inspector = sqlalchemy.inspect(connection) 

122 self._tableNames = frozenset(self._inspector.get_table_names(schema=self._db.namespace)) 

123 self._initializers: list[Callable[[Database], None]] = [] 

124 

125 def addTable(self, name: str, spec: ddl.TableSpec) -> sqlalchemy.schema.Table: 

126 """Add a new table to the schema, returning its sqlalchemy 

127 representation. 

128 

129 The new table may not actually be created until the end of the 

130 context created by `Database.declareStaticTables`, allowing tables 

131 to be declared in any order even in the presence of foreign key 

132 relationships. 

133 """ 

134 name = self._db._mangleTableName(name) 

135 if name in self._tableNames: 

136 _checkExistingTableDefinition( 

137 name, spec, self._inspector.get_columns(name, schema=self._db.namespace) 

138 ) 

139 metadata = self._db._metadata 

140 assert metadata is not None, "Guaranteed by context manager that returns this object." 

141 table = self._db._convertTableSpec(name, spec, metadata) 

142 for foreignKeySpec in spec.foreignKeys: 

143 self._foreignKeys.append((table, self._db._convertForeignKeySpec(name, foreignKeySpec, metadata))) 

144 return table 

145 

146 def addTableTuple(self, specs: tuple[ddl.TableSpec, ...]) -> tuple[sqlalchemy.schema.Table, ...]: 

147 """Add a named tuple of tables to the schema, returning their 

148 SQLAlchemy representations in a named tuple of the same type. 

149 

150 The new tables may not actually be created until the end of the 

151 context created by `Database.declareStaticTables`, allowing tables 

152 to be declared in any order even in the presence of foreign key 

153 relationships. 

154 

155 Notes 

156 ----- 

157 ``specs`` *must* be an instance of a type created by 

158 `collections.namedtuple`, not just regular tuple, and the returned 

159 object is guaranteed to be the same. Because `~collections.namedtuple` 

160 is just a factory for `type` objects, not an actual type itself, 

161 we cannot represent this with type annotations. 

162 """ 

163 return specs._make( # type: ignore 

164 self.addTable(name, spec) for name, spec in zip(specs._fields, specs, strict=True) # type: ignore 

165 ) 

166 

167 def addInitializer(self, initializer: Callable[[Database], None]) -> None: 

168 """Add a method that does one-time initialization of a database. 

169 

170 Initialization can mean anything that changes state of a database 

171 and needs to be done exactly once after database schema was created. 

172 An example for that could be population of schema attributes. 

173 

174 Parameters 

175 ---------- 

176 initializer : callable 

177 Method of a single argument which is a `Database` instance. 

178 """ 

179 self._initializers.append(initializer) 

180 

181 

182class Database(ABC): 

183 """An abstract interface that represents a particular database engine's 

184 representation of a single schema/namespace/database. 

185 

186 Parameters 

187 ---------- 

188 origin : `int` 

189 An integer ID that should be used as the default for any datasets, 

190 quanta, or other entities that use a (autoincrement, origin) compound 

191 primary key. 

192 engine : `sqlalchemy.engine.Engine` 

193 The SQLAlchemy engine for this `Database`. 

194 namespace : `str`, optional 

195 Name of the schema or namespace this instance is associated with. 

196 This is passed as the ``schema`` argument when constructing a 

197 `sqlalchemy.schema.MetaData` instance. We use ``namespace`` instead to 

198 avoid confusion between "schema means namespace" and "schema means 

199 table definitions". 

200 

201 Notes 

202 ----- 

203 `Database` requires all write operations to go through its special named 

204 methods. Our write patterns are sufficiently simple that we don't really 

205 need the full flexibility of SQL insert/update/delete syntax, and we need 

206 non-standard (but common) functionality in these operations sufficiently 

207 often that it seems worthwhile to provide our own generic API. 

208 

209 In contrast, `Database.query` allows arbitrary ``SELECT`` queries (via 

210 their SQLAlchemy representation) to be run, as we expect these to require 

211 significantly more sophistication while still being limited to standard 

212 SQL. 

213 

214 `Database` itself has several underscore-prefixed attributes: 

215 

216 - ``_engine``: SQLAlchemy object representing its engine. 

217 - ``_connection``: method returning a context manager for 

218 `sqlalchemy.engine.Connection` object. 

219 - ``_metadata``: the `sqlalchemy.schema.MetaData` object representing 

220 the tables and other schema entities. 

221 

222 These are considered protected (derived classes may access them, but other 

223 code should not), and read-only, aside from executing SQL via 

224 ``_connection``. 

225 """ 

226 

227 def __init__(self, *, origin: int, engine: sqlalchemy.engine.Engine, namespace: str | None = None): 

228 self.origin = origin 

229 self.namespace = namespace 

230 self._engine = engine 

231 self._session_connection: sqlalchemy.engine.Connection | None = None 

232 self._metadata: sqlalchemy.schema.MetaData | None = None 

233 self._temp_tables: set[str] = set() 

234 

235 def __repr__(self) -> str: 

236 # Rather than try to reproduce all the parameters used to create 

237 # the object, instead report the more useful information of the 

238 # connection URL. 

239 if self._engine.url.password is not None: 

240 uri = str(self._engine.url.set(password="***")) 

241 else: 

242 uri = str(self._engine.url) 

243 if self.namespace: 

244 uri += f"#{self.namespace}" 

245 return f'{type(self).__name__}("{uri}")' 

246 

@classmethod
def makeDefaultUri(cls, root: str) -> str | None:
    """Create a default connection URI appropriate for the given root
    directory, or `None` if there can be no such default.

    Parameters
    ----------
    root : `str`
        Root directory for which a default URI is requested.

    Returns
    -------
    uri : `str` or `None`
        The default connection URI.  This base implementation always
        returns `None`.
    """
    return None

253 

@classmethod
def fromUri(
    cls,
    uri: str | sqlalchemy.engine.URL,
    *,
    origin: int,
    namespace: str | None = None,
    writeable: bool = True,
) -> Database:
    """Build a `Database` from a SQLAlchemy connection URI.

    The engine is created with `makeEngine` and then handed to
    `fromEngine`, so subclasses customize behavior by overriding those two
    methods rather than this one.

    Parameters
    ----------
    uri : `str` or `sqlalchemy.engine.URL`
        A SQLAlchemy URI connection string.
    origin : `int`
        An integer ID that should be used as the default for any datasets,
        quanta, or other entities that use a (autoincrement, origin)
        compound primary key.
    namespace : `str`, optional
        A database namespace (i.e. schema) the new instance should be
        associated with.  If `None` (default), the namespace (if any) is
        inferred from the URI.
    writeable : `bool`, optional
        If `True`, allow write operations on the database, including
        ``CREATE TABLE``.

    Returns
    -------
    db : `Database`
        A new `Database` instance.
    """
    engine = cls.makeEngine(uri, writeable=writeable)
    return cls.fromEngine(engine, origin=origin, namespace=namespace, writeable=writeable)

289 

@classmethod
@abstractmethod
def makeEngine(
    cls, uri: str | sqlalchemy.engine.URL, *, writeable: bool = True
) -> sqlalchemy.engine.Engine:
    """Create a `sqlalchemy.engine.Engine` from a SQLAlchemy URI.

    Parameters
    ----------
    uri : `str` or `sqlalchemy.engine.URL`
        A SQLAlchemy URI connection string.
    writeable : `bool`, optional
        If `True`, allow write operations on the database, including
        ``CREATE TABLE``.

    Returns
    -------
    engine : `sqlalchemy.engine.Engine`
        A database engine.

    Raises
    ------
    NotImplementedError
        Always raised by this abstract base implementation.

    Notes
    -----
    Subclasses that support other ways to connect to a database are
    encouraged to add optional arguments to their implementation of this
    method, as long as they maintain compatibility with the base class
    call signature.
    """
    raise NotImplementedError()

318 

@classmethod
@abstractmethod
def fromEngine(
    cls,
    engine: sqlalchemy.engine.Engine,
    *,
    origin: int,
    namespace: str | None = None,
    writeable: bool = True,
) -> Database:
    """Create a new `Database` from an existing `sqlalchemy.engine.Engine`.

    Parameters
    ----------
    engine : `sqlalchemy.engine.Engine`
        The engine for the database.  May be shared between `Database`
        instances.
    origin : `int`
        An integer ID that should be used as the default for any datasets,
        quanta, or other entities that use a (autoincrement, origin)
        compound primary key.
    namespace : `str`, optional
        A different database namespace (i.e. schema) the new instance
        should be associated with.  If `None` (default), the namespace
        (if any) is inferred from the connection.
    writeable : `bool`, optional
        If `True`, allow write operations on the database, including
        ``CREATE TABLE``.

    Returns
    -------
    db : `Database`
        A new `Database` instance.

    Raises
    ------
    NotImplementedError
        Always raised by this abstract base implementation.

    Notes
    -----
    This method allows different `Database` instances to share the same
    engine, which is desirable when they represent different namespaces
    that can be queried together.
    """
    raise NotImplementedError()

360 

@final
@contextmanager
def session(self) -> Iterator[None]:
    """Return a context manager that represents a session (persistent
    connection to a database).

    Returns
    -------
    context : `AbstractContextManager` [ `None` ]
        A context manager that does not return a value when entered.

    Notes
    -----
    This method should be used when a sequence of read-only SQL operations
    will be performed in rapid succession *without* a requirement that they
    yield consistent results in the presence of concurrent writes (or, more
    rarely, when conflicting concurrent writes are rare/impossible and the
    session will be open long enough that a transaction is inadvisable).

    This method is `final`; the work is delegated to `_session`, and the
    connection that method yields is deliberately not exposed to callers.
    """
    with self._session():
        yield

382 

@final
@contextmanager
def transaction(
    self,
    *,
    interrupting: bool = False,
    savepoint: bool = False,
    lock: Iterable[sqlalchemy.schema.Table] = (),
    for_temp_tables: bool = False,
) -> Iterator[None]:
    """Return a context manager that represents a transaction.

    Parameters
    ----------
    interrupting : `bool`, optional
        If `True` (`False` is default), this transaction block may not be
        nested within an outer one, and attempting to do so is a logic
        (i.e. assertion) error.
    savepoint : `bool`, optional
        If `True` (`False` is default), create a `SAVEPOINT`, allowing
        exceptions raised by the database (e.g. due to constraint
        violations) during this transaction's context to be caught outside
        it without also rolling back all operations in an outer transaction
        block.  If `False`, transactions may still be nested, but a
        rollback may be generated at any level and affects all levels, and
        commits are deferred until the outermost block completes.  If any
        outer transaction block was created with ``savepoint=True``, all
        inner blocks will be as well (regardless of the actual value
        passed).  This has no effect if this is the outermost transaction.
    lock : `~collections.abc.Iterable` [ `sqlalchemy.schema.Table` ], \
            optional
        A list of tables to lock for the duration of this transaction.
        These locks are guaranteed to prevent concurrent writes and allow
        this transaction (only) to acquire the same locks (others should
        block), but only prevent concurrent reads if the database engine
        requires that in order to block concurrent writes.
    for_temp_tables : `bool`, optional
        If `True`, this transaction may involve creating temporary tables.

    Returns
    -------
    context : `AbstractContextManager` [ `None` ]
        A context manager that commits the transaction when it is exited
        without error and rolls back the transaction when it is exited via
        an exception.

    Notes
    -----
    All transactions on a connection managed by one or more `Database`
    instances _must_ go through this method, or transaction state will not
    be correctly managed.

    This method is `final`; all arguments are forwarded unchanged to
    `_transaction`, whose yielded value is discarded.
    """
    with self._transaction(
        interrupting=interrupting, savepoint=savepoint, lock=lock, for_temp_tables=for_temp_tables
    ):
        yield

439 

@contextmanager
def temporary_table(
    self, spec: ddl.TableSpec, name: str | None = None
) -> Iterator[sqlalchemy.schema.Table]:
    """Return a context manager that creates and then drops a temporary
    table.

    Parameters
    ----------
    spec : `ddl.TableSpec`
        Specification for the columns.  Unique and foreign key constraints
        may be ignored.
    name : `str`, optional
        If provided, the name of the SQL construct.  If not provided, an
        opaque but unique identifier is generated.

    Returns
    -------
    context : `AbstractContextManager` [ `sqlalchemy.schema.Table` ]
        A context manager that returns a SQLAlchemy representation of the
        temporary table when entered.

    Notes
    -----
    Temporary tables may be created, dropped, and written to even in
    read-only databases - at least according to the Python-level
    protections in the `Database` classes.  Server permissions may say
    otherwise, but in that case they probably need to be modified to
    support the full range of expected read-only butler behavior.
    """
    # Keep one session open for the whole block so the connection that
    # creates the table is also the one that drops it.
    with self._session() as connection:
        table = self._make_temporary_table(connection, spec=spec, name=name)
        # Registering the key is what makes `isTableWriteable` accept this
        # table when the database itself is not writeable.
        self._temp_tables.add(table.key)
        try:
            yield table
        finally:
            # Drop the table whether or not the caller's block raised.
            with self._transaction():
                table.drop(connection)
            self._temp_tables.remove(table.key)

479 

480 @contextmanager 

481 def _session(self) -> Iterator[sqlalchemy.engine.Connection]: 

482 """Protected implementation for `session` that actually returns the 

483 connection. 

484 

485 This method is for internal `Database` calls that need the actual 

486 SQLAlchemy connection object. It should be overridden by subclasses 

487 instead of `session` itself. 

488 

489 Returns 

490 ------- 

491 context : `AbstractContextManager` [ `sqlalchemy.engine.Connection` ] 

492 A context manager that returns a SQLALchemy connection when 

493 entered. 

494 

495 """ 

496 if self._session_connection is not None: 

497 # session already started, just reuse that 

498 yield self._session_connection 

499 else: 

500 try: 

501 # open new connection and close it when done 

502 self._session_connection = self._engine.connect() 

503 yield self._session_connection 

504 finally: 

505 if self._session_connection is not None: 

506 self._session_connection.close() 

507 self._session_connection = None 

508 # Temporary tables only live within session 

509 self._temp_tables = set() 

510 

@contextmanager
def _transaction(
    self,
    *,
    interrupting: bool = False,
    savepoint: bool = False,
    lock: Iterable[sqlalchemy.schema.Table] = (),
    for_temp_tables: bool = False,
) -> Iterator[tuple[bool, sqlalchemy.engine.Connection]]:
    """Protected implementation for `transaction` that actually returns the
    connection and whether this is a new outermost transaction.

    This method is for internal `Database` calls that need the actual
    SQLAlchemy connection object.  It should be overridden by subclasses
    instead of `transaction` itself.

    Parameters
    ----------
    interrupting : `bool`, optional
        If `True` (`False` is default), this transaction block may not be
        nested within an outer one, and attempting to do so is a logic
        (i.e. assertion) error.
    savepoint : `bool`, optional
        If `True` (`False` is default), create a `SAVEPOINT`, allowing
        exceptions raised by the database (e.g. due to constraint
        violations) during this transaction's context to be caught outside
        it without also rolling back all operations in an outer transaction
        block.  If `False`, transactions may still be nested, but a
        rollback may be generated at any level and affects all levels, and
        commits are deferred until the outermost block completes.  If any
        outer transaction block was created with ``savepoint=True``, all
        inner blocks will be as well (regardless of the actual value
        passed).  This has no effect if this is the outermost transaction.
    lock : `~collections.abc.Iterable` [ `sqlalchemy.schema.Table` ], \
            optional
        A list of tables to lock for the duration of this transaction.
        These locks are guaranteed to prevent concurrent writes and allow
        this transaction (only) to acquire the same locks (others should
        block), but only prevent concurrent reads if the database engine
        requires that in order to block concurrent writes.
    for_temp_tables : `bool`, optional
        If `True`, this transaction may involve creating temporary tables.
        This base implementation does not read the flag.

    Returns
    -------
    context : `AbstractContextManager` [ `tuple` [ `bool`,
            `sqlalchemy.engine.Connection` ] ]
        A context manager that commits the transaction when it is exited
        without error and rolls back the transaction when it is exited via
        an exception.  When entered, it returns a tuple of:

        - ``is_new`` (`bool`): whether this is a new (outermost)
          transaction;
        - ``connection`` (`sqlalchemy.engine.Connection`): the connection.
    """
    with self._session() as connection:
        already_in_transaction = connection.in_transaction()
        assert not (interrupting and already_in_transaction), (
            "Logic error in transaction nesting: an operation that would "
            "interrupt the active transaction context has been requested."
        )
        # Once any enclosing block uses a savepoint, every nested block
        # does too, regardless of the value passed in.
        savepoint = savepoint or connection.in_nested_transaction()
        trans: sqlalchemy.engine.Transaction | None
        if already_in_transaction:
            if savepoint:
                trans = connection.begin_nested()
            else:
                # Nested non-savepoint transactions don't do anything.
                trans = None
        else:
            # Use a regular (non-savepoint) transaction always for the
            # outermost context.
            trans = connection.begin()
        # Locks are requested only after a transaction is active on the
        # connection.
        self._lockTables(connection, lock)
        try:
            yield not already_in_transaction, connection
            # Reached only if the caller's block finished without raising.
            if trans is not None:
                trans.commit()
        except BaseException:
            # BaseException (not just Exception) so that any exit via an
            # exception rolls back before the exception is re-raised.
            if trans is not None:
                trans.rollback()
            raise

593 

@abstractmethod
def _lockTables(
    self, connection: sqlalchemy.engine.Connection, tables: Iterable[sqlalchemy.schema.Table] = ()
) -> None:
    """Acquire locks on the given tables.

    This is an implementation hook for subclasses, called by `transaction`.
    It should not be called directly by other code.

    Parameters
    ----------
    connection : `sqlalchemy.engine.Connection`
        Database connection object.  It is guaranteed that a transaction
        is already in progress for this connection.
    tables : `~collections.abc.Iterable` [ `sqlalchemy.schema.Table` ], \
            optional
        A list of tables to lock for the duration of this transaction.
        These locks are guaranteed to prevent concurrent writes and allow
        this transaction (only) to acquire the same locks (others should
        block), but only prevent concurrent reads if the database engine
        requires that in order to block concurrent writes.

    Raises
    ------
    NotImplementedError
        Always raised by this abstract base implementation.
    """
    raise NotImplementedError()

617 

def isTableWriteable(self, table: sqlalchemy.schema.Table) -> bool:
    """Report whether rows may be written to the given table.

    A table is writeable when the database connection is read-write or
    when the table is one of the temporary tables registered on this
    instance.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy table object to check.

    Returns
    -------
    writeable : `bool`
        Whether this table is writeable.
    """
    database_writeable = self.isWriteable()
    if database_writeable:
        return database_writeable
    # Read-only database: only registered temporary tables can be written.
    return table.key in self._temp_tables

633 

def assertTableWriteable(self, table: sqlalchemy.schema.Table, msg: str) -> None:
    """Raise if the given table is not writeable.

    A table is writeable either because the database connection is
    read-write or because the table is a temporary table; the check itself
    is delegated to `isTableWriteable`.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy table object to check.
    msg : `str`
        Message for the `ReadOnlyDatabaseError` that is raised when the
        table is not writeable.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isTableWriteable` returns `False` for ``table``.
    """
    if not self.isTableWriteable(table):
        raise ReadOnlyDatabaseError(msg)

648 

@contextmanager
def declareStaticTables(self, *, create: bool) -> Iterator[StaticTablesContext]:
    """Return a context manager in which the database's static DDL schema
    can be declared.

    Parameters
    ----------
    create : `bool`
        If `True`, attempt to create all tables at the end of the context.
        If `False`, they will be assumed to already exist.

    Returns
    -------
    schema : `StaticTablesContext`
        A helper object that is used to add new tables.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if ``create`` is `True` and `Database.isWriteable` is
        `False`.
    SchemaAlreadyDefinedError
        Raised if ``create`` is `True` and the database namespace already
        contains tables.

    Examples
    --------
    Given a `Database` instance ``db``::

        with db.declareStaticTables(create=True) as schema:
            schema.addTable("table1", TableSpec(...))
            schema.addTable("table2", TableSpec(...))

    Notes
    -----
    A database's static DDL schema must be declared before any dynamic
    tables are managed via calls to `ensureTableExists` or
    `getExistingTable`.  The order in which static schema tables are added
    inside the context block is unimportant; they will automatically be
    sorted and added in an order consistent with their foreign key
    relationships.
    """
    if create and not self.isWriteable():
        raise ReadOnlyDatabaseError(f"Cannot create tables in read-only database {self}.")
    # Start from fresh metadata; it is discarded again below if anything
    # inside the context fails.
    self._metadata = sqlalchemy.MetaData(schema=self.namespace)
    try:
        with self._transaction() as (_, connection):
            context = StaticTablesContext(self, connection)
            if create and context._tableNames:
                # Looks like database is already initialized, to avoid
                # danger of modifying/destroying valid schema we refuse to
                # do anything in this case
                raise SchemaAlreadyDefinedError(f"Cannot create tables in non-empty database {self}.")
            yield context
            # Foreign keys were deferred by `StaticTablesContext.addTable`;
            # attach them now that every table has been declared.
            for table, foreignKey in context._foreignKeys:
                table.append_constraint(foreignKey)
            if create:
                if (
                    self.namespace is not None
                    and self.namespace not in context._inspector.get_schema_names()
                ):
                    connection.execute(sqlalchemy.schema.CreateSchema(self.namespace))
                # In our tables we have columns that make use of sqlalchemy
                # Sequence objects. There is currently a bug in sqlalchemy
                # that causes a deprecation warning to be thrown on a
                # property of the Sequence object when the repr for the
                # sequence is created. Here a filter is used to catch these
                # deprecation warnings when tables are created.
                with warnings.catch_warnings():
                    warnings.simplefilter("ignore", category=sqlalchemy.exc.SADeprecationWarning)
                    self._metadata.create_all(connection)
                # call all initializer methods sequentially
                for init in context._initializers:
                    init(self)
    except BaseException:
        # Leave the instance without metadata on any failure.
        self._metadata = None
        raise

723 

@abstractmethod
def isWriteable(self) -> bool:
    """Return `True` if this database can be modified by this client.

    Abstract: this base implementation always raises
    `NotImplementedError`.
    """
    raise NotImplementedError()

728 

@abstractmethod
def __str__(self) -> str:
    """Return a human-readable identifier for this `Database`, including
    any namespace or schema that identifies its names within a `Registry`.

    Abstract: this base implementation always raises
    `NotImplementedError`.
    """
    raise NotImplementedError()

735 

@property
def dialect(self) -> sqlalchemy.engine.Dialect:
    """The SQLAlchemy dialect for this database engine
    (`sqlalchemy.engine.Dialect`).

    Read directly from the engine this `Database` was constructed with.
    """
    return self._engine.dialect

742 

def shrinkDatabaseEntityName(self, original: str) -> str:
    """Return a version of the given name that fits within this database
    engine's length limits for table, constraint, indexes, and sequence
    names.

    Implementations should not assume that simple truncation is safe,
    because multiple long names often begin with the same prefix.

    The default implementation simply returns the given name.

    Parameters
    ----------
    original : `str`
        The original name.

    Returns
    -------
    shrunk : `str`
        The new, possibly shortened name.
    """
    return original

764 

def expandDatabaseEntityName(self, shrunk: str) -> str:
    """Retrieve the original name for a database entity that was too long
    to fit within the database engine's limits.

    The default implementation simply returns the given name.

    Parameters
    ----------
    shrunk : `str`
        The possibly shortened name.

    Returns
    -------
    original : `str`
        The original name.
    """
    return shrunk

780 

def _mangleTableName(self, name: str) -> str:
    """Map a logical, user-visible table name to the true table name used
    in the database.

    The default implementation returns the given name unchanged.

    Parameters
    ----------
    name : `str`
        Input table name.  Should not include a namespace (i.e. schema)
        prefix.

    Returns
    -------
    mangled : `str`
        Mangled version of the table name (still with no namespace prefix).

    Notes
    -----
    Reimplementations of this method must be idempotent - mangling an
    already-mangled name must have no effect.
    """
    return name

804 

def _makeColumnConstraints(self, table: str, spec: ddl.FieldSpec) -> list[sqlalchemy.CheckConstraint]:
    """Create constraints based on this spec.

    The default implementation ignores both arguments and returns an empty
    list.

    Parameters
    ----------
    table : `str`
        Name of the table this column is being added to.
    spec : `FieldSpec`
        Specification for the field to be added.

    Returns
    -------
    constraint : `list` of `sqlalchemy.CheckConstraint`
        Constraints added for this column.
    """
    # By default we return no additional constraints
    return []

822 

def _convertFieldSpec(
    self, table: str, spec: ddl.FieldSpec, metadata: sqlalchemy.MetaData, **kwargs: Any
) -> sqlalchemy.schema.Column:
    """Build the `sqlalchemy.schema.Column` described by a `FieldSpec`.

    Parameters
    ----------
    table : `str`
        Name of the table this column is being added to.
    spec : `FieldSpec`
        Specification for the field to be added.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema this field's table is
        being added to.
    **kwargs
        Additional keyword arguments to forward to the
        `sqlalchemy.schema.Column` constructor.  This is provided to make
        it easier for derived classes to delegate to ``super()`` while
        making only minor changes.

    Returns
    -------
    column : `sqlalchemy.schema.Column`
        SQLAlchemy representation of the field.
    """
    extraArgs = []
    if spec.autoincrement:
        # Databases without native auto-increment need an explicit
        # sequence; SQLAlchemy ignores it for databases that do support
        # auto-increment natively.
        sequenceName = self.shrinkDatabaseEntityName(f"{table}_seq_{spec.name}")
        extraArgs.append(sqlalchemy.Sequence(sequenceName, metadata=metadata))
    assert spec.doc is None or isinstance(spec.doc, str), f"Bad doc for {table}.{spec.name}."
    columnType = spec.getSizedColumnType()
    return sqlalchemy.schema.Column(
        spec.name,
        columnType,
        *extraArgs,
        nullable=spec.nullable,
        primary_key=spec.primaryKey,
        comment=spec.doc,
        server_default=spec.default,
        **kwargs,
    )

869 

def _convertForeignKeySpec(
    self, table: str, spec: ddl.ForeignKeySpec, metadata: sqlalchemy.MetaData, **kwargs: Any
) -> sqlalchemy.schema.ForeignKeyConstraint:
    """Convert a `ForeignKeySpec` to a
    `sqlalchemy.schema.ForeignKeyConstraint`.

    Parameters
    ----------
    table : `str`
        Name of the table this foreign key is being added to.
    spec : `ForeignKeySpec`
        Specification for the foreign key to be added.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema this constraint is
        being added to.  Not used by this default implementation.
    **kwargs
        Additional keyword arguments to forward to the
        `sqlalchemy.schema.ForeignKeyConstraint` constructor.  This is
        provided to make it easier for derived classes to delegate to
        ``super()`` while making only minor changes.

    Returns
    -------
    constraint : `sqlalchemy.schema.ForeignKeyConstraint`
        SQLAlchemy representation of the constraint.
    """
    # Mangle the referenced table's name once and reuse it for both the
    # constraint name and the fully-qualified target column names.
    target_table = self._mangleTableName(spec.table)
    name = self.shrinkDatabaseEntityName(
        "_".join(["fkey", table, target_table] + list(spec.target) + list(spec.source))
    )
    return sqlalchemy.schema.ForeignKeyConstraint(
        spec.source,
        [f"{target_table}.{col}" for col in spec.target],
        name=name,
        ondelete=spec.onDelete,
        # Forward caller-supplied options as documented; previously these
        # were accepted but silently discarded.
        **kwargs,
    )

907 

908 def _convertExclusionConstraintSpec( 

909 self, 

910 table: str, 

911 spec: tuple[str | type[TimespanDatabaseRepresentation], ...], 

912 metadata: sqlalchemy.MetaData, 

913 ) -> sqlalchemy.schema.Constraint: 

914 """Convert a `tuple` from `ddl.TableSpec.exclusion` into a SQLAlchemy 

915 constraint representation. 

916 

917 Parameters 

918 ---------- 

919 table : `str` 

920 Name of the table this constraint is being added to. 

921 spec : `tuple` [ `str` or `type` ] 

922 A tuple of `str` column names and the `type` object returned by 

923 `getTimespanRepresentation` (which must appear exactly once), 

924 indicating the order of the columns in the index used to back the 

925 constraint. 

926 metadata : `sqlalchemy.MetaData` 

927 SQLAlchemy representation of the DDL schema this constraint is 

928 being added to. 

929 

930 Returns 

931 ------- 

932 constraint : `sqlalchemy.schema.Constraint` 

933 SQLAlchemy representation of the constraint. 

934 

935 Raises 

936 ------ 

937 NotImplementedError 

938 Raised if this database does not support exclusion constraints. 

939 """ 

940 raise NotImplementedError(f"Database {self} does not support exclusion constraints.") 

941 

def _convertTableSpec(
    self, name: str, spec: ddl.TableSpec, metadata: sqlalchemy.MetaData, **kwargs: Any
) -> sqlalchemy.schema.Table:
    """Build the `sqlalchemy.schema.Table` described by a `TableSpec`.

    Parameters
    ----------
    name : `str`
        Logical name of the table; it is passed through
        `_mangleTableName` before being used.
    spec : `TableSpec`
        Specification for the table to be added.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema the table is being
        added to.
    **kwargs
        Extra keyword arguments passed straight through to the
        `sqlalchemy.schema.Table` constructor, so that subclasses can
        delegate to ``super()`` and tweak only a few options.

    Returns
    -------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy representation of the table.

    Notes
    -----
    Foreign key *constraints* from ``spec.foreignKeys`` are deliberately
    not created here, to avoid circular dependencies; higher-level logic
    in `ensureTableExists`, `getExistingTable`, and `declareStaticTables`
    appends them.  Indexes backing those foreign keys are created here.
    """
    table_name = self._mangleTableName(name)
    shrink = self.shrinkDatabaseEntityName

    # Columns first, then any per-column constraints.
    items: list[sqlalchemy.schema.SchemaItem] = []
    for field in spec.fields:
        items.append(self._convertFieldSpec(table_name, field, metadata))
    for field in spec.fields:
        items.extend(self._makeColumnConstraints(table_name, field))

    # Column tuples already backed by an index (primary key, unique
    # constraints, explicit indexes), so that no duplicate explicit or
    # foreign-key index is added for them.
    covered = {tuple(field.name for field in spec.fields if field.primaryKey)}

    for columns in spec.unique:
        constraint_name = shrink("_".join([table_name, "unq"] + list(columns)))
        items.append(sqlalchemy.schema.UniqueConstraint(*columns, name=constraint_name))
    covered.update(spec.unique)

    # ``covered`` is intentionally not updated inside this loop; explicit
    # indexes are only recorded once all of them have been considered.
    for index in spec.indexes:
        if index.columns in covered:
            continue
        index_name = shrink("_".join([table_name, "idx"] + list(index.columns)))
        items.append(
            sqlalchemy.schema.Index(
                index_name,
                *index.columns,
                unique=(index.columns in spec.unique),
                **index.kwargs,
            )
        )
    covered.update(index.columns for index in spec.indexes)

    for fk in spec.foreignKeys:
        if not fk.addIndex or fk.source in covered:
            continue
        fk_index_name = shrink("_".join((table_name, "fkidx") + fk.source))
        items.append(sqlalchemy.schema.Index(fk_index_name, *fk.source))

    for excl in spec.exclusion:
        items.append(self._convertExclusionConstraintSpec(table_name, excl, metadata))

    assert spec.doc is None or isinstance(spec.doc, str), f"Bad doc for {table_name}."
    return sqlalchemy.schema.Table(
        table_name, metadata, *items, comment=spec.doc, info={"spec": spec}, **kwargs
    )

1015 

def ensureTableExists(self, name: str, spec: ddl.TableSpec) -> sqlalchemy.schema.Table:
    """Return the table with the given name and specification, creating
    it first if it does not exist yet.

    Parameters
    ----------
    name : `str`
        Name of the table (not including namespace qualifiers).
    spec : `TableSpec`
        Specification for the table.  It is used to create the table,
        and *may* be used to check an already-existing table for
        consistency, although no such check is guaranteed.

    Returns
    -------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy representation of the table.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False`, and the table does not
        already exist.
    DatabaseConflictError
        Raised if the table exists but ``spec`` is inconsistent with its
        definition.

    Notes
    -----
    This method may not be called within transactions.  It may be called
    on read-only databases if and only if the table does in fact already
    exist.

    Subclasses may override this method, but usually should not need to.
    """
    # TODO: if _engine is used to make a table then it uses separate
    # connection and should not interfere with current transaction
    session = self._session_connection
    assert session is None or not session.in_transaction(), "Table creation interrupts transactions."
    assert self._metadata is not None, "Static tables must be declared before dynamic tables."

    existing = self.getExistingTable(name, spec)
    if existing is not None:
        return existing
    if not self.isWriteable():
        raise ReadOnlyDatabaseError(
            f"Table {name} does not exist, and cannot be created because database {self} is read-only."
        )

    table = self._convertTableSpec(name, spec, self._metadata)
    for fk_spec in spec.foreignKeys:
        fk_constraint = self._convertForeignKeySpec(name, fk_spec, self._metadata)
        table.append_constraint(fk_constraint)
    try:
        with self._transaction() as (_, connection):
            table.create(connection)
    except sqlalchemy.exc.DatabaseError:
        # Another process may have created the table in the meantime,
        # which usually shows up as OperationalError or ProgrammingError.
        # IF NOT EXISTS cannot be used here because of a server-side
        # PostgreSQL race condition that causes IntegrityError.  All of
        # these inherit from DatabaseError, so catch that and re-check
        # whether the table is there now.
        table = self.getExistingTable(name, spec)
        if table is None:
            raise
    return table

1081 

def getExistingTable(self, name: str, spec: ddl.TableSpec) -> sqlalchemy.schema.Table | None:
    """Look up an existing table by name and check it against a
    specification.

    Parameters
    ----------
    name : `str`
        Name of the table (not including namespace qualifiers).
    spec : `TableSpec`
        Specification for the table.  It is used to build the SQLAlchemy
        representation of the table and to check that the actual table
        in the database is consistent.

    Returns
    -------
    table : `sqlalchemy.schema.Table` or `None`
        SQLAlchemy representation of the table, or `None` if it does not
        exist.

    Raises
    ------
    DatabaseConflictError
        Raised if the table exists but ``spec`` is inconsistent with its
        definition.

    Notes
    -----
    This method can be called within transactions and never modifies the
    database.

    Subclasses may override this method, but usually should not need to.
    """
    assert self._metadata is not None, "Static tables must be declared before dynamic tables."
    name = self._mangleTableName(name)
    metadata_key = name if self.namespace is None else f"{self.namespace}.{name}"
    known = self._metadata.tables.get(metadata_key)

    if known is not None:
        # Already declared in this session: only the column names are
        # compared against the new specification.
        if spec.fields.names != set(known.columns.keys()):
            raise DatabaseConflictError(
                f"Table '{name}' has already been defined differently; the new "
                f"specification has columns {list(spec.fields.names)}, while "
                f"the previous definition has {list(known.columns.keys())}."
            )
        return known

    # Not declared yet: ask the database itself, reusing the session
    # connection when there is one.
    bind = self._engine if self._session_connection is None else self._session_connection
    inspector = sqlalchemy.inspect(bind, raiseerr=True)
    if name not in inspector.get_table_names(schema=self.namespace):
        return None

    _checkExistingTableDefinition(name, spec, inspector.get_columns(name, schema=self.namespace))
    table = self._convertTableSpec(name, spec, self._metadata)
    for fk_spec in spec.foreignKeys:
        table.append_constraint(self._convertForeignKeySpec(name, fk_spec, self._metadata))
    return table

1134 

def _make_temporary_table(
    self,
    connection: sqlalchemy.engine.Connection,
    spec: ddl.TableSpec,
    name: str | None = None,
    **kwargs: Any,
) -> sqlalchemy.schema.Table:
    """Create a temporary table.

    Parameters
    ----------
    connection : `sqlalchemy.engine.Connection`
        Connection used to issue the CREATE statement.
    spec : `TableSpec`
        Specification for the table.
    name : `str`, optional
        A name for the table that is unique within this
        session/connection.  Subclasses may override to modify the
        actual name used.  When omitted, a unique name is generated.
    **kwargs
        Extra keyword arguments passed straight through to the
        `sqlalchemy.schema.Table` constructor, so that subclasses can
        delegate to ``super()`` and tweak only a few options.

    Returns
    -------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy representation of the table.

    Raises
    ------
    RuntimeError
        Raised if the static schema has not been defined yet.
    ValueError
        Raised if the name was transformed by `Database` and the
        transformed name collides with an existing temporary table.
    """
    metadata = self._metadata
    if metadata is None:
        raise RuntimeError("Cannot create temporary table before static schema is defined.")
    if name is None:
        name = f"tmp_{uuid.uuid4().hex}"

    table = self._convertTableSpec(
        name, spec, metadata, prefixes=["TEMPORARY"], schema=sqlalchemy.schema.BLANK_SCHEMA, **kwargs
    )
    if table.key != name and table.key in self._temp_tables:
        raise ValueError(
            f"A temporary table with name {name} (transformed to {table.key} by "
            "Database) already exists."
        )
    for fk_spec in spec.foreignKeys:
        fk_constraint = self._convertForeignKeySpec(name, fk_spec, metadata)
        table.append_constraint(fk_constraint)
    with self._transaction():
        table.create(connection)
    return table

1183 

@classmethod
def getTimespanRepresentation(cls) -> type[TimespanDatabaseRepresentation]:
    """Return the `type` that describes how `Timespan` objects are stored
    in this database.

    `Database` itself never consults the result of this method; calling
    code is responsible for keeping its DDL and queries consistent with
    it.

    Returns
    -------
    TimespanReprClass : `type` (`TimespanDatabaseRepresentation` subclass)
        A type that encapsulates the way `Timespan` objects should be
        stored in this database.

    Notes
    -----
    The choice of representation ultimately belongs to the `Database`
    implementation, but the timespan-mangling logic is kept outside it
    for two main reasons:

    - Timespans appear in relatively few tables and queries in typical
      usage, and the code operating on them already knows it is dealing
      with timespans.  A timespan-representation-aware implementation
      of, say, `insert`, would instead need extra logic to detect when
      timespan-mangling is required, which would usually be useless
      overhead.

    - SQLAlchemy's rich SELECT query expression system cannot wrap
      multiple columns in a single expression object (the ORM can, but
      the ORM is not used here).  Encapsulating timespan representations
      there would mean wrapping _much_ more of that code in our own
      interfaces.
    """
    representation = TimespanDatabaseRepresentation.Compound
    return representation

1219 

def sync(
    self,
    table: sqlalchemy.schema.Table,
    *,
    keys: dict[str, Any],
    compared: dict[str, Any] | None = None,
    extra: dict[str, Any] | None = None,
    returning: Sequence[str] | None = None,
    update: bool = False,
) -> tuple[dict[str, Any] | None, bool | dict[str, Any]]:
    """Insert into a table as necessary to ensure database contains
    values equivalent to the given ones.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table to be queried and possibly inserted into.
    keys : `dict`
        Column name-value pairs used to search for an existing row; must
        be a combination that can be used to select a single row if one
        exists.  If such a row does not exist, these values are used in
        the insert.
    compared : `dict`, optional
        Column name-value pairs that are compared to those in any existing
        row.  If such a row does not exist, these values are used in the
        insert.
    extra : `dict`, optional
        Column name-value pairs that are ignored if a matching row exists,
        but used in an insert if one is necessary.
    returning : `~collections.abc.Sequence` of `str`, optional
        The names of columns whose values should be returned.
    update : `bool`, optional
        If `True` (`False` is default), update the existing row with the
        values in ``compared`` instead of raising `DatabaseConflictError`.

    Returns
    -------
    row : `dict`, optional
        The value of the fields indicated by ``returning``, or `None` if
        ``returning`` is `None`.
    inserted_or_updated : `bool` or `dict`
        If `True`, a new row was inserted; if `False`, a matching row
        already existed.  If a `dict` (only possible if ``update=True``),
        then an existing row was updated, and the dict maps the names of
        the updated columns to their *old* values (new values can be
        obtained from ``compared``).

    Raises
    ------
    DatabaseConflictError
        Raised if the values in ``compared`` do not match the values in the
        database and ``update`` is `False`.
    ReadOnlyDatabaseError
        Raised if the table is not writeable and either no matching record
        already exists, or an existing record would need to be updated.
    ConflictingDefinitionError
        Raised if the table is writeable but no row matching ``keys`` is
        found after the insert attempt.
    RuntimeError
        Raised if more than one row matches ``keys``, or if a conflict
        with ``compared`` is found right after a successful insert.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    It may be called on read-only databases if and only if the matching row
    does in fact already exist.
    """

    def check() -> tuple[int, dict[str, Any] | None, list | None]:
        """Query for a row that matches the ``keys`` argument, and compare
        to what was given by the caller.

        Returns
        -------
        n : `int`
            Number of matching rows.  ``n != 1`` is always an error, but
            it's a different kind of error depending on where `check` is
            being called.
        bad : `dict` or `None`
            The subset of the keys of ``compared`` for which the existing
            values did not match the given one, mapped to the existing
            values in the database.  A non-empty ``bad`` signals a
            conflict, which the caller handles differently depending on
            context.  `None` if ``n != 1``.
        result : `list` or `None`
            Results in the database that correspond to the columns given
            in ``returning``, or `None` if ``returning is None`` or
            ``n != 1``.
        """
        toSelect: set[str] = set()
        if compared is not None:
            toSelect.update(compared.keys())
        if returning is not None:
            toSelect.update(returning)
        if not toSelect:
            # Need to select some column, even if we just want to see
            # how many rows we get back.
            toSelect.add(next(iter(keys.keys())))
        selectSql = (
            sqlalchemy.sql.select(*[table.columns[k].label(k) for k in toSelect])
            .select_from(table)
            .where(sqlalchemy.sql.and_(*[table.columns[k] == v for k, v in keys.items()]))
        )
        with self._transaction() as (_, connection):
            fetched = list(connection.execute(selectSql).mappings())
        if len(fetched) != 1:
            return len(fetched), None, None
        existing = fetched[0]
        if compared is not None:

            def safeNotEqual(a: Any, b: Any) -> bool:
                # astropy times are compared via the project's
                # TimeConverter rather than with ``!=``.
                if isinstance(a, astropy.time.Time):
                    return not time_utils.TimeConverter().times_equal(a, b)
                return a != b

            inconsistencies = {
                k: existing[k] for k, v in compared.items() if safeNotEqual(existing[k], v)
            }
        else:
            inconsistencies = {}
        if returning is not None:
            toReturn: list | None = [existing[k] for k in returning]
        else:
            toReturn = None
        return 1, inconsistencies, toReturn

    def format_bad(inconsistencies: dict[str, Any]) -> str:
        """Format the 'bad' dictionary of existing values returned by
        ``check`` into a string suitable for an error message.
        """
        assert compared is not None, "Should not be able to get inconsistencies without comparing."
        return ", ".join(f"{k}: {v!r} != {compared[k]!r}" for k, v in inconsistencies.items())

    if self.isTableWriteable(table):
        # Try an insert first, but allow it to fail (in only specific
        # ways).
        row = keys.copy()
        if compared is not None:
            row.update(compared)
        if extra is not None:
            row.update(extra)
        with self.transaction():
            inserted = bool(self.ensure(table, row))
            inserted_or_updated: bool | dict[str, Any]
            # Need to perform check() for this branch inside the
            # transaction, so we roll back an insert that didn't do
            # what we expected.  That limits the extent to which we
            # can reduce duplication between this block and the other
            # ones that perform similar logic.
            n, bad, result = check()
            if n < 1:
                raise ConflictingDefinitionError(
                    f"Attempted to ensure {row} exists by inserting it with ON CONFLICT IGNORE, "
                    f"but a post-insert query on {keys} returned no results. "
                    f"Insert was {'' if inserted else 'not '}reported as successful. "
                    "This can occur if the insert violated a database constraint other than the "
                    "unique constraint or primary key used to identify the row in this call."
                )
            elif n > 1:
                raise RuntimeError(
                    f"Keys passed to sync {keys.keys()} do not comprise a "
                    f"unique constraint for table {table.name}."
                )
            elif bad:
                assert (
                    compared is not None
                ), "Should not be able to get inconsistencies without comparing."
                if inserted:
                    raise RuntimeError(
                        f"Conflict ({bad}) in sync after successful insert; this is "
                        "possible if the same table is being updated by a concurrent "
                        "process that isn't using sync, but it may also be a bug in "
                        "daf_butler."
                    )
                elif update:
                    # Overwrite only the columns that differ; ``bad`` keeps
                    # their old values for the caller.
                    with self._transaction() as (_, connection):
                        connection.execute(
                            table.update()
                            .where(sqlalchemy.sql.and_(*[table.columns[k] == v for k, v in keys.items()]))
                            .values(**{k: compared[k] for k in bad})
                        )
                    inserted_or_updated = bad
                else:
                    raise DatabaseConflictError(
                        f"Conflict in sync for table {table.name} on column(s) {format_bad(bad)}."
                    )
            else:
                inserted_or_updated = inserted
    else:
        # Database is not writeable; just see if the row exists.
        n, bad, result = check()
        if n < 1:
            raise ReadOnlyDatabaseError("sync needs to insert, but database is read-only.")
        elif n > 1:
            raise RuntimeError("Keys passed to sync do not comprise a unique constraint.")
        elif bad:
            if update:
                raise ReadOnlyDatabaseError("sync needs to update, but database is read-only.")
            else:
                raise DatabaseConflictError(
                    f"Conflict in sync for table {table.name} on column(s) {format_bad(bad)}."
                )
        inserted_or_updated = False
    if returning is None:
        return None, inserted_or_updated
    else:
        # ``result`` is only `None` when ``returning`` is `None` or no
        # unique row was found, and the latter has already raised above.
        assert result is not None
        return dict(zip(returning, result, strict=True)), inserted_or_updated

1423 

def insert(
    self,
    table: sqlalchemy.schema.Table,
    *rows: dict,
    returnIds: bool = False,
    select: sqlalchemy.sql.expression.SelectBase | None = None,
    names: Iterable[str] | None = None,
) -> list[int] | None:
    """Insert rows into a table, optionally returning the autoincrement
    primary key values that were generated.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    returnIds : `bool`
        If `True` (`False` is default), return the values of the table's
        autoincrement primary key field (which must exist).
    select : `sqlalchemy.sql.SelectBase`, optional
        A SELECT query expression to insert rows from.  Cannot be
        combined with either ``rows`` or ``returnIds=True``.
    names : `~collections.abc.Iterable` [ `str` ], optional
        Names of the columns in ``table`` to populate, in the order of
        the columns returned by ``select``.  Ignored if ``select`` is
        `None`.  When omitted, the columns returned by ``select`` must
        be named after the desired columns of ``table``.
    *rows
        Positional arguments are the rows to be inserted, as
        dictionaries mapping column name to value.  The keys in all
        dictionaries must be the same.

    Returns
    -------
    ids : `None`, or `list` of `int`
        If ``returnIds`` is `True`, a `list` containing the inserted
        values for the table's autoincrement primary key.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.
    TypeError
        Raised if ``select`` is combined with ``rows`` or
        ``returnIds=True``.

    Notes
    -----
    The default implementation uses bulk insert syntax when ``returnIds``
    is `False`, and a loop over single-row insert operations when it is
    `True`.

    Derived classes should reimplement when they can provide a more
    efficient implementation (especially for the latter case).

    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.
    """
    self.assertTableWriteable(table, f"Cannot insert into read-only table {table}.")
    if select is not None and (rows or returnIds):
        raise TypeError("'select' is incompatible with passing value rows or returnIds=True.")
    if select is None and not rows:
        # Nothing to insert at all.
        return [] if returnIds else None

    with self._transaction() as (_, connection):
        if returnIds:
            # One statement per row so each generated key can be read back.
            statement = table.insert()
            ids = []
            for row in rows:
                ids.append(connection.execute(statement, row).inserted_primary_key[0])
            return ids

        if select is None:
            connection.execute(table.insert(), rows)
            return None

        if names is None:
            # columns() is deprecated since 1.4, but
            # selected_columns() method did not exist in 1.3.
            if hasattr(select, "selected_columns"):
                names = select.selected_columns.keys()
            else:
                names = select.columns.keys()
        connection.execute(table.insert().from_select(list(names), select))
        return None

1503 

@abstractmethod
def replace(self, table: sqlalchemy.schema.Table, *rows: dict) -> None:
    """Insert rows into a table, overwriting any existing row whose
    primary key would otherwise conflict with a new one.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    *rows
        Positional arguments are the rows to be inserted, as
        dictionaries mapping column name to value.  The keys in all
        dictionaries must be the same.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    Implementations should raise a `sqlalchemy.exc.IntegrityError`
    exception when a constraint other than the primary key would be
    violated.

    Implementations are not required to support `replace` on tables
    with autoincrement keys.
    """
    # Abstract: concrete databases must supply the implementation.
    raise NotImplementedError()

1537 

@abstractmethod
def ensure(self, table: sqlalchemy.schema.Table, *rows: dict, primary_key_only: bool = False) -> int:
    """Insert rows into a table, silently skipping those whose insertion
    would violate a unique constraint.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    *rows
        Positional arguments are the rows to be inserted, as
        dictionaries mapping column name to value.  The keys in all
        dictionaries must be the same.
    primary_key_only : `bool`, optional
        If `True` (`False` is default), only skip rows that violate the
        primary key constraint, and raise an exception (and rollback
        transactions) for other constraint violations.

    Returns
    -------
    count : `int`
        The number of rows actually inserted.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is
        called, even when the operation would have been a no-op on a
        writeable database.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    Implementations are not required to support `ensure` on tables
    with autoincrement keys.
    """
    # Abstract: concrete databases must supply the implementation.
    raise NotImplementedError()

1577 

def delete(self, table: sqlalchemy.schema.Table, columns: Iterable[str], *rows: dict) -> int:
    """Delete one or more rows from a table.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table that rows should be deleted from.
    columns : `~collections.abc.Iterable` of `str`
        The names of columns that will be used to constrain the rows to
        be deleted; these will be combined via ``AND`` to form the
        ``WHERE`` clause of the delete query.  May be any iterable,
        including a one-shot iterator.
    *rows
        Positional arguments are the keys of rows to be deleted, as
        dictionaries mapping column name to value.  The keys in all
        dictionaries must be exactly the names in ``columns``.

    Returns
    -------
    count : `int`
        Number of rows deleted.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    The default implementation should be sufficient for most derived
    classes.
    """
    self.assertTableWriteable(table, f"Cannot delete from read-only table {table}.")
    # Materialize before testing truthiness: iterators and generators are
    # always truthy, so an empty one would otherwise be mistaken for
    # "columns were given" and short-circuit below.
    columns = list(columns)
    if columns and not rows:
        # If there are no columns, this operation is supposed to delete
        # everything (so we proceed as usual).  But if there are columns,
        # but no rows, it was a constrained bulk operation where the
        # constraint is that no rows match, and we should short-circuit
        # while reporting that no rows were affected.
        return 0
    sql = table.delete()

    # More efficient to use IN operator if there is only one
    # variable changing across all rows.
    content: dict[str, set] = defaultdict(set)
    if len(columns) == 1:
        # Nothing to calculate since we can always use IN
        column = columns[0]
        changing_columns = [column]
        content[column] = {row[column] for row in rows}
    else:
        for row in rows:
            for k, v in row.items():
                content[k].add(v)
        changing_columns = [col for col, values in content.items() if len(values) > 1]

    if len(changing_columns) != 1:
        # More than one column changes each time so do explicit bind
        # parameters and have each row processed separately.
        whereTerms = [table.columns[name] == sqlalchemy.sql.bindparam(name) for name in columns]
        if whereTerms:
            sql = sql.where(sqlalchemy.sql.and_(*whereTerms))
        with self._transaction() as (_, connection):
            return connection.execute(sql, rows).rowcount

    # One of the columns has changing values but any others are
    # fixed.  In this case we can use an IN operator and be more
    # efficient.
    name = changing_columns[0]

    # Simple equality clauses for the unchanging columns; each of their
    # value sets holds exactly one element.
    clauses = [table.columns[k] == next(iter(values)) for k, values in content.items() if k != name]

    # The IN operator will not work for "infinite" numbers of
    # rows so must batch it up into distinct calls.
    in_content = list(content[name])
    n_per_loop = 1_000  # Controls how many items to put in IN clause
    rowcount = 0
    with self._transaction() as (_, connection):
        for start in range(0, len(in_content), n_per_loop):
            in_clause = table.columns[name].in_(in_content[start : start + n_per_loop])
            newsql = sql.where(sqlalchemy.sql.and_(*clauses, in_clause))
            rowcount += connection.execute(newsql).rowcount
    return rowcount

1676 

def deleteWhere(self, table: sqlalchemy.schema.Table, where: sqlalchemy.sql.ColumnElement) -> int:
    """Delete the rows of a table selected by a ready-made WHERE clause.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table that rows should be deleted from.
    where : `sqlalchemy.sql.ColumnElement`
        Pre-constructed SQLAlchemy expression that is used, as given, as
        the ``WHERE`` clause of the delete statement.

    Returns
    -------
    count : `int`
        Number of rows deleted.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    The default implementation should be sufficient for most derived
    classes.
    """
    self.assertTableWriteable(table, f"Cannot delete from read-only table {table}.")

    # Build the DELETE first, then attach the caller-supplied constraint.
    statement = table.delete()
    statement = statement.where(where)
    with self._transaction() as (_, connection):
        outcome = connection.execute(statement)
        return outcome.rowcount

1712 

def update(self, table: sqlalchemy.schema.Table, where: dict[str, str], *rows: dict) -> int:
    """Update one or more rows in a table.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table containing the rows to be updated.
    where : `dict` [`str`, `str`]
        Maps the name of each column used to locate existing rows to the
        key under which the matching value is stored in the ``rows``
        dictionaries. The two may differ because of SQLAlchemy
        limitations.
    *rows
        Positional arguments are the rows to be updated. All dictionaries
        must share the same keys; each key is either a value of the
        ``where`` mapping or the name of a column to be updated.

    Returns
    -------
    count : `int`
        Number of rows matched (regardless of whether the update actually
        modified them).

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    The default implementation should be sufficient for most derived
    classes.
    """
    self.assertTableWriteable(table, f"Cannot update read-only table {table}.")
    if len(rows) == 0:
        # Nothing to do; report that no rows were matched.
        return 0

    # One equality test per search column, each bound to the key that
    # holds its value in every row dictionary.
    conditions = []
    for column_name, bind_key in where.items():
        conditions.append(table.columns[column_name] == sqlalchemy.sql.bindparam(bind_key))
    statement = table.update().where(sqlalchemy.sql.and_(*conditions))

    with self._transaction() as (_, connection):
        outcome = connection.execute(statement, rows)
        return outcome.rowcount

1758 

@contextmanager
def query(
    self,
    sql: sqlalchemy.sql.expression.Executable | sqlalchemy.sql.expression.SelectBase,
    *args: Any,
    **kwargs: Any,
) -> Iterator[sqlalchemy.engine.CursorResult]:
    """Run a SELECT query against the database.

    Parameters
    ----------
    sql : `sqlalchemy.sql.expression.SelectBase`
        A SQLAlchemy representation of a ``SELECT`` query.
    *args
        Additional positional arguments are forwarded to
        `sqlalchemy.engine.Connection.execute`.
    **kwargs
        Additional keyword arguments are forwarded to
        `sqlalchemy.engine.Connection.execute`.

    Returns
    -------
    result_context : `sqlalchemy.engine.CursorResults`
        Context manager that returns the query result object when entered.
        These results are invalidated when the context is exited.

    Notes
    -----
    If there is no session connection, a new connection is obtained from
    the engine for the duration of the context and closed on exit. That
    connection is also closed when executing the query itself raises, so a
    failed query does not leak it. A session connection is never closed
    here.
    """
    if self._session_connection is None:
        connection = self._engine.connect()
    else:
        connection = self._session_connection
    try:
        # TODO: SelectBase is not good for execute(), but it used everywhere,
        # e.g. in daf_relation. We should switch to Executable at some point.
        # The cast target is quoted so that it is only seen by type checkers
        # and never evaluated at runtime.
        result = connection.execute(cast("sqlalchemy.sql.expression.Executable", sql), *args, **kwargs)
        yield result
    finally:
        # Only close connections opened by this call; the execute() above
        # is inside the ``try`` so this also runs when the query fails.
        if connection is not self._session_connection:
            connection.close()

1797 

@abstractmethod
def constant_rows(
    self,
    fields: NamedValueAbstractSet[ddl.FieldSpec],
    *rows: dict,
    name: str | None = None,
) -> sqlalchemy.sql.FromClause:
    """Build a SQLAlchemy object standing for a handful of constant-valued
    rows.

    Parameters
    ----------
    fields : `NamedValueAbstractSet` [ `ddl.FieldSpec` ]
        The columns of the rows. Unique and foreign key constraints are
        ignored.
    *rows : `dict`
        Values for the rows.
    name : `str`, optional
        If provided, the name of the SQL construct. If not provided, an
        opaque but unique identifier is generated.

    Returns
    -------
    from_clause : `sqlalchemy.sql.FromClause`
        SQLAlchemy object representing the given rows. This is guaranteed
        to be something that can be directly joined into a ``SELECT``
        query's ``FROM`` clause, and will not involve a temporary table
        that needs to be cleaned up later.

    Notes
    -----
    The default implementation uses the SQL-standard ``VALUES`` construct,
    but support for that construct is varied enough across popular RDBMSs
    that the method is still marked abstract to force explicit opt-in via
    delegation to `super`.
    """
    if name is None:
        # No name requested: make up one from a random UUID.
        name = f"tmp_{uuid.uuid4().hex}"

    # One column per field spec, in the iteration order of ``fields``.
    column_defs = [sqlalchemy.Column(spec.name, spec.getSizedColumnType()) for spec in fields]
    values_construct = sqlalchemy.sql.values(*column_defs, name=name)

    # Each row dictionary is flattened to a tuple ordered like
    # ``fields.names``.
    row_tuples = [tuple(row[field_name] for field_name in fields.names) for row in rows]
    return values_construct.data(row_tuples)

1840 

def get_constant_rows_max(self) -> int:
    """Report how many rows, at most, should be handed to `constant_rows`
    for this backend.

    Returns
    -------
    max : `int`
        Maximum number of rows.

    Notes
    -----
    This should reflect typical performance profiles (or a guess at these),
    not just hard database engine limits.
    """
    max_rows = 100
    return max_rows

1856 

# Annotation-only declaration: no value is assigned at this point in the
# class body.
origin: int
"""An integer ID that should be used as the default for any datasets,
quanta, or other entities that use a (autoincrement, origin) compound
primary key (`int`).
"""

# Annotation-only declaration: no value is assigned at this point in the
# class body. The annotation allows `None` as well as a string.
namespace: str | None
"""The schema or namespace this database instance is associated with
(`str` or `None`).
"""