Coverage for python/lsst/daf/butler/registry/interfaces/_database.py: 22%

413 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-10-27 09:44 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27from __future__ import annotations 

28 

29from ... import ddl, time_utils 

30 

31__all__ = [ 

32 "Database", 

33 "ReadOnlyDatabaseError", 

34 "DatabaseConflictError", 

35 "DatabaseInsertMode", 

36 "SchemaAlreadyDefinedError", 

37 "StaticTablesContext", 

38] 

39 

40import enum 

41import uuid 

42import warnings 

43from abc import ABC, abstractmethod 

44from collections import defaultdict 

45from collections.abc import Callable, Iterable, Iterator, Sequence 

46from contextlib import contextmanager 

47from typing import Any, cast, final 

48 

49import astropy.time 

50import sqlalchemy 

51 

52from ..._named import NamedValueAbstractSet 

53from ..._timespan import TimespanDatabaseRepresentation 

54from .._exceptions import ConflictingDefinitionError 

55 

56 

class DatabaseInsertMode(enum.Enum):
    """Enumeration of the strategies available when inserting database
    records.
    """

    INSERT = enum.auto()
    """Plain insert; fails if a record already exists."""

    REPLACE = enum.auto()
    """Insert that overwrites any record that already exists."""

    ENSURE = enum.auto()
    """Insert that skips any record that already exists."""

68 

69 

70# TODO: method is called with list[ReflectedColumn] in SA 2, and 

71# ReflectedColumn does not exist in 1.4. 

def _checkExistingTableDefinition(name: str, spec: ddl.TableSpec, inspection: list) -> None:
    """Verify that a table's `ddl.TableSpec` agrees with what database
    introspection reports for it.

    Parameters
    ----------
    name : `str`
        Name of the table; it only appears in the error message.
    spec : `ddl.TableSpec`
        Expected definition of the table.
    inspection : `list`
        Per-column records as returned by
        `sqlalchemy.engine.reflection.Inspector.get_columns`. Only the
        ``"name"`` entry of each record is examined.

    Raises
    ------
    DatabaseConflictError
        Raised if the set of column names in the specification differs from
        the set of column names reported by the database.
    """
    # Keep the reflected names as a list so the error message shows them in
    # the order the database reported them.
    reflected = [column["name"] for column in inspection]
    if spec.fields.names != set(reflected):
        message = (
            f"Table '{name}' exists but is defined differently in the database; "
            f"specification has columns {list(spec.fields.names)}, while the "
            f"table in the database has {reflected}."
        )
        raise DatabaseConflictError(message)

98 

99 

class ReadOnlyDatabaseError(RuntimeError):
    """Error signalling that a write operation was attempted on a
    `Database` that is read-only.
    """

104 

105 

class DatabaseConflictError(ConflictingDefinitionError):
    """Error signalling that database content (row values or schema
    entities) is inconsistent with what this client expects.
    """

110 

111 

class SchemaAlreadyDefinedError(RuntimeError):
    """Error signalling an attempt to initialize a database schema in a
    database where some tables already exist.
    """

116 

117 

118class StaticTablesContext: 

119 """Helper class used to declare the static schema for a registry layer 

120 in a database. 

121 

122 An instance of this class is returned by `Database.declareStaticTables`, 

123 which should be the only way it should be constructed. 

124 """ 

125 

126 def __init__(self, db: Database, connection: sqlalchemy.engine.Connection): 

127 self._db = db 

128 self._foreignKeys: list[tuple[sqlalchemy.schema.Table, sqlalchemy.schema.ForeignKeyConstraint]] = [] 

129 self._inspector = sqlalchemy.inspect(connection) 

130 self._tableNames = frozenset(self._inspector.get_table_names(schema=self._db.namespace)) 

131 self._initializers: list[Callable[[Database], None]] = [] 

132 

133 def addTable(self, name: str, spec: ddl.TableSpec) -> sqlalchemy.schema.Table: 

134 """Add a new table to the schema, returning its sqlalchemy 

135 representation. 

136 

137 The new table may not actually be created until the end of the 

138 context created by `Database.declareStaticTables`, allowing tables 

139 to be declared in any order even in the presence of foreign key 

140 relationships. 

141 """ 

142 name = self._db._mangleTableName(name) 

143 if name in self._tableNames: 

144 _checkExistingTableDefinition( 

145 name, spec, self._inspector.get_columns(name, schema=self._db.namespace) 

146 ) 

147 metadata = self._db._metadata 

148 assert metadata is not None, "Guaranteed by context manager that returns this object." 

149 table = self._db._convertTableSpec(name, spec, metadata) 

150 for foreignKeySpec in spec.foreignKeys: 

151 self._foreignKeys.append((table, self._db._convertForeignKeySpec(name, foreignKeySpec, metadata))) 

152 return table 

153 

154 def addTableTuple(self, specs: tuple[ddl.TableSpec, ...]) -> tuple[sqlalchemy.schema.Table, ...]: 

155 """Add a named tuple of tables to the schema, returning their 

156 SQLAlchemy representations in a named tuple of the same type. 

157 

158 The new tables may not actually be created until the end of the 

159 context created by `Database.declareStaticTables`, allowing tables 

160 to be declared in any order even in the presence of foreign key 

161 relationships. 

162 

163 Notes 

164 ----- 

165 ``specs`` *must* be an instance of a type created by 

166 `collections.namedtuple`, not just regular tuple, and the returned 

167 object is guaranteed to be the same. Because `~collections.namedtuple` 

168 is just a factory for `type` objects, not an actual type itself, 

169 we cannot represent this with type annotations. 

170 """ 

171 return specs._make( # type: ignore 

172 self.addTable(name, spec) for name, spec in zip(specs._fields, specs, strict=True) # type: ignore 

173 ) 

174 

175 def addInitializer(self, initializer: Callable[[Database], None]) -> None: 

176 """Add a method that does one-time initialization of a database. 

177 

178 Initialization can mean anything that changes state of a database 

179 and needs to be done exactly once after database schema was created. 

180 An example for that could be population of schema attributes. 

181 

182 Parameters 

183 ---------- 

184 initializer : callable 

185 Method of a single argument which is a `Database` instance. 

186 """ 

187 self._initializers.append(initializer) 

188 

189 

190class Database(ABC): 

191 """An abstract interface that represents a particular database engine's 

192 representation of a single schema/namespace/database. 

193 

194 Parameters 

195 ---------- 

196 origin : `int` 

197 An integer ID that should be used as the default for any datasets, 

198 quanta, or other entities that use a (autoincrement, origin) compound 

199 primary key. 

200 engine : `sqlalchemy.engine.Engine` 

201 The SQLAlchemy engine for this `Database`. 

202 namespace : `str`, optional 

203 Name of the schema or namespace this instance is associated with. 

204 This is passed as the ``schema`` argument when constructing a 

205 `sqlalchemy.schema.MetaData` instance. We use ``namespace`` instead to 

206 avoid confusion between "schema means namespace" and "schema means 

207 table definitions". 

208 

209 Notes 

210 ----- 

211 `Database` requires all write operations to go through its special named 

212 methods. Our write patterns are sufficiently simple that we don't really 

213 need the full flexibility of SQL insert/update/delete syntax, and we need 

214 non-standard (but common) functionality in these operations sufficiently 

215 often that it seems worthwhile to provide our own generic API. 

216 

217 In contrast, `Database.query` allows arbitrary ``SELECT`` queries (via 

218 their SQLAlchemy representation) to be run, as we expect these to require 

219 significantly more sophistication while still being limited to standard 

220 SQL. 

221 

222 `Database` itself has several underscore-prefixed attributes: 

223 

224 - ``_engine``: SQLAlchemy object representing its engine. 

225 - ``_connection``: method returning a context manager for 

226 `sqlalchemy.engine.Connection` object. 

227 - ``_metadata``: the `sqlalchemy.schema.MetaData` object representing 

228 the tables and other schema entities. 

229 

230 These are considered protected (derived classes may access them, but other 

231 code should not), and read-only, aside from executing SQL via 

232 ``_connection``. 

233 """ 

234 

def __init__(self, *, origin: int, engine: sqlalchemy.engine.Engine, namespace: str | None = None):
    """Store the construction arguments and reset per-instance state.

    Parameters
    ----------
    origin : `int`
        Default origin ID, stored as ``self.origin``.
    engine : `sqlalchemy.engine.Engine`
        SQLAlchemy engine, stored as ``self._engine``.
    namespace : `str`, optional
        Schema/namespace name, stored as ``self.namespace``.
    """
    # Public attributes.
    self.origin = origin
    self.namespace = namespace
    # Protected state: there is no open session connection, no declared
    # metadata and no known temporary table until something sets them.
    self._engine = engine
    self._session_connection: sqlalchemy.engine.Connection | None = None
    self._metadata: sqlalchemy.schema.MetaData | None = None
    self._temp_tables: set[str] = set()

242 

def __repr__(self) -> str:
    """Return a representation built from the class name and the
    connection URL, with any password masked and the namespace appended
    after ``#`` when one is set.
    """
    # Reporting the connection URL is more useful than trying to reproduce
    # every parameter that was used to create the object.
    url = self._engine.url
    if url.password is not None:
        url = url.set(password="***")
    uri = str(url)
    if self.namespace:
        uri = f"{uri}#{self.namespace}"
    return f'{type(self).__name__}("{uri}")'

254 

@classmethod
def makeDefaultUri(cls, root: str) -> str | None:
    """Return a default connection URI suitable for the given root
    directory, or `None` when no such default exists.

    This implementation always returns `None`.
    """
    return None

261 

@classmethod
def fromUri(
    cls,
    uri: str | sqlalchemy.engine.URL,
    *,
    origin: int,
    namespace: str | None = None,
    writeable: bool = True,
) -> Database:
    """Build a database instance from a SQLAlchemy URI.

    The engine is created with `makeEngine` and then handed to
    `fromEngine`.

    Parameters
    ----------
    uri : `str` or `sqlalchemy.engine.URL`
        SQLAlchemy URI connection string.
    origin : `int`
        Integer ID to use as the default for any datasets, quanta, or other
        entities that use a (autoincrement, origin) compound primary key.
    namespace : `str`, optional
        Database namespace (i.e. schema) to associate the new instance
        with. If `None` (default), the namespace (if any) is inferred from
        the URI.
    writeable : `bool`, optional
        If `True`, allow write operations on the database, including
        ``CREATE TABLE``.

    Returns
    -------
    db : `Database`
        A new `Database` instance.
    """
    engine = cls.makeEngine(uri, writeable=writeable)
    return cls.fromEngine(engine, origin=origin, namespace=namespace, writeable=writeable)

297 

@classmethod
@abstractmethod
def makeEngine(
    cls, uri: str | sqlalchemy.engine.URL, *, writeable: bool = True
) -> sqlalchemy.engine.Engine:
    """Build a `sqlalchemy.engine.Engine` from a SQLAlchemy URI.

    Parameters
    ----------
    uri : `str` or `sqlalchemy.engine.URL`
        SQLAlchemy URI connection string.
    writeable : `bool`, optional
        If `True`, allow write operations on the database, including
        ``CREATE TABLE``.

    Returns
    -------
    engine : `sqlalchemy.engine.Engine`
        A database engine.

    Notes
    -----
    Subclasses that can connect to a database in other ways are encouraged
    to add optional arguments to their implementation, provided they stay
    compatible with this base-class call signature.
    """
    raise NotImplementedError()

326 

@classmethod
@abstractmethod
def fromEngine(
    cls,
    engine: sqlalchemy.engine.Engine,
    *,
    origin: int,
    namespace: str | None = None,
    writeable: bool = True,
) -> Database:
    """Build a new `Database` around an existing
    `sqlalchemy.engine.Engine`.

    Parameters
    ----------
    engine : `sqlalchemy.engine.Engine`
        Engine for the database. May be shared between `Database`
        instances.
    origin : `int`
        Integer ID to use as the default for any datasets, quanta, or other
        entities that use a (autoincrement, origin) compound primary key.
    namespace : `str`, optional
        A different database namespace (i.e. schema) to associate the new
        instance with. If `None` (default), the namespace (if any) is
        inferred from the connection.
    writeable : `bool`, optional
        If `True`, allow write operations on the database, including
        ``CREATE TABLE``.

    Returns
    -------
    db : `Database`
        A new `Database` instance.

    Notes
    -----
    This method lets several `Database` instances share one engine, which
    is desirable when they represent different namespaces that can be
    queried together.
    """
    raise NotImplementedError()

368 

@final
@contextmanager
def session(self) -> Iterator[None]:
    """Return a context manager representing a session (a persistent
    connection to a database).

    Returns
    -------
    context : `AbstractContextManager` [ `None` ]
        A context manager that does not return a value when entered.

    Notes
    -----
    Use this when a sequence of read-only SQL operations will be performed
    in rapid succession *without* requiring that they yield consistent
    results in the presence of concurrent writes (or, more rarely, when
    conflicting concurrent writes are rare/impossible and the session will
    be open long enough that a transaction is inadvisable).
    """
    # Delegate to the protected implementation, hiding the connection
    # object it yields from callers.
    managed = self._session()
    with managed:
        yield

390 

@final
@contextmanager
def transaction(
    self,
    *,
    interrupting: bool = False,
    savepoint: bool = False,
    lock: Iterable[sqlalchemy.schema.Table] = (),
    for_temp_tables: bool = False,
) -> Iterator[None]:
    """Return a context manager representing a transaction.

    Parameters
    ----------
    interrupting : `bool`, optional
        If `True` (`False` is default), this transaction block may not be
        nested within an outer one, and attempting to do so is a logic
        (i.e. assertion) error.
    savepoint : `bool`, optional
        If `True` (`False` is default), create a `SAVEPOINT`, allowing
        exceptions raised by the database (e.g. due to constraint
        violations) during this transaction's context to be caught outside
        it without also rolling back all operations in an outer transaction
        block. If `False`, transactions may still be nested, but a
        rollback may be generated at any level and affects all levels, and
        commits are deferred until the outermost block completes. If any
        outer transaction block was created with ``savepoint=True``, all
        inner blocks will be as well (regardless of the actual value
        passed). This has no effect if this is the outermost transaction.
    lock : `~collections.abc.Iterable` [ `sqlalchemy.schema.Table` ], optional
        Tables to lock for the duration of this transaction. These locks
        are guaranteed to prevent concurrent writes and allow this
        transaction (only) to acquire the same locks (others should
        block), but only prevent concurrent reads if the database engine
        requires that in order to block concurrent writes.
    for_temp_tables : `bool`, optional
        If `True`, this transaction may involve creating temporary tables.

    Returns
    -------
    context : `AbstractContextManager` [ `None` ]
        A context manager that commits the transaction when it is exited
        without error and rolls back the transaction when it is exited via
        an exception.

    Notes
    -----
    All transactions on a connection managed by one or more `Database`
    instances _must_ go through this method, or transaction state will not
    be correctly managed.
    """
    # Delegate to the protected implementation, hiding the
    # (is_new, connection) pair it yields from callers.
    managed = self._transaction(
        interrupting=interrupting,
        savepoint=savepoint,
        lock=lock,
        for_temp_tables=for_temp_tables,
    )
    with managed:
        yield

447 

@contextmanager
def temporary_table(
    self, spec: ddl.TableSpec, name: str | None = None
) -> Iterator[sqlalchemy.schema.Table]:
    """Return a context manager that creates a temporary table and drops
    it again on exit.

    Parameters
    ----------
    spec : `ddl.TableSpec`
        Specification for the columns. Unique and foreign key constraints
        may be ignored.
    name : `str`, optional
        If provided, the name of the SQL construct. If not provided, an
        opaque but unique identifier is generated.

    Returns
    -------
    context : `AbstractContextManager` [ `sqlalchemy.schema.Table` ]
        A context manager that returns a SQLAlchemy representation of the
        temporary table when entered.

    Notes
    -----
    Temporary tables may be created, dropped, and written to even in
    read-only databases - at least according to the Python-level
    protections in the `Database` classes. Server permissions may say
    otherwise, but in that case they probably need to be modified to
    support the full range of expected read-only butler behavior.
    """
    with self._session() as connection:
        table = self._make_temporary_table(connection, spec=spec, name=name)
        # Record the table key while the table exists; `isTableWriteable`
        # consults this set.
        self._temp_tables.add(table.key)
        try:
            yield table
        finally:
            # Drop inside a transaction, and only then forget the key.
            with self._transaction():
                table.drop(connection)
            self._temp_tables.remove(table.key)

487 

@contextmanager
def _session(self) -> Iterator[sqlalchemy.engine.Connection]:
    """Protected implementation of `session` that actually returns the
    connection.

    Internal `Database` code that needs the SQLAlchemy connection object
    calls this method. Subclasses should override it rather than `session`
    itself.

    Returns
    -------
    context : `AbstractContextManager` [ `sqlalchemy.engine.Connection` ]
        A context manager that returns a SQLAlchemy connection when
        entered.
    """
    if self._session_connection is not None:
        # A session is already open: hand out its connection and leave the
        # cleanup to whoever opened it.
        yield self._session_connection
        return
    try:
        # No session yet: open a connection and close it when done.
        self._session_connection = self._engine.connect()
        yield self._session_connection
    finally:
        if self._session_connection is not None:
            self._session_connection.close()
            self._session_connection = None
        # Temporary tables only live within a session.
        self._temp_tables = set()

518 

@contextmanager
def _transaction(
    self,
    *,
    interrupting: bool = False,
    savepoint: bool = False,
    lock: Iterable[sqlalchemy.schema.Table] = (),
    for_temp_tables: bool = False,
) -> Iterator[tuple[bool, sqlalchemy.engine.Connection]]:
    """Protected implementation of `transaction` that actually returns the
    connection and whether this is a new outermost transaction.

    Internal `Database` code that needs the SQLAlchemy connection object
    calls this method. Subclasses should override it rather than
    `transaction` itself.

    Parameters
    ----------
    interrupting : `bool`, optional
        If `True` (`False` is default), this transaction block may not be
        nested within an outer one, and attempting to do so is a logic
        (i.e. assertion) error.
    savepoint : `bool`, optional
        If `True` (`False` is default), create a `SAVEPOINT`, allowing
        exceptions raised by the database (e.g. due to constraint
        violations) during this transaction's context to be caught outside
        it without also rolling back all operations in an outer transaction
        block. If `False`, transactions may still be nested, but a
        rollback may be generated at any level and affects all levels, and
        commits are deferred until the outermost block completes. If any
        outer transaction block was created with ``savepoint=True``, all
        inner blocks will be as well (regardless of the actual value
        passed). This has no effect if this is the outermost transaction.
    lock : `~collections.abc.Iterable` [ `sqlalchemy.schema.Table` ], optional
        Tables to lock for the duration of this transaction. These locks
        are guaranteed to prevent concurrent writes and allow this
        transaction (only) to acquire the same locks (others should
        block), but only prevent concurrent reads if the database engine
        requires that in order to block concurrent writes.
    for_temp_tables : `bool`, optional
        If `True`, this transaction may involve creating temporary tables.
        This implementation does not use the value.

    Returns
    -------
    context : `AbstractContextManager` [ `tuple` [ `bool`, \
            `sqlalchemy.engine.Connection` ] ]
        A context manager that commits the transaction when it is exited
        without error and rolls back the transaction when it is exited via
        an exception. When entered, it returns a tuple of:

        - ``is_new`` (`bool`): whether this is a new (outermost)
          transaction;
        - ``connection`` (`sqlalchemy.engine.Connection`): the connection.
    """
    with self._session() as connection:
        nested = connection.in_transaction()
        assert not (interrupting and nested), (
            "Logic error in transaction nesting: an operation that would "
            "interrupt the active transaction context has been requested."
        )
        # Once any enclosing block uses a savepoint, inner blocks do too.
        savepoint = savepoint or connection.in_nested_transaction()
        # ``trans`` stays `None` for a nested block without a savepoint:
        # such blocks neither commit nor roll back anything themselves.
        trans: sqlalchemy.engine.Transaction | None = None
        if not nested:
            # The outermost context always uses a regular (non-savepoint)
            # transaction.
            trans = connection.begin()
        elif savepoint:
            trans = connection.begin_nested()
        self._lockTables(connection, lock)
        try:
            yield not nested, connection
            if trans is not None:
                trans.commit()
        except BaseException:
            if trans is not None:
                trans.rollback()
            raise

601 

@abstractmethod
def _lockTables(
    self, connection: sqlalchemy.engine.Connection, tables: Iterable[sqlalchemy.schema.Table] = ()
) -> None:
    """Acquire locks on the given tables.

    This is an implementation hook for subclasses that is invoked while a
    transaction is being set up; other code should not call it directly.

    Parameters
    ----------
    connection : `sqlalchemy.engine.Connection`
        Database connection object. A transaction is guaranteed to be in
        progress already for this connection.
    tables : `~collections.abc.Iterable` [ `sqlalchemy.schema.Table` ], optional
        Tables to lock for the duration of this transaction. These locks
        are guaranteed to prevent concurrent writes and allow this
        transaction (only) to acquire the same locks (others should
        block), but only prevent concurrent reads if the database engine
        requires that in order to block concurrent writes.
    """
    raise NotImplementedError()

625 

def isTableWriteable(self, table: sqlalchemy.schema.Table) -> bool:
    """Report whether a table can be written to, either because the
    database connection is read-write or because the table is a temporary
    table.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy table object to check.

    Returns
    -------
    writeable : `bool`
        Whether this table is writeable.
    """
    writeable = self.isWriteable()
    # Temporary tables tracked by this object stay writeable even when the
    # database as a whole is not.
    return writeable or table.key in self._temp_tables

641 

def assertTableWriteable(self, table: sqlalchemy.schema.Table, msg: str) -> None:
    """Raise unless the given table is writeable, i.e. unless the database
    connection is read-write or the table is a temporary table.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy table object to check.
    msg : `str`
        Message for the exception raised when the table is not writeable.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isTableWriteable` reports that the table is not
        writeable.
    """
    if self.isTableWriteable(table):
        return
    raise ReadOnlyDatabaseError(msg)

656 

@contextmanager
def declareStaticTables(self, *, create: bool) -> Iterator[StaticTablesContext]:
    """Return a context manager in which the database's static DDL schema
    can be declared.

    Parameters
    ----------
    create : `bool`
        If `True`, attempt to create all tables at the end of the context.
        If `False`, they will be assumed to already exist.

    Returns
    -------
    schema : `StaticTablesContext`
        A helper object that is used to add new tables.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if ``create`` is `True` and `Database.isWriteable` returns
        `False`.
    SchemaAlreadyDefinedError
        Raised if ``create`` is `True` and the database namespace already
        contains tables.

    Examples
    --------
    Given a `Database` instance ``db``::

        with db.declareStaticTables(create=True) as schema:
            schema.addTable("table1", TableSpec(...))
            schema.addTable("table2", TableSpec(...))

    Notes
    -----
    A database's static DDL schema must be declared before any dynamic
    tables are managed via calls to `ensureTableExists` or
    `getExistingTable`. The order in which static schema tables are added
    inside the context block is unimportant; they will automatically be
    sorted and added in an order consistent with their foreign key
    relationships.
    """
    if create and not self.isWriteable():
        raise ReadOnlyDatabaseError(f"Cannot create tables in read-only database {self}.")
    self._metadata = sqlalchemy.MetaData(schema=self.namespace)
    try:
        with self._transaction() as (_, connection):
            context = StaticTablesContext(self, connection)
            if create and context._tableNames:
                # The database looks initialized already; refuse to do
                # anything rather than risk modifying or destroying a
                # valid schema.
                raise SchemaAlreadyDefinedError(f"Cannot create tables in non-empty database {self}.")
            yield context
            # Foreign keys are attached only after the caller has declared
            # every table.
            for table, foreignKey in context._foreignKeys:
                table.append_constraint(foreignKey)
            if create:
                namespace = self.namespace
                if namespace is not None and namespace not in context._inspector.get_schema_names():
                    connection.execute(sqlalchemy.schema.CreateSchema(namespace))
                # Some of our columns use sqlalchemy Sequence objects, and
                # a sqlalchemy bug makes building the repr of a Sequence
                # emit a deprecation warning from one of its properties.
                # Filter those warnings out while the tables are created.
                with warnings.catch_warnings():
                    warnings.simplefilter("ignore", category=sqlalchemy.exc.SADeprecationWarning)
                    self._metadata.create_all(connection)
                # Run all registered initializers, in registration order.
                for init in context._initializers:
                    init(self)
    except BaseException:
        # Any failure leaves this object without declared metadata.
        self._metadata = None
        raise

731 

@abstractmethod
def isWriteable(self) -> bool:
    """Report whether this client is able to modify this database.

    Returns
    -------
    writeable : `bool`
        `True` if this database can be modified by this client.
    """
    raise NotImplementedError()

736 

@abstractmethod
def __str__(self) -> str:
    """Return a human-readable identifier for this `Database`, including
    any namespace or schema that identifies its names within a `Registry`.
    """
    raise NotImplementedError()

743 

@property
def dialect(self) -> sqlalchemy.engine.Dialect:
    """The SQLAlchemy dialect of this database's engine
    (`sqlalchemy.engine.Dialect`).
    """
    engine = self._engine
    return engine.dialect

750 

def shrinkDatabaseEntityName(self, original: str) -> str:
    """Return a form of the given name that fits within this database
    engine's length limits for table, constraint, index, and sequence
    names.

    Implementations should not assume that simple truncation is safe,
    because multiple long names often begin with the same prefix.

    This default implementation returns the given name unchanged.

    Parameters
    ----------
    original : `str`
        The original name.

    Returns
    -------
    shrunk : `str`
        The new, possibly shortened name.
    """
    return original

772 

def expandDatabaseEntityName(self, shrunk: str) -> str:
    """Recover the original name of a database entity whose name was too
    long to fit within the database engine's limits.

    This default implementation returns the given name unchanged.

    Parameters
    ----------
    shrunk : `str`
        The possibly shortened name.

    Returns
    -------
    original : `str`
        The original name.
    """
    return shrunk

788 

def _mangleTableName(self, name: str) -> str:
    """Translate a logical, user-visible table name into the true table
    name used in the database.

    This default implementation returns the given name unchanged.

    Parameters
    ----------
    name : `str`
        Input table name. Should not include a namespace (i.e. schema)
        prefix.

    Returns
    -------
    mangled : `str`
        Mangled version of the table name (still with no namespace prefix).

    Notes
    -----
    Reimplementations of this method must be idempotent - mangling an
    already-mangled name must have no effect.
    """
    return name

812 

def _makeColumnConstraints(self, table: str, spec: ddl.FieldSpec) -> list[sqlalchemy.CheckConstraint]:
    """Build the constraints implied by a field specification.

    Parameters
    ----------
    table : `str`
        Name of the table this column is being added to.
    spec : `FieldSpec`
        Specification for the field to be added.

    Returns
    -------
    constraint : `list` of `sqlalchemy.CheckConstraint`
        Constraints added for this column; this default implementation
        returns an empty list.
    """
    # No additional constraints by default.
    return list()

830 

def _convertFieldSpec(
    self, table: str, spec: ddl.FieldSpec, metadata: sqlalchemy.MetaData, **kwargs: Any
) -> sqlalchemy.schema.Column:
    """Translate a `FieldSpec` into a `sqlalchemy.schema.Column`.

    Parameters
    ----------
    table : `str`
        Name of the table this column is being added to.
    spec : `FieldSpec`
        Specification for the field to be added.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema this field's table is
        being added to.
    **kwargs
        Additional keyword arguments forwarded to the
        `sqlalchemy.schema.Column` constructor. This makes it easier for
        derived classes to delegate to ``super()`` while making only minor
        changes.

    Returns
    -------
    column : `sqlalchemy.schema.Column`
        SQLAlchemy representation of the field.
    """
    extra_args: list[Any] = []
    if spec.autoincrement:
        # Provide a sequence for auto-incrementing on databases that do not
        # support it natively; sqlalchemy ignores it on those that do.
        sequence_name = self.shrinkDatabaseEntityName(f"{table}_seq_{spec.name}")
        extra_args.append(sqlalchemy.Sequence(sequence_name, metadata=metadata))
    assert spec.doc is None or isinstance(spec.doc, str), f"Bad doc for {table}.{spec.name}."
    column_type = spec.getSizedColumnType()
    return sqlalchemy.schema.Column(
        spec.name,
        column_type,
        *extra_args,
        nullable=spec.nullable,
        primary_key=spec.primaryKey,
        comment=spec.doc,
        server_default=spec.default,
        **kwargs,
    )

877 

def _convertForeignKeySpec(
    self, table: str, spec: ddl.ForeignKeySpec, metadata: sqlalchemy.MetaData, **kwargs: Any
) -> sqlalchemy.schema.ForeignKeyConstraint:
    """Convert a `ForeignKeySpec` to a
    `sqlalchemy.schema.ForeignKeyConstraint`.

    Parameters
    ----------
    table : `str`
        Name of the table this foreign key is being added to.
    spec : `ForeignKeySpec`
        Specification for the foreign key to be added.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema this constraint is
        being added to.  Not used by this implementation.
    **kwargs
        Additional keyword arguments to forward to the
        `sqlalchemy.schema.ForeignKeyConstraint` constructor.  This is
        provided to make it easier for derived classes to delegate to
        ``super()`` while making only minor changes.

    Returns
    -------
    constraint : `sqlalchemy.schema.ForeignKeyConstraint`
        SQLAlchemy representation of the constraint.
    """
    # Mangle the referenced table's name once and reuse it for both the
    # constraint name and the fully-qualified target columns.
    target_table = self._mangleTableName(spec.table)
    name = self.shrinkDatabaseEntityName(
        "_".join(["fkey", table, target_table, *spec.target, *spec.source])
    )
    return sqlalchemy.schema.ForeignKeyConstraint(
        spec.source,
        [f"{target_table}.{col}" for col in spec.target],
        name=name,
        ondelete=spec.onDelete,
        # Previously accepted but silently dropped; forward as documented.
        **kwargs,
    )

915 

916 def _convertExclusionConstraintSpec( 

917 self, 

918 table: str, 

919 spec: tuple[str | type[TimespanDatabaseRepresentation], ...], 

920 metadata: sqlalchemy.MetaData, 

921 ) -> sqlalchemy.schema.Constraint: 

922 """Convert a `tuple` from `ddl.TableSpec.exclusion` into a SQLAlchemy 

923 constraint representation. 

924 

925 Parameters 

926 ---------- 

927 table : `str` 

928 Name of the table this constraint is being added to. 

929 spec : `tuple` [ `str` or `type` ] 

930 A tuple of `str` column names and the `type` object returned by 

931 `getTimespanRepresentation` (which must appear exactly once), 

932 indicating the order of the columns in the index used to back the 

933 constraint. 

934 metadata : `sqlalchemy.MetaData` 

935 SQLAlchemy representation of the DDL schema this constraint is 

936 being added to. 

937 

938 Returns 

939 ------- 

940 constraint : `sqlalchemy.schema.Constraint` 

941 SQLAlchemy representation of the constraint. 

942 

943 Raises 

944 ------ 

945 NotImplementedError 

946 Raised if this database does not support exclusion constraints. 

947 """ 

948 raise NotImplementedError(f"Database {self} does not support exclusion constraints.") 

949 

def _convertTableSpec(
    self, name: str, spec: ddl.TableSpec, metadata: sqlalchemy.MetaData, **kwargs: Any
) -> sqlalchemy.schema.Table:
    """Convert a `TableSpec` to a `sqlalchemy.schema.Table`.

    Parameters
    ----------
    name : `str`
        Logical name of the table; it is passed through
        `_mangleTableName` before being used.
    spec : `TableSpec`
        Specification for the table to be added.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema this table is being
        added to.
    **kwargs
        Additional keyword arguments to forward to the
        `sqlalchemy.schema.Table` constructor.  This is provided to make it
        easier for derived classes to delegate to ``super()`` while making
        only minor changes.

    Returns
    -------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy representation of the table.  The original ``spec`` is
        stored under the ``"spec"`` key of its ``info`` dictionary.

    Notes
    -----
    This method does not handle ``spec.foreignKeys`` at all (other than
    adding indexes for them), in order to avoid circular dependencies.
    The constraints themselves are added by higher-level logic in
    `ensureTableExists`, `getExistingTable`, and `declareStaticTables`.
    """
    name = self._mangleTableName(name)
    args: list[sqlalchemy.schema.SchemaItem] = [
        self._convertFieldSpec(name, fieldSpec, metadata) for fieldSpec in spec.fields
    ]

    # Add any column constraints
    for fieldSpec in spec.fields:
        args.extend(self._makeColumnConstraints(name, fieldSpec))

    # Track indexes added for primary key and unique constraints, to make
    # sure we don't add duplicate explicit or foreign key indexes for
    # those.
    allIndexes = {tuple(fieldSpec.name for fieldSpec in spec.fields if fieldSpec.primaryKey)}
    args.extend(
        sqlalchemy.schema.UniqueConstraint(
            *columns, name=self.shrinkDatabaseEntityName("_".join([name, "unq", *columns]))
        )
        for columns in spec.unique
    )
    allIndexes.update(spec.unique)
    # The generator below is consumed by ``extend`` before ``allIndexes``
    # is updated with the explicit indexes, so the filter only sees the
    # primary key and unique-constraint column tuples.
    args.extend(
        sqlalchemy.schema.Index(
            self.shrinkDatabaseEntityName("_".join([name, "idx", *index.columns])),
            *index.columns,
            # NOTE(review): ``allIndexes`` already contains every entry
            # of ``spec.unique``, so for any index that passes the filter
            # this expression evaluates to `False`.
            unique=(index.columns in spec.unique),
            **index.kwargs,
        )
        for index in spec.indexes
        if index.columns not in allIndexes
    )
    allIndexes.update(index.columns for index in spec.indexes)
    # Add an index for each foreign key that requests one, unless an
    # equivalent index already exists.
    args.extend(
        sqlalchemy.schema.Index(
            self.shrinkDatabaseEntityName("_".join((name, "fkidx") + fk.source)),
            *fk.source,
        )
        for fk in spec.foreignKeys
        if fk.addIndex and fk.source not in allIndexes
    )

    args.extend(self._convertExclusionConstraintSpec(name, excl, metadata) for excl in spec.exclusion)

    assert spec.doc is None or isinstance(spec.doc, str), f"Bad doc for {name}."
    return sqlalchemy.schema.Table(name, metadata, *args, comment=spec.doc, info={"spec": spec}, **kwargs)

1023 

def ensureTableExists(self, name: str, spec: ddl.TableSpec) -> sqlalchemy.schema.Table:
    """Return the table with the given name and specification, creating
    it first if it does not exist yet.

    Parameters
    ----------
    name : `str`
        Name of the table (not including namespace qualifiers).
    spec : `TableSpec`
        Specification for the table.  It is used to create the table, and
        *may* be used to check an existing table for consistency, though
        no such check is guaranteed.

    Returns
    -------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy representation of the table.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False`, and the table does not
        already exist.
    DatabaseConflictError
        Raised if the table exists but ``spec`` is inconsistent with its
        definition.

    Notes
    -----
    This method may not be called within transactions.  It may be called
    on read-only databases if and only if the table does in fact already
    exist.

    Subclasses may override this method, but usually should not need to.
    """
    # TODO: if _engine is used to make a table then it uses separate
    # connection and should not interfere with current transaction
    assert (
        self._session_connection is None or not self._session_connection.in_transaction()
    ), "Table creation interrupts transactions."
    assert self._metadata is not None, "Static tables must be declared before dynamic tables."

    existing = self.getExistingTable(name, spec)
    if existing is not None:
        return existing

    if not self.isWriteable():
        raise ReadOnlyDatabaseError(
            f"Table {name} does not exist, and cannot be created because database {self} is read-only."
        )

    created = self._convertTableSpec(name, spec, self._metadata)
    for fkSpec in spec.foreignKeys:
        created.append_constraint(self._convertForeignKeySpec(name, fkSpec, self._metadata))

    try:
        with self._transaction() as (_, connection):
            created.create(connection)
    except sqlalchemy.exc.DatabaseError:
        # Another process may have created the table in the meantime,
        # which usually shows up as OperationalError or ProgrammingError.
        # An IF NOT EXISTS clause cannot be used here because of a
        # server-side PostgreSQL race condition that causes
        # IntegrityError.  All of these derive from DatabaseError, so
        # catch that and look for the table again.
        recovered = self.getExistingTable(name, spec)
        if recovered is None:
            raise
        return recovered
    return created

1089 

def getExistingTable(self, name: str, spec: ddl.TableSpec) -> sqlalchemy.schema.Table | None:
    """Look up a table that already exists, given its name and
    specification.

    Parameters
    ----------
    name : `str`
        Name of the table (not including namespace qualifiers).
    spec : `TableSpec`
        Specification for the table.  It is used to build the SQLAlchemy
        representation of the table and to check that the actual table in
        the database is consistent with it.

    Returns
    -------
    table : `sqlalchemy.schema.Table` or `None`
        SQLAlchemy representation of the table, or `None` if it does not
        exist.

    Raises
    ------
    DatabaseConflictError
        Raised if the table exists but ``spec`` is inconsistent with its
        definition.

    Notes
    -----
    This method can be called within transactions and never modifies the
    database.

    Subclasses may override this method, but usually should not need to.
    """
    assert self._metadata is not None, "Static tables must be declared before dynamic tables."
    name = self._mangleTableName(name)

    # First consult the tables already registered in our metadata.
    known = self._metadata.tables.get(name if self.namespace is None else f"{self.namespace}.{name}")
    if known is not None:
        if spec.fields.names != set(known.columns.keys()):
            raise DatabaseConflictError(
                f"Table '{name}' has already been defined differently; the new "
                f"specification has columns {list(spec.fields.names)}, while "
                f"the previous definition has {list(known.columns.keys())}."
            )
        return known

    # Not registered yet: ask the database itself, via the session
    # connection when there is one and the engine otherwise.
    inspector = sqlalchemy.inspect(
        self._engine if self._session_connection is None else self._session_connection, raiseerr=True
    )
    if name not in inspector.get_table_names(schema=self.namespace):
        return None

    _checkExistingTableDefinition(name, spec, inspector.get_columns(name, schema=self.namespace))
    reflected = self._convertTableSpec(name, spec, self._metadata)
    for fkSpec in spec.foreignKeys:
        reflected.append_constraint(self._convertForeignKeySpec(name, fkSpec, self._metadata))
    return reflected

1142 

def _make_temporary_table(
    self,
    connection: sqlalchemy.engine.Connection,
    spec: ddl.TableSpec,
    name: str | None = None,
    **kwargs: Any,
) -> sqlalchemy.schema.Table:
    """Create a temporary table.

    Parameters
    ----------
    connection : `sqlalchemy.engine.Connection`
        Connection used to create the table.
    spec : `TableSpec`
        Specification for the table.
    name : `str`, optional
        A unique (within this session/connection) name for the table.
        Subclasses may override to modify the actual name used.  When
        omitted, a name of the form ``tmp_<uuid4 hex>`` is generated.
    **kwargs
        Extra keyword arguments passed through to the
        `sqlalchemy.schema.Table` constructor, so that derived classes
        can delegate to ``super()`` while changing only a few details.

    Returns
    -------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy representation of the table.

    Raises
    ------
    RuntimeError
        Raised if the static schema has not been defined yet.
    ValueError
        Raised if the transformed table key differs from ``name`` and is
        already present among the known temporary tables.
    """
    if name is None:
        name = f"tmp_{uuid.uuid4().hex}"
    metadata = self._metadata
    if metadata is None:
        raise RuntimeError("Cannot create temporary table before static schema is defined.")

    temp_table = self._convertTableSpec(
        name, spec, metadata, prefixes=["TEMPORARY"], schema=sqlalchemy.schema.BLANK_SCHEMA, **kwargs
    )
    key = temp_table.key
    if key in self._temp_tables and key != name:
        raise ValueError(
            f"A temporary table with name {name} (transformed to {key} by "
            "Database) already exists."
        )

    for fkSpec in spec.foreignKeys:
        temp_table.append_constraint(self._convertForeignKeySpec(name, fkSpec, metadata))
    with self._transaction():
        temp_table.create(connection)
    return temp_table

1191 

@classmethod
def getTimespanRepresentation(cls) -> type[TimespanDatabaseRepresentation]:
    """Return the `type` describing how `Timespan` objects are stored in
    this database.

    `Database` itself does not use the returned type anywhere else; it is
    up to calling code to keep DDL and queries consistent with it.

    Returns
    -------
    TimespanReprClass : `type` (`TimespanDatabaseRepresentation` subclass)
        A type that encapsulates the way `Timespan` objects should be
        stored in this database.  This base implementation returns
        `TimespanDatabaseRepresentation.Compound`.

    Notes
    -----
    Timespan-mangling logic is deliberately kept outside the `Database`
    implementations, even though the choice of representation ultimately
    belongs to a `Database` implementation, for two main reasons:

    - Timespans appear in relatively few tables and queries in typical
      usage, and the code operating on them already knows it is dealing
      with timespans.  A timespan-representation-aware implementation
      of, say, `insert` would need extra logic to detect when
      timespan-mangling is required, which would usually be useless
      overhead.

    - SQLAlchemy's rich SELECT query expression system has no way to
      wrap multiple columns in a single expression object (the ORM does,
      but the ORM is not used here).  Encapsulating timespan
      representations there would mean wrapping _much_ more of that code
      in our own interfaces.
    """
    representation = TimespanDatabaseRepresentation.Compound
    return representation

1227 

def sync(
    self,
    table: sqlalchemy.schema.Table,
    *,
    keys: dict[str, Any],
    compared: dict[str, Any] | None = None,
    extra: dict[str, Any] | None = None,
    returning: Sequence[str] | None = None,
    update: bool = False,
) -> tuple[dict[str, Any] | None, bool | dict[str, Any]]:
    """Insert into a table as necessary to ensure database contains
    values equivalent to the given ones.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table to be queried and possibly inserted into.
    keys : `dict`
        Column name-value pairs used to search for an existing row; must
        be a combination that can be used to select a single row if one
        exists.  If such a row does not exist, these values are used in
        the insert.
    compared : `dict`, optional
        Column name-value pairs that are compared to those in any existing
        row.  If such a row does not exist, these values are used in the
        insert.
    extra : `dict`, optional
        Column name-value pairs that are ignored if a matching row exists,
        but used in an insert if one is necessary.
    returning : `~collections.abc.Sequence` of `str`, optional
        The names of columns whose values should be returned.
    update : `bool`, optional
        If `True` (`False` is default), update the existing row with the
        values in ``compared`` instead of raising `DatabaseConflictError`.

    Returns
    -------
    row : `dict`, optional
        The value of the fields indicated by ``returning``, or `None` if
        ``returning`` is `None`.
    inserted_or_updated : `bool` or `dict`
        If `True`, a new row was inserted; if `False`, a matching row
        already existed.  If a `dict` (only possible if ``update=True``),
        then an existing row was updated, and the dict maps the names of
        the updated columns to their *old* values (new values can be
        obtained from ``compared``).

    Raises
    ------
    DatabaseConflictError
        Raised if the values in ``compared`` do not match the values in the
        database.
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False`, and no matching record
        already exists.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    It may be called on read-only databases if and only if the matching row
    does in fact already exist.
    """

    def check() -> tuple[int, dict[str, Any] | None, list | None]:
        """Query for a row that matches the ``keys`` argument, and compare
        to what was given by the caller.

        Returns
        -------
        n : `int`
            Number of matching rows.  ``n != 1`` is always an error, but
            it's a different kind of error depending on where `check` is
            being called.
        bad : `dict` or `None`
            The subset of the keys of ``compared`` for which the existing
            values did not match the given one, mapped to the existing
            values in the database.  A non-empty dict signals a conflict,
            which the caller either reports or (with ``update=True``)
            resolves by updating the row.  `None` if ``n != 1``.
        result : `list` or `None`
            Results in the database that correspond to the columns given
            in ``returning``, or `None` if ``returning is None`` or
            ``n != 1``.
        """
        toSelect: set[str] = set()
        if compared is not None:
            toSelect.update(compared)
        if returning is not None:
            toSelect.update(returning)
        if not toSelect:
            # Need to select some column, even if we just want to see
            # how many rows we get back.
            toSelect.add(next(iter(keys)))
        selectSql = (
            sqlalchemy.sql.select(*[table.columns[k].label(k) for k in toSelect])
            .select_from(table)
            .where(sqlalchemy.sql.and_(*[table.columns[k] == v for k, v in keys.items()]))
        )
        with self._transaction() as (_, connection):
            fetched = list(connection.execute(selectSql).mappings())
        if len(fetched) != 1:
            return len(fetched), None, None
        existing = fetched[0]
        if compared is not None:

            def safeNotEqual(a: Any, b: Any) -> bool:
                # astropy times are compared through the project's time
                # converter rather than with plain ``!=``.
                if isinstance(a, astropy.time.Time):
                    return not time_utils.TimeConverter().times_equal(a, b)
                return a != b

            inconsistencies = {
                k: existing[k] for k, v in compared.items() if safeNotEqual(existing[k], v)
            }
        else:
            inconsistencies = {}
        if returning is not None:
            toReturn: list | None = [existing[k] for k in returning]
        else:
            toReturn = None
        return 1, inconsistencies, toReturn

    def format_bad(inconsistencies: dict[str, Any]) -> str:
        """Format the 'bad' dictionary of existing values returned by
        ``check`` into a string suitable for an error message.
        """
        assert compared is not None, "Should not be able to get inconsistencies without comparing."
        return ", ".join(f"{k}: {v!r} != {compared[k]!r}" for k, v in inconsistencies.items())

    if self.isTableWriteable(table):
        # Try an insert first, but allow it to fail (in only specific
        # ways).
        row = keys.copy()
        if compared is not None:
            row.update(compared)
        if extra is not None:
            row.update(extra)
        with self.transaction():
            inserted = bool(self.ensure(table, row))
            inserted_or_updated: bool | dict[str, Any]
            # Need to perform check() for this branch inside the
            # transaction, so we roll back an insert that didn't do
            # what we expected.  That limits the extent to which we
            # can reduce duplication between this block and the other
            # ones that perform similar logic.
            n, bad, result = check()
            if n < 1:
                raise ConflictingDefinitionError(
                    f"Attempted to ensure {row} exists by inserting it with ON CONFLICT IGNORE, "
                    f"but a post-insert query on {keys} returned no results. "
                    f"Insert was {'' if inserted else 'not '}reported as successful. "
                    "This can occur if the insert violated a database constraint other than the "
                    "unique constraint or primary key used to identify the row in this call."
                )
            elif n > 1:
                raise RuntimeError(
                    f"Keys passed to sync {keys.keys()} do not comprise a "
                    f"unique constraint for table {table.name}."
                )
            elif bad:
                assert (
                    compared is not None
                ), "Should not be able to get inconsistencies without comparing."
                if inserted:
                    raise RuntimeError(
                        f"Conflict ({bad}) in sync after successful insert; this is "
                        "possible if the same table is being updated by a concurrent "
                        "process that isn't using sync, but it may also be a bug in "
                        "daf_butler."
                    )
                elif update:
                    # Overwrite only the columns that actually differ,
                    # and report their old values to the caller.
                    with self._transaction() as (_, connection):
                        connection.execute(
                            table.update()
                            .where(sqlalchemy.sql.and_(*[table.columns[k] == v for k, v in keys.items()]))
                            .values(**{k: compared[k] for k in bad})
                        )
                    inserted_or_updated = bad
                else:
                    raise DatabaseConflictError(
                        f"Conflict in sync for table {table.name} on column(s) {format_bad(bad)}."
                    )
            else:
                inserted_or_updated = inserted
    else:
        # Database is not writeable; just see if the row exists.
        n, bad, result = check()
        if n < 1:
            raise ReadOnlyDatabaseError("sync needs to insert, but database is read-only.")
        elif n > 1:
            raise RuntimeError("Keys passed to sync do not comprise a unique constraint.")
        elif bad:
            if update:
                raise ReadOnlyDatabaseError("sync needs to update, but database is read-only.")
            else:
                raise DatabaseConflictError(
                    f"Conflict in sync for table {table.name} on column(s) {format_bad(bad)}."
                )
        inserted_or_updated = False
    if returning is None:
        return None, inserted_or_updated
    else:
        assert result is not None
        return dict(zip(returning, result, strict=True)), inserted_or_updated

1431 

def insert(
    self,
    table: sqlalchemy.schema.Table,
    *rows: dict,
    returnIds: bool = False,
    select: sqlalchemy.sql.expression.SelectBase | None = None,
    names: Iterable[str] | None = None,
) -> list[int] | None:
    """Insert rows into a table, optionally reporting the autoincrement
    primary key values that were generated.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    returnIds : `bool`
        If `True` (`False` is default), return the values of the table's
        autoincrement primary key field (which must exist).
    select : `sqlalchemy.sql.SelectBase`, optional
        A SELECT query expression to insert rows from.  Cannot be
        combined with either ``rows`` or ``returnIds=True``.
    names : `~collections.abc.Iterable` [ `str` ], optional
        Names of columns in ``table`` to be populated, ordered to match
        the columns returned by ``select``.  Ignored if ``select`` is
        `None`.  If not provided, the columns returned by ``select`` must
        be named to match the desired columns of ``table``.
    *rows
        Positional arguments are the rows to be inserted, as dictionaries
        mapping column name to value.  The keys in all dictionaries must
        be the same.

    Returns
    -------
    ids : `None`, or `list` of `int`
        If ``returnIds`` is `True`, a `list` containing the inserted
        values for the table's autoincrement primary key.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is
        called.
    TypeError
        Raised if ``select`` is combined with ``rows`` or
        ``returnIds=True``.

    Notes
    -----
    The default implementation uses bulk insert syntax when ``returnIds``
    is `False`, and a loop over single-row insert operations when it is
    `True`.

    Derived classes should reimplement when they can provide a more
    efficient implementation (especially for the latter case).

    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.
    """
    self.assertTableWriteable(table, f"Cannot insert into read-only table {table}.")
    if select is not None and (rows or returnIds):
        raise TypeError("'select' is incompatible with passing value rows or returnIds=True.")
    if not rows and select is None:
        # Nothing to insert at all.
        return [] if returnIds else None

    with self._transaction() as (_, connection):
        if returnIds:
            # One statement per row, so each generated key can be read.
            statement = table.insert()
            return [connection.execute(statement, row).inserted_primary_key[0] for row in rows]

        if select is None:
            connection.execute(table.insert(), rows)
            return None

        if names is None:
            # columns() is deprecated since 1.4, but
            # selected_columns() method did not exist in 1.3.
            if hasattr(select, "selected_columns"):
                names = select.selected_columns.keys()
            else:
                names = select.columns.keys()
        connection.execute(table.insert().from_select(list(names), select))
        return None

1511 

@abstractmethod
def replace(self, table: sqlalchemy.schema.Table, *rows: dict) -> None:
    """Insert rows into a table, overwriting any existing row whose
    primary key would otherwise conflict with a new row.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    *rows
        Positional arguments are the rows to be inserted, as dictionaries
        mapping column name to value.  The keys in all dictionaries must
        be the same.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is
        called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    Implementations should raise a `sqlalchemy.exc.IntegrityError`
    exception when a constraint other than the primary key would be
    violated.

    Implementations are not required to support `replace` on tables
    with autoincrement keys.
    """
    # Abstract: concrete databases must supply the implementation.
    raise NotImplementedError()

1545 

@abstractmethod
def ensure(self, table: sqlalchemy.schema.Table, *rows: dict, primary_key_only: bool = False) -> int:
    """Insert rows into a table, silently skipping any row whose
    insertion would violate a unique constraint.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    *rows
        Positional arguments are the rows to be inserted, as dictionaries
        mapping column name to value.  The keys in all dictionaries must
        be the same.
    primary_key_only : `bool`, optional
        If `True` (`False` is default), only skip rows that violate the
        primary key constraint, and raise an exception (and rollback
        transactions) for other constraint violations.

    Returns
    -------
    count : `int`
        The number of rows actually inserted.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is
        called.  This is raised even if the operation would do nothing
        even on a writeable database.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    Implementations are not required to support `ensure` on tables
    with autoincrement keys.
    """
    # Abstract: concrete databases must supply the implementation.
    raise NotImplementedError()

1585 

def delete(self, table: sqlalchemy.schema.Table, columns: Iterable[str], *rows: dict) -> int:
    """Delete one or more rows from a table.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table that rows should be deleted from.
    columns : `~collections.abc.Iterable` of `str`
        The names of columns that will be used to constrain the rows to
        be deleted; these will be combined via ``AND`` to form the
        ``WHERE`` clause of the delete query.  If empty, no ``WHERE``
        clause is added and every row is deleted.
    *rows
        Positional arguments are the keys of rows to be deleted, as
        dictionaries mapping column name to value.  The keys in all
        dictionaries must be exactly the names in ``columns``.

    Returns
    -------
    count : `int`
        Number of rows deleted.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    The default implementation should be sufficient for most derived
    classes.
    """
    self.assertTableWriteable(table, f"Cannot delete from read-only table {table}.")
    # Force iterators to list *before* testing for emptiness: a one-shot
    # iterator or generator is always truthy, even when it yields nothing.
    columns = list(columns)
    if columns and not rows:
        # If there are no columns, this operation is supposed to delete
        # everything (so we proceed as usual).  But if there are columns,
        # but no rows, it was a constrained bulk operation where the
        # constraint is that no rows match, and we should short-circuit
        # while reporting that no rows were affected.
        return 0
    sql = table.delete()

    # More efficient to use IN operator if there is only one
    # variable changing across all rows.
    content: dict[str, set] = defaultdict(set)
    if len(columns) == 1:
        # Nothing to calculate since we can always use IN
        only_column = columns[0]
        changing_columns = [only_column]
        content[only_column] = {row[only_column] for row in rows}
    else:
        for row in rows:
            for k, v in row.items():
                content[k].add(v)
        changing_columns = [col for col, values in content.items() if len(values) > 1]

    if len(changing_columns) != 1:
        # More than one column changes each time so do explicit bind
        # parameters and have each row processed separately.
        whereTerms = [table.columns[name] == sqlalchemy.sql.bindparam(name) for name in columns]
        if whereTerms:
            sql = sql.where(sqlalchemy.sql.and_(*whereTerms))
        with self._transaction() as (_, connection):
            return connection.execute(sql, rows).rowcount
    else:
        # One of the columns has changing values but any others are
        # fixed.  In this case we can use an IN operator and be more
        # efficient.
        name = changing_columns.pop()

        # Simple where clause for the unchanging columns
        clauses = []
        for k, v in content.items():
            if k == name:
                continue
            # The set only has one element
            clauses.append(table.columns[k] == v.pop())

        # The IN operator will not work for "infinite" numbers of
        # rows so must batch it up into distinct calls.
        in_content = list(content[name])
        n_elements = len(in_content)

        rowcount = 0
        n_per_loop = 1_000  # Controls how many items to put in IN clause
        with self._transaction() as (_, connection):
            for iposn in range(0, n_elements, n_per_loop):
                endpos = iposn + n_per_loop
                in_clause = table.columns[name].in_(in_content[iposn:endpos])

                newsql = sql.where(sqlalchemy.sql.and_(*clauses, in_clause))
                rowcount += connection.execute(newsql).rowcount
        return rowcount

1684 

def deleteWhere(self, table: sqlalchemy.schema.Table, where: sqlalchemy.sql.ColumnElement) -> int:
    """Delete all rows of a table that match a caller-supplied WHERE
    expression.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table that rows should be deleted from.
    where : `sqlalchemy.sql.ColumnElement`
        Pre-constructed expression that is used as-is as the ``WHERE``
        clause of the delete query.

    Returns
    -------
    count : `int`
        Number of rows deleted, as reported by the ``rowcount`` of the
        executed statement.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    The default implementation should be sufficient for most derived
    classes.
    """
    # Refuse to touch tables that are not writeable before building any SQL.
    self.assertTableWriteable(table, f"Cannot delete from read-only table {table}.")

    statement = table.delete()
    statement = statement.where(where)
    with self._transaction() as (_, connection):
        outcome = connection.execute(statement)
        return outcome.rowcount

1720 

def update(self, table: sqlalchemy.schema.Table, where: dict[str, str], *rows: dict) -> int:
    """Update one or more rows in a table.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table containing the rows to be updated.
    where : `dict` [`str`, `str`]
        A mapping from the names of columns that will be used to search for
        existing rows to the keys that will hold these values in the
        ``rows`` dictionaries.  Note that these may not be the same due to
        SQLAlchemy limitations.  If the mapping is empty no ``WHERE``
        clause is attached to the statement.
    *rows
        Positional arguments are the rows to be updated.  The keys in all
        dictionaries must be the same, and may correspond to either a
        value in the ``where`` dictionary or the name of a column to be
        updated.

    Returns
    -------
    count : `int`
        Number of rows matched (regardless of whether the update actually
        modified them).  Zero is returned without touching the database
        when no ``rows`` are given.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    The default implementation should be sufficient for most derived
    classes.
    """
    self.assertTableWriteable(table, f"Cannot update read-only table {table}.")
    if not rows:
        return 0
    sql = table.update()
    # Each search column is compared against a bind parameter whose name is
    # the key under which the value appears in the ``rows`` dictionaries.
    where_terms = [table.columns[k] == sqlalchemy.sql.bindparam(v) for k, v in where.items()]
    # Calling ``and_()`` with no arguments is deprecated in SQLAlchemy, so
    # only attach a WHERE clause when there is something to constrain on
    # (this mirrors what ``delete`` does).
    if where_terms:
        sql = sql.where(sqlalchemy.sql.and_(*where_terms))
    with self._transaction() as (_, connection):
        return connection.execute(sql, rows).rowcount

1766 

@contextmanager
def query(
    self,
    sql: sqlalchemy.sql.expression.Executable | sqlalchemy.sql.expression.SelectBase,
    *args: Any,
    **kwargs: Any,
) -> Iterator[sqlalchemy.engine.CursorResult]:
    """Run a SELECT query against the database.

    Parameters
    ----------
    sql : `sqlalchemy.sql.expression.SelectBase`
        A SQLAlchemy representation of a ``SELECT`` query.
    *args
        Additional positional arguments are forwarded to
        `sqlalchemy.engine.Connection.execute`.
    **kwargs
        Additional keyword arguments are forwarded to
        `sqlalchemy.engine.Connection.execute`.

    Returns
    -------
    result_context : `sqlalchemy.engine.CursorResult`
        Context manager that returns the query result object when entered.
        These results are invalidated when the context is exited.

    Notes
    -----
    If there is no session connection, a new connection is opened for the
    duration of the context and closed on exit, including when executing
    the query or the body of the ``with`` block raises.  An existing
    session connection is reused and left open.
    """
    if self._session_connection is None:
        connection = self._engine.connect()
    else:
        connection = self._session_connection
    try:
        # Execute inside the ``try`` so that a connection opened above is
        # still closed if the statement itself fails.
        # TODO: SelectBase is not good for execute(), but it used everywhere,
        # e.g. in daf_relation. We should switch to Executable at some point.
        result = connection.execute(cast(sqlalchemy.sql.expression.Executable, sql), *args, **kwargs)
        yield result
    finally:
        # Only close connections that this method opened itself.
        if connection is not self._session_connection:
            connection.close()

1805 

@abstractmethod
def constant_rows(
    self,
    fields: NamedValueAbstractSet[ddl.FieldSpec],
    *rows: dict,
    name: str | None = None,
) -> sqlalchemy.sql.FromClause:
    """Build a SQLAlchemy ``FROM``-clause object holding a small set of
    literal rows.

    Parameters
    ----------
    fields : `NamedValueAbstractSet` [ `ddl.FieldSpec` ]
        The columns of the rows.  Unique and foreign key constraints are
        ignored.
    *rows : `dict`
        Values for the rows; each dictionary is indexed by the names in
        ``fields``.
    name : `str`, optional
        If provided, the name of the SQL construct.  If not provided, an
        opaque but unique identifier is generated.

    Returns
    -------
    from_clause : `sqlalchemy.sql.FromClause`
        SQLAlchemy object representing the given rows.  This is guaranteed
        to be something that can be directly joined into a ``SELECT``
        query's ``FROM`` clause, and will not involve a temporary table
        that needs to be cleaned up later.

    Notes
    -----
    The default implementation uses the SQL-standard ``VALUES`` construct,
    but support for that construct is varied enough across popular RDBMSs
    that the method is still marked abstract to force explicit opt-in via
    delegation to `super`.
    """
    # Fall back to a random, collision-free name when the caller gave none.
    construct_name = f"tmp_{uuid.uuid4().hex}" if name is None else name
    column_defs = [sqlalchemy.Column(spec.name, spec.getSizedColumnType()) for spec in fields]
    values_clause = sqlalchemy.sql.values(*column_defs, name=construct_name)
    # Flatten each row dictionary into a tuple ordered like ``fields``.
    row_tuples = [tuple(row[field_name] for field_name in fields.names) for row in rows]
    return values_clause.data(row_tuples)

1848 

def get_constant_rows_max(self) -> int:
    """Report how many rows callers should pass to `constant_rows` at most
    for this backend.

    Returns
    -------
    max : `int`
        Maximum number of rows.

    Notes
    -----
    This should reflect typical performance profiles (or a guess at these),
    not just hard database engine limits.
    """
    row_limit = 100
    return row_limit

1864 

# The two names below are declared with a type annotation only; no value is
# assigned in the lines shown here, so they are presumably set elsewhere
# (e.g. by a constructor or subclass) -- confirm against the rest of the file.
origin: int
"""An integer ID that should be used as the default for any datasets,
quanta, or other entities that use a (autoincrement, origin) compound
primary key (`int`).
"""

namespace: str | None
"""The schema or namespace this database instance is associated with
(`str` or `None`).
"""

1874 """