Coverage for python/lsst/daf/butler/registry/interfaces/_database.py: 22%

412 statements  

« prev     ^ index     » next       coverage.py v7.3.1, created at 2023-10-02 08:00 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27from __future__ import annotations 

28 

29__all__ = [ 

30 "Database", 

31 "ReadOnlyDatabaseError", 

32 "DatabaseConflictError", 

33 "DatabaseInsertMode", 

34 "SchemaAlreadyDefinedError", 

35 "StaticTablesContext", 

36] 

37 

38import enum 

39import uuid 

40import warnings 

41from abc import ABC, abstractmethod 

42from collections import defaultdict 

43from collections.abc import Callable, Iterable, Iterator, Sequence 

44from contextlib import contextmanager 

45from typing import Any, cast, final 

46 

47import astropy.time 

48import sqlalchemy 

49 

50from ...core import TimespanDatabaseRepresentation, ddl, time_utils 

51from ...core.named import NamedValueAbstractSet 

52from .._exceptions import ConflictingDefinitionError 

53 

54 

class DatabaseInsertMode(enum.Enum):
    """Enumerate the strategies available when inserting database records."""

    #: Insert records, failing if they already exist.
    INSERT = enum.auto()

    #: Replace records, overwriting existing.
    REPLACE = enum.auto()

    #: Insert records, skipping any that already exist.
    ENSURE = enum.auto()

66 

67 

# TODO: method is called with list[ReflectedColumn] in SA 2, and
# ReflectedColumn does not exist in 1.4.
def _checkExistingTableDefinition(name: str, spec: ddl.TableSpec, inspection: list) -> None:
    """Test that the definition of a table in a `ddl.TableSpec` and from
    database introspection are consistent.

    Only the set of column names is compared; column types and constraints
    are not inspected.

    Parameters
    ----------
    name : `str`
        Name of the table (only used in error messages).
    spec : `ddl.TableSpec`
        Specification of the table.
    inspection : `list`
        Column descriptions returned by
        `sqlalchemy.engine.reflection.Inspector.get_columns`. Each element
        is indexed with the ``"name"`` key to obtain the column name.

    Raises
    ------
    DatabaseConflictError
        Raised if the definitions are inconsistent.
    """
    # Keep the reflected names as a list so the error message preserves the
    # order reported by the database; compare as a set because order is
    # irrelevant for consistency.
    columnNames = [c["name"] for c in inspection]
    if spec.fields.names != set(columnNames):
        raise DatabaseConflictError(
            f"Table '{name}' exists but is defined differently in the database; "
            f"specification has columns {list(spec.fields.names)}, while the "
            f"table in the database has {columnNames}."
        )

96 

97 

class ReadOnlyDatabaseError(RuntimeError):
    """Error signalling that a write operation was requested on a read-only
    `Database`.
    """

102 

103 

class DatabaseConflictError(ConflictingDefinitionError):
    """Error signalling that database content (row values or schema
    entities) does not match what this client expects.
    """

108 

109 

class SchemaAlreadyDefinedError(RuntimeError):
    """Error signalling an attempt to initialize a database schema while
    some tables already exist.
    """

114 

115 

116class StaticTablesContext: 

117 """Helper class used to declare the static schema for a registry layer 

118 in a database. 

119 

120 An instance of this class is returned by `Database.declareStaticTables`, 

121 which should be the only way it should be constructed. 

122 """ 

123 

124 def __init__(self, db: Database, connection: sqlalchemy.engine.Connection): 

125 self._db = db 

126 self._foreignKeys: list[tuple[sqlalchemy.schema.Table, sqlalchemy.schema.ForeignKeyConstraint]] = [] 

127 self._inspector = sqlalchemy.inspect(connection) 

128 self._tableNames = frozenset(self._inspector.get_table_names(schema=self._db.namespace)) 

129 self._initializers: list[Callable[[Database], None]] = [] 

130 

131 def addTable(self, name: str, spec: ddl.TableSpec) -> sqlalchemy.schema.Table: 

132 """Add a new table to the schema, returning its sqlalchemy 

133 representation. 

134 

135 The new table may not actually be created until the end of the 

136 context created by `Database.declareStaticTables`, allowing tables 

137 to be declared in any order even in the presence of foreign key 

138 relationships. 

139 """ 

140 name = self._db._mangleTableName(name) 

141 if name in self._tableNames: 

142 _checkExistingTableDefinition( 

143 name, spec, self._inspector.get_columns(name, schema=self._db.namespace) 

144 ) 

145 metadata = self._db._metadata 

146 assert metadata is not None, "Guaranteed by context manager that returns this object." 

147 table = self._db._convertTableSpec(name, spec, metadata) 

148 for foreignKeySpec in spec.foreignKeys: 

149 self._foreignKeys.append((table, self._db._convertForeignKeySpec(name, foreignKeySpec, metadata))) 

150 return table 

151 

152 def addTableTuple(self, specs: tuple[ddl.TableSpec, ...]) -> tuple[sqlalchemy.schema.Table, ...]: 

153 """Add a named tuple of tables to the schema, returning their 

154 SQLAlchemy representations in a named tuple of the same type. 

155 

156 The new tables may not actually be created until the end of the 

157 context created by `Database.declareStaticTables`, allowing tables 

158 to be declared in any order even in the presence of foreign key 

159 relationships. 

160 

161 Notes 

162 ----- 

163 ``specs`` *must* be an instance of a type created by 

164 `collections.namedtuple`, not just regular tuple, and the returned 

165 object is guaranteed to be the same. Because `~collections.namedtuple` 

166 is just a factory for `type` objects, not an actual type itself, 

167 we cannot represent this with type annotations. 

168 """ 

169 return specs._make( # type: ignore 

170 self.addTable(name, spec) for name, spec in zip(specs._fields, specs, strict=True) # type: ignore 

171 ) 

172 

173 def addInitializer(self, initializer: Callable[[Database], None]) -> None: 

174 """Add a method that does one-time initialization of a database. 

175 

176 Initialization can mean anything that changes state of a database 

177 and needs to be done exactly once after database schema was created. 

178 An example for that could be population of schema attributes. 

179 

180 Parameters 

181 ---------- 

182 initializer : callable 

183 Method of a single argument which is a `Database` instance. 

184 """ 

185 self._initializers.append(initializer) 

186 

187 

188class Database(ABC): 

189 """An abstract interface that represents a particular database engine's 

190 representation of a single schema/namespace/database. 

191 

192 Parameters 

193 ---------- 

194 origin : `int` 

195 An integer ID that should be used as the default for any datasets, 

196 quanta, or other entities that use a (autoincrement, origin) compound 

197 primary key. 

198 engine : `sqlalchemy.engine.Engine` 

199 The SQLAlchemy engine for this `Database`. 

200 namespace : `str`, optional 

201 Name of the schema or namespace this instance is associated with. 

202 This is passed as the ``schema`` argument when constructing a 

203 `sqlalchemy.schema.MetaData` instance. We use ``namespace`` instead to 

204 avoid confusion between "schema means namespace" and "schema means 

205 table definitions". 

206 

207 Notes 

208 ----- 

209 `Database` requires all write operations to go through its special named 

210 methods. Our write patterns are sufficiently simple that we don't really 

211 need the full flexibility of SQL insert/update/delete syntax, and we need 

212 non-standard (but common) functionality in these operations sufficiently 

213 often that it seems worthwhile to provide our own generic API. 

214 

215 In contrast, `Database.query` allows arbitrary ``SELECT`` queries (via 

216 their SQLAlchemy representation) to be run, as we expect these to require 

217 significantly more sophistication while still being limited to standard 

218 SQL. 

219 

220 `Database` itself has several underscore-prefixed attributes: 

221 

222 - ``_engine``: SQLAlchemy object representing its engine. 

223 - ``_connection``: method returning a context manager for 

224 `sqlalchemy.engine.Connection` object. 

225 - ``_metadata``: the `sqlalchemy.schema.MetaData` object representing 

226 the tables and other schema entities. 

227 

228 These are considered protected (derived classes may access them, but other 

229 code should not), and read-only, aside from executing SQL via 

230 ``_connection``. 

231 """ 

232 

def __init__(self, *, origin: int, engine: sqlalchemy.engine.Engine, namespace: str | None = None):
    """Store the constructor arguments and reset per-instance state."""
    # Values supplied by the caller.
    self.origin = origin
    self.namespace = namespace
    self._engine = engine
    # State that starts out empty: no open session connection, no declared
    # schema metadata, and no known temporary tables.
    self._session_connection: sqlalchemy.engine.Connection | None = None
    self._metadata: sqlalchemy.schema.MetaData | None = None
    self._temp_tables: set[str] = set()

240 

def __repr__(self) -> str:
    """Return a representation built from the connection URL.

    The URL is reported instead of the constructor parameters because it is
    the more useful information; any password in it is masked.
    """
    url = self._engine.url
    if url.password is not None:
        # Never leak the real password into logs or tracebacks.
        url = url.set(password="***")
    uri = str(url)
    suffix = f"#{self.namespace}" if self.namespace else ""
    return f'{type(self).__name__}("{uri}{suffix}")'

252 

@classmethod
def makeDefaultUri(cls, root: str) -> str | None:
    """Return a default connection URI for the given root directory.

    This base implementation has no such default and always returns `None`.
    """
    return None

259 

@classmethod
def fromUri(
    cls,
    uri: str | sqlalchemy.engine.URL,
    *,
    origin: int,
    namespace: str | None = None,
    writeable: bool = True,
) -> Database:
    """Build a database instance from a SQLAlchemy URI.

    The engine is created with `makeEngine` and then handed to
    `fromEngine`.

    Parameters
    ----------
    uri : `str` or `sqlalchemy.engine.URL`
        A SQLAlchemy URI connection string.
    origin : `int`
        An integer ID that should be used as the default for any datasets,
        quanta, or other entities that use a (autoincrement, origin)
        compound primary key.
    namespace : `str`, optional
        A database namespace (i.e. schema) the new instance should be
        associated with. If `None` (default), the namespace (if any) is
        inferred from the URI.
    writeable : `bool`, optional
        If `True`, allow write operations on the database, including
        ``CREATE TABLE``.

    Returns
    -------
    db : `Database`
        A new `Database` instance.
    """
    engine = cls.makeEngine(uri, writeable=writeable)
    return cls.fromEngine(engine, origin=origin, namespace=namespace, writeable=writeable)

295 

@classmethod
@abstractmethod
def makeEngine(
    cls, uri: str | sqlalchemy.engine.URL, *, writeable: bool = True
) -> sqlalchemy.engine.Engine:
    """Build a `sqlalchemy.engine.Engine` for a SQLAlchemy URI.

    Parameters
    ----------
    uri : `str` or `sqlalchemy.engine.URL`
        A SQLAlchemy URI connection string.
    writeable : `bool`, optional
        If `True`, allow write operations on the database, including
        ``CREATE TABLE``.

    Returns
    -------
    engine : `sqlalchemy.engine.Engine`
        A database engine.

    Notes
    -----
    Subclasses supporting other ways of connecting to a database are
    encouraged to add optional arguments to their implementation, provided
    they stay compatible with this base-class call signature.
    """
    raise NotImplementedError()

324 

@classmethod
@abstractmethod
def fromEngine(
    cls,
    engine: sqlalchemy.engine.Engine,
    *,
    origin: int,
    namespace: str | None = None,
    writeable: bool = True,
) -> Database:
    """Build a new `Database` on top of an existing
    `sqlalchemy.engine.Engine`.

    Parameters
    ----------
    engine : `sqlalchemy.engine.Engine`
        The engine for the database. May be shared between `Database`
        instances.
    origin : `int`
        An integer ID that should be used as the default for any datasets,
        quanta, or other entities that use a (autoincrement, origin)
        compound primary key.
    namespace : `str`, optional
        A different database namespace (i.e. schema) the new instance
        should be associated with. If `None` (default), the namespace
        (if any) is inferred from the connection.
    writeable : `bool`, optional
        If `True`, allow write operations on the database, including
        ``CREATE TABLE``.

    Returns
    -------
    db : `Database`
        A new `Database` instance.

    Notes
    -----
    This method lets different `Database` instances share one engine,
    which is desirable when they represent different namespaces that can
    be queried together.
    """
    raise NotImplementedError()

366 

@final
@contextmanager
def session(self) -> Iterator[None]:
    """Open a session (a persistent connection to the database) for the
    duration of a ``with`` block.

    Returns
    -------
    context : `AbstractContextManager` [ `None` ]
        A context manager that does not return a value when entered.

    Notes
    -----
    Use this when a sequence of read-only SQL operations will be performed
    in rapid succession *without* a requirement that they yield consistent
    results in the presence of concurrent writes (or, more rarely, when
    conflicting concurrent writes are rare/impossible and the session will
    be open long enough that a transaction is inadvisable).

    The work is delegated to `_session`; the connection it provides is
    deliberately not exposed to the caller.
    """
    with self._session():
        yield

388 

@final
@contextmanager
def transaction(
    self,
    *,
    interrupting: bool = False,
    savepoint: bool = False,
    lock: Iterable[sqlalchemy.schema.Table] = (),
    for_temp_tables: bool = False,
) -> Iterator[None]:
    """Run the body of a ``with`` block inside a transaction.

    Parameters
    ----------
    interrupting : `bool`, optional
        If `True` (`False` is default), this transaction block may not be
        nested without an outer one, and attempting to do so is a logic
        (i.e. assertion) error.
    savepoint : `bool`, optional
        If `True` (`False` is default), create a `SAVEPOINT`, allowing
        exceptions raised by the database (e.g. due to constraint
        violations) during this transaction's context to be caught outside
        it without also rolling back all operations in an outer transaction
        block. If `False`, transactions may still be nested, but a
        rollback may be generated at any level and affects all levels, and
        commits are deferred until the outermost block completes. If any
        outer transaction block was created with ``savepoint=True``, all
        inner blocks will be as well (regardless of the actual value
        passed). This has no effect if this is the outermost transaction.
    lock : `~collections.abc.Iterable` [ `sqlalchemy.schema.Table` ], \
            optional
        A list of tables to lock for the duration of this transaction.
        These locks are guaranteed to prevent concurrent writes and allow
        this transaction (only) to acquire the same locks (others should
        block), but only prevent concurrent reads if the database engine
        requires that in order to block concurrent writes.
    for_temp_tables : `bool`, optional
        If `True`, this transaction may involve creating temporary tables.

    Returns
    -------
    context : `AbstractContextManager` [ `None` ]
        A context manager that commits the transaction when it is exited
        without error and rolls back the transaction when it is exited via
        an exception.

    Notes
    -----
    All transactions on a connection managed by one or more `Database`
    instances _must_ go through this method, or transaction state will not
    be correctly managed.

    The work is delegated to `_transaction`; the values it provides on
    entry are deliberately not exposed to the caller.
    """
    options = dict(
        interrupting=interrupting,
        savepoint=savepoint,
        lock=lock,
        for_temp_tables=for_temp_tables,
    )
    with self._transaction(**options):
        yield

445 

@contextmanager
def temporary_table(
    self, spec: ddl.TableSpec, name: str | None = None
) -> Iterator[sqlalchemy.schema.Table]:
    """Return a context manager that creates and then drops a temporary
    table.

    Parameters
    ----------
    spec : `ddl.TableSpec`
        Specification for the columns. Unique and foreign key constraints
        may be ignored.
    name : `str`, optional
        If provided, the name of the SQL construct. If not provided, an
        opaque but unique identifier is generated.

    Returns
    -------
    context : `AbstractContextManager` [ `sqlalchemy.schema.Table` ]
        A context manager that returns a SQLAlchemy representation of the
        temporary table when entered.

    Notes
    -----
    Temporary tables may be created, dropped, and written to even in
    read-only databases - at least according to the Python-level
    protections in the `Database` classes. Server permissions may say
    otherwise, but in that case they probably need to be modified to
    support the full range of expected read-only butler behavior.
    """
    # The whole lifetime of the table is kept inside one session.
    with self._session() as connection:
        table = self._make_temporary_table(connection, spec=spec, name=name)
        # Record the key so that `isTableWriteable` accepts this table even
        # when the database itself is not writeable.
        self._temp_tables.add(table.key)
        try:
            yield table
        finally:
            # Drop the table inside a transaction whether or not the
            # caller's block raised, then forget its key.
            with self._transaction():
                table.drop(connection)
            self._temp_tables.remove(table.key)

485 

@contextmanager
def _session(self) -> Iterator[sqlalchemy.engine.Connection]:
    """Protected implementation for `session` that actually returns the
    connection.

    This method is for internal `Database` calls that need the actual
    SQLAlchemy connection object. It should be overridden by subclasses
    instead of `session` itself.

    Returns
    -------
    context : `AbstractContextManager` [ `sqlalchemy.engine.Connection` ]
        A context manager that returns a SQLAlchemy connection when
        entered.

    Notes
    -----
    Sessions nest: if a session connection is already open it is reused
    and left open on exit; only the outermost call opens and closes the
    connection.
    """
    if self._session_connection is not None:
        # session already started, just reuse that
        yield self._session_connection
    else:
        try:
            # open new connection and close it when done
            self._session_connection = self._engine.connect()
            yield self._session_connection
        finally:
            # The guard covers the case where ``connect()`` itself raised
            # and no connection was ever stored.
            if self._session_connection is not None:
                self._session_connection.close()
                self._session_connection = None
            # Temporary tables only live within session
            self._temp_tables = set()

516 

@contextmanager
def _transaction(
    self,
    *,
    interrupting: bool = False,
    savepoint: bool = False,
    lock: Iterable[sqlalchemy.schema.Table] = (),
    for_temp_tables: bool = False,
) -> Iterator[tuple[bool, sqlalchemy.engine.Connection]]:
    """Protected implementation for `transaction` that actually returns the
    connection and whether this is a new outermost transaction.

    This method is for internal `Database` calls that need the actual
    SQLAlchemy connection object. It should be overridden by subclasses
    instead of `transaction` itself.

    Parameters
    ----------
    interrupting : `bool`, optional
        If `True` (`False` is default), this transaction block may not be
        nested without an outer one, and attempting to do so is a logic
        (i.e. assertion) error.
    savepoint : `bool`, optional
        If `True` (`False` is default), create a `SAVEPOINT`, allowing
        exceptions raised by the database (e.g. due to constraint
        violations) during this transaction's context to be caught outside
        it without also rolling back all operations in an outer transaction
        block. If `False`, transactions may still be nested, but a
        rollback may be generated at any level and affects all levels, and
        commits are deferred until the outermost block completes. If any
        outer transaction block was created with ``savepoint=True``, all
        inner blocks will be as well (regardless of the actual value
        passed). This has no effect if this is the outermost transaction.
    lock : `~collections.abc.Iterable` [ `sqlalchemy.schema.Table` ], \
            optional
        A list of tables to lock for the duration of this transaction.
        These locks are guaranteed to prevent concurrent writes and allow
        this transaction (only) to acquire the same locks (others should
        block), but only prevent concurrent reads if the database engine
        requires that in order to block concurrent writes.
    for_temp_tables : `bool`, optional
        If `True`, this transaction may involve creating temporary tables.
        This base implementation does not read the flag.

    Returns
    -------
    context : `AbstractContextManager` [ `tuple` [ `bool`,
            `sqlalchemy.engine.Connection` ] ]
        A context manager that commits the transaction when it is exited
        without error and rolls back the transaction when it is exited via
        an exception. When entered, it returns a tuple of:

        - ``is_new`` (`bool`): whether this is a new (outermost)
          transaction;
        - ``connection`` (`sqlalchemy.engine.Connection`): the connection.
    """
    with self._session() as connection:
        already_in_transaction = connection.in_transaction()
        assert not (interrupting and already_in_transaction), (
            "Logic error in transaction nesting: an operation that would "
            "interrupt the active transaction context has been requested."
        )
        # Once any enclosing block uses a savepoint, inner blocks do too.
        savepoint = savepoint or connection.in_nested_transaction()
        trans: sqlalchemy.engine.Transaction | None
        if already_in_transaction:
            if savepoint:
                trans = connection.begin_nested()
            else:
                # Nested non-savepoint transactions don't do anything.
                trans = None
        else:
            # Use a regular (non-savepoint) transaction always for the
            # outermost context.
            trans = connection.begin()
        # Locks are requested only after a transaction is active.
        self._lockTables(connection, lock)
        try:
            yield not already_in_transaction, connection
            if trans is not None:
                trans.commit()
        except BaseException:
            # Roll back on any exit via exception (including
            # non-`Exception` ones) and let it propagate.
            if trans is not None:
                trans.rollback()
            raise

599 

@abstractmethod
def _lockTables(
    self, connection: sqlalchemy.engine.Connection, tables: Iterable[sqlalchemy.schema.Table] = ()
) -> None:
    """Acquire locks on the given tables.

    Subclasses implement this hook; it is invoked by `transaction` and
    should not be called directly by other code.

    Parameters
    ----------
    connection : `sqlalchemy.engine.Connection`
        Database connection object. A transaction is guaranteed to be
        already in progress on this connection.
    tables : `~collections.abc.Iterable` [ `sqlalchemy.schema.Table` ], \
            optional
        A list of tables to lock for the duration of this transaction.
        These locks are guaranteed to prevent concurrent writes and allow
        this transaction (only) to acquire the same locks (others should
        block), but only prevent concurrent reads if the database engine
        requires that in order to block concurrent writes.
    """
    raise NotImplementedError()

623 

def isTableWriteable(self, table: sqlalchemy.schema.Table) -> bool:
    """Report whether a table may be written to.

    A table is writeable when the database connection is read-write or
    when the table is one of the known temporary tables.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy table object to check.

    Returns
    -------
    writeable : `bool`
        Whether this table is writeable.
    """
    database_writeable = self.isWriteable()
    if database_writeable:
        return database_writeable
    return table.key in self._temp_tables

639 

def assertTableWriteable(self, table: sqlalchemy.schema.Table, msg: str) -> None:
    """Raise if the given table is not writeable.

    A table is writeable when `isTableWriteable` says so, i.e. when the
    database connection is read-write or the table is a temporary table.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy table object to check.
    msg : `str`
        Message for the `ReadOnlyDatabaseError` raised when the table is
        not writeable.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if the table is not writeable.
    """
    if not self.isTableWriteable(table):
        raise ReadOnlyDatabaseError(msg)

654 

@contextmanager
def declareStaticTables(self, *, create: bool) -> Iterator[StaticTablesContext]:
    """Return a context manager in which the database's static DDL schema
    can be declared.

    Parameters
    ----------
    create : `bool`
        If `True`, attempt to create all tables at the end of the context.
        If `False`, they will be assumed to already exist.

    Returns
    -------
    schema : `StaticTablesContext`
        A helper object that is used to add new tables.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if ``create`` is `True` and `Database.isWriteable` is
        `False`.
    SchemaAlreadyDefinedError
        Raised if ``create`` is `True` and the database namespace already
        contains tables.

    Examples
    --------
    Given a `Database` instance ``db``::

        with db.declareStaticTables(create=True) as schema:
            schema.addTable("table1", TableSpec(...))
            schema.addTable("table2", TableSpec(...))

    Notes
    -----
    A database's static DDL schema must be declared before any dynamic
    tables are managed via calls to `ensureTableExists` or
    `getExistingTable`. The order in which static schema tables are added
    inside the context block is unimportant; they will automatically be
    sorted and added in an order consistent with their foreign key
    relationships.
    """
    if create and not self.isWriteable():
        raise ReadOnlyDatabaseError(f"Cannot create tables in read-only database {self}.")
    self._metadata = sqlalchemy.MetaData(schema=self.namespace)
    try:
        with self._transaction() as (_, connection):
            context = StaticTablesContext(self, connection)
            if create and context._tableNames:
                # Looks like database is already initialized, to avoid
                # danger of modifying/destroying valid schema we refuse to
                # do anything in this case
                raise SchemaAlreadyDefinedError(f"Cannot create tables in non-empty database {self}.")
            yield context
            # Foreign keys are attached only after the caller has declared
            # every table.
            for table, foreignKey in context._foreignKeys:
                table.append_constraint(foreignKey)
            if create:
                if (
                    self.namespace is not None
                    and self.namespace not in context._inspector.get_schema_names()
                ):
                    connection.execute(sqlalchemy.schema.CreateSchema(self.namespace))
                # In our tables we have columns that make use of sqlalchemy
                # Sequence objects. There is currently a bug in sqlalchemy
                # that causes a deprecation warning to be thrown on a
                # property of the Sequence object when the repr for the
                # sequence is created. Here a filter is used to catch these
                # deprecation warnings when tables are created.
                with warnings.catch_warnings():
                    warnings.simplefilter("ignore", category=sqlalchemy.exc.SADeprecationWarning)
                    self._metadata.create_all(connection)
                # call all initializer methods sequentially
                for init in context._initializers:
                    init(self)
    except BaseException:
        # Any failure leaves the database without declared metadata.
        self._metadata = None
        raise

729 

@abstractmethod
def isWriteable(self) -> bool:
    """Report whether this client is able to modify the database.

    Returns
    -------
    writeable : `bool`
        `True` if this database can be modified by this client.
    """
    raise NotImplementedError()

734 

@abstractmethod
def __str__(self) -> str:
    """Return a human-readable identifier for this `Database`.

    The identifier includes any namespace or schema that identifies its
    names within a `Registry`.
    """
    raise NotImplementedError()

741 

@property
def dialect(self) -> sqlalchemy.engine.Dialect:
    """The SQLAlchemy dialect of the engine backing this database
    (`sqlalchemy.engine.Dialect`).
    """
    engine = self._engine
    return engine.dialect

748 

def shrinkDatabaseEntityName(self, original: str) -> str:
    """Produce a name that fits this database engine's length limits for
    table, constraint, index, and sequence names.

    Implementations should not assume that simple truncation is safe,
    because multiple long names often begin with the same prefix.

    This default implementation returns the given name unchanged.

    Parameters
    ----------
    original : `str`
        The original name.

    Returns
    -------
    shrunk : `str`
        The new, possibly shortened name.
    """
    return original

770 

def expandDatabaseEntityName(self, shrunk: str) -> str:
    """Retrieve the original name for a database entity that was too long
    to fit within the database engine's limits.

    The default implementation simply returns the given name.

    Parameters
    ----------
    shrunk : `str`
        The possibly shortened name.

    Returns
    -------
    original : `str`
        The original name.
    """
    return shrunk

786 

def _mangleTableName(self, name: str) -> str:
    """Translate a logical, user-visible table name into the true table
    name used in the database.

    This default implementation returns the given name unchanged.

    Parameters
    ----------
    name : `str`
        Input table name. Should not include a namespace (i.e. schema)
        prefix.

    Returns
    -------
    mangled : `str`
        Mangled version of the table name (still with no namespace prefix).

    Notes
    -----
    Reimplementations must be idempotent: mangling an already-mangled
    name must have no effect.
    """
    return name

810 

def _makeColumnConstraints(self, table: str, spec: ddl.FieldSpec) -> list[sqlalchemy.CheckConstraint]:
    """Build the constraints implied by a field specification.

    Parameters
    ----------
    table : `str`
        Name of the table this column is being added to.
    spec : `FieldSpec`
        Specification for the field to be added.

    Returns
    -------
    constraint : `list` of `sqlalchemy.CheckConstraint`
        Constraints added for this column; this default implementation
        adds none and returns a new empty list.
    """
    no_constraints: list[sqlalchemy.CheckConstraint] = []
    return no_constraints

828 

def _convertFieldSpec(
    self, table: str, spec: ddl.FieldSpec, metadata: sqlalchemy.MetaData, **kwargs: Any
) -> sqlalchemy.schema.Column:
    """Translate a `FieldSpec` into a `sqlalchemy.schema.Column`.

    Parameters
    ----------
    table : `str`
        Name of the table this column is being added to.
    spec : `FieldSpec`
        Specification for the field to be added.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema this field's table is
        being added to.
    **kwargs
        Additional keyword arguments to forward to the
        `sqlalchemy.schema.Column` constructor. This makes it easier for
        derived classes to delegate to ``super()`` while making only minor
        changes.

    Returns
    -------
    column : `sqlalchemy.schema.Column`
        SQLAlchemy representation of the field.
    """
    extra_args = []
    if spec.autoincrement:
        # Generate a sequence to use for auto incrementing for databases
        # that do not support it natively. This will be ignored by
        # sqlalchemy for databases that do support it.
        sequence_name = self.shrinkDatabaseEntityName(f"{table}_seq_{spec.name}")
        extra_args.append(sqlalchemy.Sequence(sequence_name, metadata=metadata))
    assert spec.doc is None or isinstance(spec.doc, str), f"Bad doc for {table}.{spec.name}."
    column_type = spec.getSizedColumnType()
    return sqlalchemy.schema.Column(
        spec.name,
        column_type,
        *extra_args,
        nullable=spec.nullable,
        primary_key=spec.primaryKey,
        comment=spec.doc,
        server_default=spec.default,
        **kwargs,
    )

875 

def _convertForeignKeySpec(
    self, table: str, spec: ddl.ForeignKeySpec, metadata: sqlalchemy.MetaData, **kwargs: Any
) -> sqlalchemy.schema.ForeignKeyConstraint:
    """Convert a `ForeignKeySpec` to a
    `sqlalchemy.schema.ForeignKeyConstraint`.

    Parameters
    ----------
    table : `str`
        Name of the table this foreign key is being added to.
    spec : `ForeignKeySpec`
        Specification for the foreign key to be added.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema this constraint is
        being added to.  Not used by this base implementation.
    **kwargs
        Additional keyword arguments to forward to the
        `sqlalchemy.schema.ForeignKeyConstraint` constructor.  This is
        provided to make it easier for derived classes to delegate to
        ``super()`` while making only minor changes.

    Returns
    -------
    constraint : `sqlalchemy.schema.ForeignKeyConstraint`
        SQLAlchemy representation of the constraint.
    """
    # The target table name is needed both for the constraint name and for
    # the referenced column paths, so mangle it once.
    target_table = self._mangleTableName(spec.table)
    name = self.shrinkDatabaseEntityName(
        "_".join(["fkey", table, target_table] + list(spec.target) + list(spec.source))
    )
    return sqlalchemy.schema.ForeignKeyConstraint(
        spec.source,
        [f"{target_table}.{col}" for col in spec.target],
        name=name,
        ondelete=spec.onDelete,
        # Forward caller-supplied options as documented; previously they
        # were accepted but silently discarded.
        **kwargs,
    )

913 

914 def _convertExclusionConstraintSpec( 

915 self, 

916 table: str, 

917 spec: tuple[str | type[TimespanDatabaseRepresentation], ...], 

918 metadata: sqlalchemy.MetaData, 

919 ) -> sqlalchemy.schema.Constraint: 

920 """Convert a `tuple` from `ddl.TableSpec.exclusion` into a SQLAlchemy 

921 constraint representation. 

922 

923 Parameters 

924 ---------- 

925 table : `str` 

926 Name of the table this constraint is being added to. 

927 spec : `tuple` [ `str` or `type` ] 

928 A tuple of `str` column names and the `type` object returned by 

929 `getTimespanRepresentation` (which must appear exactly once), 

930 indicating the order of the columns in the index used to back the 

931 constraint. 

932 metadata : `sqlalchemy.MetaData` 

933 SQLAlchemy representation of the DDL schema this constraint is 

934 being added to. 

935 

936 Returns 

937 ------- 

938 constraint : `sqlalchemy.schema.Constraint` 

939 SQLAlchemy representation of the constraint. 

940 

941 Raises 

942 ------ 

943 NotImplementedError 

944 Raised if this database does not support exclusion constraints. 

945 """ 

946 raise NotImplementedError(f"Database {self} does not support exclusion constraints.") 

947 

def _convertTableSpec(
    self, name: str, spec: ddl.TableSpec, metadata: sqlalchemy.MetaData, **kwargs: Any
) -> sqlalchemy.schema.Table:
    """Build the `sqlalchemy.schema.Table` described by a `TableSpec`.

    Parameters
    ----------
    name : `str`
        Logical name of the table; it is passed through
        `_mangleTableName` before use.
    spec : `TableSpec`
        Specification for the table to be added.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema the table is being
        added to.
    **kwargs
        Extra keyword arguments passed straight through to the
        `sqlalchemy.schema.Table` constructor, so that derived classes can
        delegate to ``super()`` while changing only small details.

    Returns
    -------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy representation of the table.

    Notes
    -----
    Foreign key *constraints* in ``spec.foreignKeys`` are deliberately not
    created here, in order to avoid circular dependencies; they are added
    by higher-level logic in `ensureTableExists`, `getExistingTable`, and
    `declareStaticTables`.  Only the indexes requested for foreign keys
    are created here.
    """
    name = self._mangleTableName(name)

    # Columns first, then any per-column constraints.
    items: list[sqlalchemy.schema.SchemaItem] = []
    for field in spec.fields:
        items.append(self._convertFieldSpec(name, field, metadata))
    for field in spec.fields:
        items.extend(self._makeColumnConstraints(name, field))

    # Column tuples that already have an index (primary key, unique
    # constraints, explicit indexes), so that no duplicate explicit or
    # foreign key index is created for them.
    covered = {tuple(field.name for field in spec.fields if field.primaryKey)}

    for unique_columns in spec.unique:
        constraint_name = self.shrinkDatabaseEntityName("_".join([name, "unq", *unique_columns]))
        items.append(sqlalchemy.schema.UniqueConstraint(*unique_columns, name=constraint_name))
    covered.update(spec.unique)

    for index in spec.indexes:
        if index.columns in covered:
            continue
        index_name = self.shrinkDatabaseEntityName("_".join([name, "idx", *index.columns]))
        items.append(
            sqlalchemy.schema.Index(
                index_name,
                *index.columns,
                unique=(index.columns in spec.unique),
                **index.kwargs,
            )
        )
    covered.update(index.columns for index in spec.indexes)

    for fk in spec.foreignKeys:
        if fk.addIndex and fk.source not in covered:
            fk_index_name = self.shrinkDatabaseEntityName("_".join((name, "fkidx") + fk.source))
            items.append(sqlalchemy.schema.Index(fk_index_name, *fk.source))

    for excl in spec.exclusion:
        items.append(self._convertExclusionConstraintSpec(name, excl, metadata))

    assert spec.doc is None or isinstance(spec.doc, str), f"Bad doc for {name}."
    return sqlalchemy.schema.Table(name, metadata, *items, comment=spec.doc, info={"spec": spec}, **kwargs)

1021 

def ensureTableExists(self, name: str, spec: ddl.TableSpec) -> sqlalchemy.schema.Table:
    """Return the table with the given name and specification, creating it
    first if it does not exist yet.

    Parameters
    ----------
    name : `str`
        Name of the table (not including namespace qualifiers).
    spec : `TableSpec`
        Specification for the table.  Used when creating the table; it
        *may* also be used to check an existing table for consistency, but
        no such check is guaranteed.

    Returns
    -------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy representation of the table.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False`, and the table does not
        already exist.
    DatabaseConflictError
        Raised if the table exists but ``spec`` is inconsistent with its
        definition.

    Notes
    -----
    This method may not be called within transactions.  It may be called
    on read-only databases if and only if the table does in fact already
    exist.

    Subclasses may override this method, but usually should not need to.
    """
    # TODO: if _engine is used to make a table then it uses separate
    # connection and should not interfere with current transaction
    assert (
        self._session_connection is None or not self._session_connection.in_transaction()
    ), "Table creation interrupts transactions."
    assert self._metadata is not None, "Static tables must be declared before dynamic tables."

    existing = self.getExistingTable(name, spec)
    if existing is not None:
        return existing
    if not self.isWriteable():
        raise ReadOnlyDatabaseError(
            f"Table {name} does not exist, and cannot be created because database {self} is read-only."
        )

    new_table = self._convertTableSpec(name, spec, self._metadata)
    for fk_spec in spec.foreignKeys:
        new_table.append_constraint(self._convertForeignKeySpec(name, fk_spec, self._metadata))
    try:
        with self._transaction() as (_, connection):
            new_table.create(connection)
    except sqlalchemy.exc.DatabaseError:
        # Another process may have created the table in the meantime,
        # which usually shows up as OperationalError or ProgrammingError.
        # IF NOT EXISTS cannot be used here because of a server-side
        # PostgreSQL race condition that causes IntegrityError.  All of
        # these derive from DatabaseError, so catch that and look for the
        # table again.
        # NOTE(review): `_convertTableSpec` above was given
        # ``self._metadata``; if building the Table registers it there,
        # this re-check may find that in-memory entry rather than a table
        # created by another process - confirm.
        raced = self.getExistingTable(name, spec)
        if raced is None:
            raise
        return raced
    return new_table

1087 

def getExistingTable(self, name: str, spec: ddl.TableSpec) -> sqlalchemy.schema.Table | None:
    """Look up an existing table with the given name and specification.

    Parameters
    ----------
    name : `str`
        Name of the table (not including namespace qualifiers).
    spec : `TableSpec`
        Specification for the table.  Used to build the SQLAlchemy
        representation of the table and to check that the actual table is
        consistent with it.

    Returns
    -------
    table : `sqlalchemy.schema.Table` or `None`
        SQLAlchemy representation of the table, or `None` if it does not
        exist.

    Raises
    ------
    DatabaseConflictError
        Raised if the table exists but ``spec`` is inconsistent with its
        definition.

    Notes
    -----
    This method can be called within transactions and never modifies the
    database.

    Subclasses may override this method, but usually should not need to.
    """
    assert self._metadata is not None, "Static tables must be declared before dynamic tables."
    name = self._mangleTableName(name)

    # First consult the in-memory schema, keyed by namespace-qualified
    # name when a namespace is in use.
    metadata_key = name if self.namespace is None else f"{self.namespace}.{name}"
    known = self._metadata.tables.get(metadata_key)
    if known is not None:
        if spec.fields.names != set(known.columns.keys()):
            raise DatabaseConflictError(
                f"Table '{name}' has already been defined differently; the new "
                f"specification has columns {list(spec.fields.names)}, while "
                f"the previous definition has {list(known.columns.keys())}."
            )
        return known

    # Not declared yet in this process: ask the database itself, through
    # the session connection when there is one and the engine otherwise.
    bind = self._engine if self._session_connection is None else self._session_connection
    inspector = sqlalchemy.inspect(bind, raiseerr=True)
    if name not in inspector.get_table_names(schema=self.namespace):
        return None
    _checkExistingTableDefinition(name, spec, inspector.get_columns(name, schema=self.namespace))
    reflected = self._convertTableSpec(name, spec, self._metadata)
    for fk_spec in spec.foreignKeys:
        reflected.append_constraint(self._convertForeignKeySpec(name, fk_spec, self._metadata))
    return reflected

1140 

def _make_temporary_table(
    self,
    connection: sqlalchemy.engine.Connection,
    spec: ddl.TableSpec,
    name: str | None = None,
    **kwargs: Any,
) -> sqlalchemy.schema.Table:
    """Create a temporary table.

    Parameters
    ----------
    connection : `sqlalchemy.engine.Connection`
        Connection used to create the table.
    spec : `TableSpec`
        Specification for the table.
    name : `str`, optional
        A name for the table that is unique within this
        session/connection.  Subclasses may override to modify the actual
        name used.  When omitted, a unique name is generated.
    **kwargs
        Extra keyword arguments passed straight through to the
        `sqlalchemy.schema.Table` constructor, so that derived classes can
        delegate to ``super()`` while changing only small details.

    Returns
    -------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy representation of the table.

    Raises
    ------
    RuntimeError
        Raised if the static schema has not been defined yet.
    ValueError
        Raised if the transformed table name differs from ``name`` and is
        already registered as a temporary table.
    """
    if name is None:
        name = f"tmp_{uuid.uuid4().hex}"
    schema_metadata = self._metadata
    if schema_metadata is None:
        raise RuntimeError("Cannot create temporary table before static schema is defined.")
    temp_table = self._convertTableSpec(
        name,
        spec,
        schema_metadata,
        prefixes=["TEMPORARY"],
        schema=sqlalchemy.schema.BLANK_SCHEMA,
        **kwargs,
    )
    name_was_transformed = temp_table.key != name
    if temp_table.key in self._temp_tables and name_was_transformed:
        raise ValueError(
            f"A temporary table with name {name} (transformed to {temp_table.key} by "
            "Database) already exists."
        )
    for fk_spec in spec.foreignKeys:
        temp_table.append_constraint(self._convertForeignKeySpec(name, fk_spec, schema_metadata))
    with self._transaction():
        temp_table.create(connection)
    return temp_table

1189 

@classmethod
def getTimespanRepresentation(cls) -> type[TimespanDatabaseRepresentation]:
    """Return the `type` describing how `Timespan` objects are stored in
    this database.

    `Database` itself does not use the returned type anywhere else; it is
    up to calling code to keep DDL and queries consistent with it.

    Returns
    -------
    TimespanReprClass : `type` (`TimespanDatabaseRepresentation` subclass)
        A type that encapsulates the way `Timespan` objects should be
        stored in this database.

    Notes
    -----
    The choice of representation ultimately belongs to a `Database`
    implementation, but the timespan-mangling logic is kept outside those
    implementations for two main reasons:

    - Timespans appear in relatively few tables and queries in our
      typical usage, and the code that operates on them already knows it
      is working with timespans.  A timespan-representation-aware
      implementation of, say, `insert` would instead need extra logic to
      detect when timespan-mangling is required, which would usually be
      useless overhead.

    - SQLAlchemy's rich SELECT query expression system has no way to wrap
      multiple columns in a single expression object (the ORM does, but
      we are not using the ORM).  Encapsulating timespan representations
      there would mean wrapping _much_ more of that code in our own
      interfaces.
    """
    representation = TimespanDatabaseRepresentation.Compound
    return representation

1225 

def sync(
    self,
    table: sqlalchemy.schema.Table,
    *,
    keys: dict[str, Any],
    compared: dict[str, Any] | None = None,
    extra: dict[str, Any] | None = None,
    returning: Sequence[str] | None = None,
    update: bool = False,
) -> tuple[dict[str, Any] | None, bool | dict[str, Any]]:
    """Insert into a table as necessary to ensure database contains
    values equivalent to the given ones.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table to be queried and possibly inserted into.
    keys : `dict`
        Column name-value pairs used to search for an existing row; must
        be a combination that can be used to select a single row if one
        exists.  If such a row does not exist, these values are used in
        the insert.
    compared : `dict`, optional
        Column name-value pairs that are compared to those in any existing
        row.  If such a row does not exist, these values are used in the
        insert.
    extra : `dict`, optional
        Column name-value pairs that are ignored if a matching row exists,
        but used in an insert if one is necessary.
    returning : `~collections.abc.Sequence` of `str`, optional
        The names of columns whose values should be returned.
    update : `bool`, optional
        If `True` (`False` is default), update the existing row with the
        values in ``compared`` instead of raising `DatabaseConflictError`.

    Returns
    -------
    row : `dict`, optional
        The value of the fields indicated by ``returning``, or `None` if
        ``returning`` is `None`.
    inserted_or_updated : `bool` or `dict`
        If `True`, a new row was inserted; if `False`, a matching row
        already existed.  If a `dict` (only possible if ``update=True``),
        then an existing row was updated, and the dict maps the names of
        the updated columns to their *old* values (new values can be
        obtained from ``compared``).

    Raises
    ------
    DatabaseConflictError
        Raised if the values in ``compared`` do not match the values in the
        database.
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False`, and no matching record
        already exists.
    ConflictingDefinitionError
        Raised if the table is writeable but no row matching ``keys`` is
        found after the insert attempt.
    RuntimeError
        Raised if ``keys`` matches more than one row, or if a conflict is
        detected right after an insert that was reported as successful.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    It may be called on read-only databases if and only if the matching row
    does in fact already exist.
    """

    def check() -> tuple[int, dict[str, Any] | None, list | None]:
        """Query for a row that matches the ``keys`` argument, and compare
        to what was given by the caller.

        Returns
        -------
        n : `int`
            Number of matching rows.  ``n != 1`` is always an error, but
            it's a different kind of error depending on where `check` is
            being called.
        bad : `dict` or `None`
            The subset of the keys of ``compared`` for which the existing
            values did not match the given one, mapped to the existing
            values in the database.  A non-empty ``bad`` signals a
            conflict; how it is handled depends on the calling context.
            `None` if ``n != 1``.
        result : `list` or `None`
            Results in the database that correspond to the columns given
            in ``returning``, or `None` if ``returning is None`` or
            ``n != 1``.
        """
        toSelect: set[str] = set()
        if compared is not None:
            toSelect.update(compared.keys())
        if returning is not None:
            toSelect.update(returning)
        if not toSelect:
            # Need to select some column, even if we just want to see
            # how many rows we get back.
            toSelect.add(next(iter(keys.keys())))
        selectSql = (
            sqlalchemy.sql.select(*[table.columns[k].label(k) for k in toSelect])
            .select_from(table)
            .where(sqlalchemy.sql.and_(*[table.columns[k] == v for k, v in keys.items()]))
        )
        with self._transaction() as (_, connection):
            fetched = list(connection.execute(selectSql).mappings())
        if len(fetched) != 1:
            return len(fetched), None, None
        existing = fetched[0]
        if compared is not None:

            def safeNotEqual(a: Any, b: Any) -> bool:
                # astropy times are compared through the TimeConverter
                # helper rather than with plain ``!=``.
                if isinstance(a, astropy.time.Time):
                    return not time_utils.TimeConverter().times_equal(a, b)
                return a != b

            inconsistencies = {
                k: existing[k] for k, v in compared.items() if safeNotEqual(existing[k], v)
            }
        else:
            inconsistencies = {}
        if returning is not None:
            toReturn: list | None = [existing[k] for k in returning]
        else:
            toReturn = None
        return 1, inconsistencies, toReturn

    def format_bad(inconsistencies: dict[str, Any]) -> str:
        """Format the 'bad' dictionary of existing values returned by
        ``check`` into a string suitable for an error message.
        """
        assert compared is not None, "Should not be able to get inconsistencies without comparing."
        return ", ".join(f"{k}: {v!r} != {compared[k]!r}" for k, v in inconsistencies.items())

    if self.isTableWriteable(table):
        # Try an insert first, but allow it to fail (in only specific
        # ways).
        row = keys.copy()
        if compared is not None:
            row.update(compared)
        if extra is not None:
            row.update(extra)
        with self.transaction():
            inserted = bool(self.ensure(table, row))
            inserted_or_updated: bool | dict[str, Any]
            # Need to perform check() for this branch inside the
            # transaction, so we roll back an insert that didn't do
            # what we expected.  That limits the extent to which we
            # can reduce duplication between this block and the other
            # ones that perform similar logic.
            n, bad, result = check()
            if n < 1:
                raise ConflictingDefinitionError(
                    f"Attempted to ensure {row} exists by inserting it with ON CONFLICT IGNORE, "
                    f"but a post-insert query on {keys} returned no results. "
                    f"Insert was {'' if inserted else 'not '}reported as successful. "
                    "This can occur if the insert violated a database constraint other than the "
                    "unique constraint or primary key used to identify the row in this call."
                )
            elif n > 1:
                raise RuntimeError(
                    f"Keys passed to sync {keys.keys()} do not comprise a "
                    f"unique constraint for table {table.name}."
                )
            elif bad:
                assert (
                    compared is not None
                ), "Should not be able to get inconsistencies without comparing."
                if inserted:
                    raise RuntimeError(
                        f"Conflict ({bad}) in sync after successful insert; this is "
                        "possible if the same table is being updated by a concurrent "
                        "process that isn't using sync, but it may also be a bug in "
                        "daf_butler."
                    )
                elif update:
                    # Overwrite only the columns that differ, using the
                    # caller's values from ``compared``.
                    # NOTE(review): ``result`` was fetched by check()
                    # before this update, so any ``returning`` column that
                    # is also in ``bad`` is reported with its old value -
                    # confirm this is intended.
                    with self._transaction() as (_, connection):
                        connection.execute(
                            table.update()
                            .where(sqlalchemy.sql.and_(*[table.columns[k] == v for k, v in keys.items()]))
                            .values(**{k: compared[k] for k in bad})
                        )
                    inserted_or_updated = bad
                else:
                    raise DatabaseConflictError(
                        f"Conflict in sync for table {table.name} on column(s) {format_bad(bad)}."
                    )
            else:
                inserted_or_updated = inserted
    else:
        # Database is not writeable; just see if the row exists.
        n, bad, result = check()
        if n < 1:
            raise ReadOnlyDatabaseError("sync needs to insert, but database is read-only.")
        elif n > 1:
            raise RuntimeError("Keys passed to sync do not comprise a unique constraint.")
        elif bad:
            if update:
                raise ReadOnlyDatabaseError("sync needs to update, but database is read-only.")
            else:
                raise DatabaseConflictError(
                    f"Conflict in sync for table {table.name} on column(s) {format_bad(bad)}."
                )
        inserted_or_updated = False
    if returning is None:
        return None, inserted_or_updated
    else:
        assert result is not None
        # strict=True makes a length mismatch between the requested
        # columns and the fetched values an error.
        return dict(zip(returning, result, strict=True)), inserted_or_updated

1429 

def insert(
    self,
    table: sqlalchemy.schema.Table,
    *rows: dict,
    returnIds: bool = False,
    select: sqlalchemy.sql.expression.SelectBase | None = None,
    names: Iterable[str] | None = None,
) -> list[int] | None:
    """Insert one or more rows into a table, optionally returning
    autoincrement primary key values.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    returnIds : `bool`
        If `True` (`False` is default), return the values of the table's
        autoincrement primary key field (which must exist).
    select : `sqlalchemy.sql.SelectBase`, optional
        A SELECT query expression to insert rows from.  Cannot be combined
        with either ``rows`` or ``returnIds=True``.
    names : `~collections.abc.Iterable` [ `str` ], optional
        Names of the columns in ``table`` to populate, in the order of the
        columns returned by ``select``.  Ignored if ``select`` is `None`.
        When omitted, the columns returned by ``select`` must be named to
        match the desired columns of ``table``.
    *rows
        Positional arguments are the rows to be inserted, as dictionaries
        mapping column name to value.  The keys in all dictionaries must
        be the same.

    Returns
    -------
    ids : `None`, or `list` of `int`
        If ``returnIds`` is `True`, a `list` containing the inserted
        values for the table's autoincrement primary key.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.
    TypeError
        Raised if ``select`` is combined with ``rows`` or
        ``returnIds=True``.

    Notes
    -----
    The default implementation uses bulk insert syntax when ``returnIds``
    is `False`, and a loop over single-row insert operations when it is
    `True`.

    Derived classes should reimplement when they can provide a more
    efficient implementation (especially for the latter case).

    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.
    """
    self.assertTableWriteable(table, f"Cannot insert into read-only table {table}.")
    if select is not None and (rows or returnIds):
        raise TypeError("'select' is incompatible with passing value rows or returnIds=True.")
    if select is None and not rows:
        # Nothing to insert at all.
        return [] if returnIds else None
    with self._transaction() as (_, connection):
        if returnIds:
            # One statement per row, so each generated key can be read
            # back.
            statement = table.insert()
            return [connection.execute(statement, row).inserted_primary_key[0] for row in rows]
        if select is None:
            connection.execute(table.insert(), rows)
            return None
        if names is None:
            # columns() is deprecated since 1.4, but
            # selected_columns() method did not exist in 1.3.
            source_columns = (
                select.selected_columns if hasattr(select, "selected_columns") else select.columns
            )
            names = source_columns.keys()
        connection.execute(table.insert().from_select(list(names), select))
        return None

1509 

@abstractmethod
def replace(self, table: sqlalchemy.schema.Table, *rows: dict) -> None:
    """Insert one or more rows into a table, replacing any existing rows
    whose primary key would otherwise conflict with a new row.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    *rows
        Positional arguments are the rows to be inserted, as dictionaries
        mapping column name to value.  The keys in all dictionaries must
        be the same.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    Implementations should raise a `sqlalchemy.exc.IntegrityError`
    exception when a constraint other than the primary key would be
    violated.

    Implementations are not required to support `replace` on tables
    with autoincrement keys.
    """
    # Abstract: concrete databases must supply the implementation.
    raise NotImplementedError()

1543 

@abstractmethod
def ensure(self, table: sqlalchemy.schema.Table, *rows: dict, primary_key_only: bool = False) -> int:
    """Insert one or more rows into a table, skipping any row whose
    insertion would violate a unique constraint.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    *rows
        Positional arguments are the rows to be inserted, as dictionaries
        mapping column name to value.  The keys in all dictionaries must
        be the same.
    primary_key_only : `bool`, optional
        If `True` (`False` is default), only skip rows that violate the
        primary key constraint, and raise an exception (and rollback
        transactions) for other constraint violations.

    Returns
    -------
    count : `int`
        The number of rows actually inserted.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.
        This is raised even if the operation would do nothing even on a
        writeable database.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    Implementations are not required to support `ensure` on tables
    with autoincrement keys.
    """
    # Abstract: concrete databases must supply the implementation.
    raise NotImplementedError()

1583 

def delete(self, table: sqlalchemy.schema.Table, columns: Iterable[str], *rows: dict) -> int:
    """Delete one or more rows from a table.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table that rows should be deleted from.
    columns : `~collections.abc.Iterable` of `str`
        The names of columns that will be used to constrain the rows to
        be deleted; these will be combined via ``AND`` to form the
        ``WHERE`` clause of the delete query.  Any iterable is accepted,
        including one-shot iterators.
    *rows
        Positional arguments are the keys of rows to be deleted, as
        dictionaries mapping column name to value.  The keys in all
        dictionaries must be exactly the names in ``columns``.

    Returns
    -------
    count : `int`
        Number of rows deleted.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    The default implementation should be sufficient for most derived
    classes.
    """
    self.assertTableWriteable(table, f"Cannot delete from read-only table {table}.")
    # Materialize before testing truthiness: an iterator or generator is
    # always truthy, even when it yields nothing, which would wrongly turn
    # a "delete everything" request into a no-op.
    columns = list(columns)
    if columns and not rows:
        # If there are no columns, this operation is supposed to delete
        # everything (so we proceed as usual).  But if there are columns,
        # but no rows, it was a constrained bulk operation where the
        # constraint is that no rows match, and we should short-circuit
        # while reporting that no rows were affected.
        return 0
    sql = table.delete()

    # More efficient to use IN operator if there is only one
    # variable changing across all rows.
    content: dict[str, set] = defaultdict(set)
    if len(columns) == 1:
        # Nothing to calculate since we can always use IN
        column = columns[0]
        changing_columns = [column]
        content[column] = {row[column] for row in rows}
    else:
        for row in rows:
            for k, v in row.items():
                content[k].add(v)
        changing_columns = [col for col, values in content.items() if len(values) > 1]

    if len(changing_columns) != 1:
        # More than one column changes each time so do explicit bind
        # parameters and have each row processed separately.
        whereTerms = [table.columns[name] == sqlalchemy.sql.bindparam(name) for name in columns]
        if whereTerms:
            sql = sql.where(sqlalchemy.sql.and_(*whereTerms))
        with self._transaction() as (_, connection):
            return connection.execute(sql, rows).rowcount

    # One of the columns has changing values but any others are
    # fixed.  In this case we can use an IN operator and be more
    # efficient.
    name = changing_columns[0]

    # Simple equality clauses for the unchanging columns; each of their
    # value sets holds exactly one element.
    clauses = [table.columns[k] == next(iter(v)) for k, v in content.items() if k != name]

    # The IN operator will not work for "infinite" numbers of
    # rows so must batch it up into distinct calls.
    in_content = list(content[name])
    n_per_loop = 1_000  # Controls how many items to put in IN clause

    rowcount = 0
    with self._transaction() as (_, connection):
        for start in range(0, len(in_content), n_per_loop):
            in_clause = table.columns[name].in_(in_content[start : start + n_per_loop])
            batch_sql = sql.where(sqlalchemy.sql.and_(*clauses, in_clause))
            rowcount += connection.execute(batch_sql).rowcount
    return rowcount

1682 

def deleteWhere(self, table: sqlalchemy.schema.Table, where: sqlalchemy.sql.ColumnElement) -> int:
    """Delete the rows of a table that satisfy a caller-built WHERE clause.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table that rows should be deleted from.
    where : `sqlalchemy.sql.ColumnElement`
        Pre-constructed SQLAlchemy expression that is used, unmodified, as
        the ``WHERE`` clause of the delete query.

    Returns
    -------
    count : `int`
        Number of rows deleted.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    The default implementation should be sufficient for most derived
    classes.
    """
    self.assertTableWriteable(table, f"Cannot delete from read-only table {table}.")

    # Build the statement in two steps: bare DELETE first, then constrain it.
    statement = table.delete()
    statement = statement.where(where)
    with self._transaction() as (_, conn):
        outcome = conn.execute(statement)
        # Read the count while the transaction context is still open.
        return outcome.rowcount

1718 

def update(self, table: sqlalchemy.schema.Table, where: dict[str, str], *rows: dict) -> int:
    """Apply an UPDATE to one or more rows of a table.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table containing the rows to be updated.
    where : `dict` [`str`, `str`]
        Maps each column name used to locate existing rows to the key
        under which the matching value is stored in the ``rows``
        dictionaries. Note that these may not be the same due to
        SQLAlchemy limitations.
    *rows
        Positional arguments are the rows to be updated. The keys in all
        dictionaries must be the same, and may correspond to either a
        value in the ``where`` dictionary or the name of a column to be
        updated.

    Returns
    -------
    count : `int`
        Number of rows matched (regardless of whether the update actually
        modified them).

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    The default implementation should be sufficient for most derived
    classes.
    """
    self.assertTableWriteable(table, f"Cannot update read-only table {table}.")
    if not rows:
        # Nothing to update, so no statement is built or executed.
        return 0
    base_statement = table.update()
    # One equality test per search column, each bound to the row key that
    # carries its value.
    match_terms = [
        table.columns[column_name] == sqlalchemy.sql.bindparam(row_key)
        for column_name, row_key in where.items()
    ]
    statement = base_statement.where(sqlalchemy.sql.and_(*match_terms))
    with self._transaction() as (_, conn):
        outcome = conn.execute(statement, rows)
        return outcome.rowcount

1764 

@contextmanager
def query(
    self,
    sql: sqlalchemy.sql.expression.Executable | sqlalchemy.sql.expression.SelectBase,
    *args: Any,
    **kwargs: Any,
) -> Iterator[sqlalchemy.engine.CursorResult]:
    """Run a SELECT query against the database.

    Parameters
    ----------
    sql : `sqlalchemy.sql.expression.SelectBase`
        A SQLAlchemy representation of a ``SELECT`` query.
    *args
        Additional positional arguments are forwarded to
        `sqlalchemy.engine.Connection.execute`.
    **kwargs
        Additional keyword arguments are forwarded to
        `sqlalchemy.engine.Connection.execute`.

    Returns
    -------
    result_context : `sqlalchemy.engine.CursorResult`
        Context manager that returns the query result object when entered.
        These results are invalidated when the context is exited.

    Notes
    -----
    If there is no session connection, a new connection is opened for the
    duration of the context and closed on exit, including when executing
    the query itself raises.
    """
    if self._session_connection is None:
        connection = self._engine.connect()
    else:
        connection = self._session_connection
    # The execute call is inside the ``try`` so that a connection opened just
    # for this query is still closed if execution fails.
    try:
        # TODO: SelectBase is not good for execute(), but it is used
        # everywhere, e.g. in daf_relation. We should switch to Executable
        # at some point.
        result = connection.execute(cast(sqlalchemy.sql.expression.Executable, sql), *args, **kwargs)
        yield result
    finally:
        # Only close connections this call opened; the session connection
        # is managed elsewhere.
        if connection is not self._session_connection:
            connection.close()

1803 

@abstractmethod
def constant_rows(
    self,
    fields: NamedValueAbstractSet[ddl.FieldSpec],
    *rows: dict,
    name: str | None = None,
) -> sqlalchemy.sql.FromClause:
    """Build a SQLAlchemy object holding a small set of constant-valued
    rows.

    Parameters
    ----------
    fields : `NamedValueAbstractSet` [ `ddl.FieldSpec` ]
        The columns of the rows. Unique and foreign key constraints are
        ignored.
    *rows : `dict`
        Values for the rows.
    name : `str`, optional
        If provided, the name of the SQL construct. If not provided, an
        opaque but unique identifier is generated.

    Returns
    -------
    from_clause : `sqlalchemy.sql.FromClause`
        SQLAlchemy object representing the given rows. This is guaranteed
        to be something that can be directly joined into a ``SELECT``
        query's ``FROM`` clause, and will not involve a temporary table
        that needs to be cleaned up later.

    Notes
    -----
    The default implementation uses the SQL-standard ``VALUES`` construct,
    but support for that construct is varied enough across popular RDBMSs
    that the method is still marked abstract to force explicit opt-in via
    delegation to `super`.
    """
    if name is None:
        # No name supplied: generate one from a random UUID.
        name = f"tmp_{uuid.uuid4().hex}"
    column_defs = [sqlalchemy.Column(spec.name, spec.getSizedColumnType()) for spec in fields]
    values_construct = sqlalchemy.sql.values(*column_defs, name=name)
    # Each row dict becomes a tuple ordered by the field names.
    row_tuples = [tuple(row[field_name] for field_name in fields.names) for row in rows]
    return values_construct.data(row_tuples)

1846 

def get_constant_rows_max(self) -> int:
    """Report the largest number of rows that should be handed to
    `constant_rows` for this backend.

    Returns
    -------
    max : `int`
        Maximum number of rows.

    Notes
    -----
    This should reflect typical performance profiles (or a guess at these),
    not just hard database engine limits.
    """
    max_rows = 100
    return max_rows

1862 

# Annotation-only attribute declarations: no value is assigned here. The
# string literal following each declaration is its attribute docstring.
origin: int
"""An integer ID that should be used as the default for any datasets,
quanta, or other entities that use a (autoincrement, origin) compound
primary key (`int`).
"""

namespace: str | None
"""The schema or namespace this database instance is associated with
(`str` or `None`).
"""