Coverage for python/lsst/daf/butler/registry/interfaces/_database.py: 23%

411 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-12-05 11:07 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27from __future__ import annotations 

28 

29from ... import ddl, time_utils 

30 

31__all__ = [ 

32 "Database", 

33 "ReadOnlyDatabaseError", 

34 "DatabaseConflictError", 

35 "DatabaseInsertMode", 

36 "SchemaAlreadyDefinedError", 

37 "StaticTablesContext", 

38] 

39 

40import enum 

41import uuid 

42import warnings 

43from abc import ABC, abstractmethod 

44from collections import defaultdict 

45from collections.abc import Callable, Iterable, Iterator, Sequence 

46from contextlib import contextmanager 

47from typing import Any, cast, final 

48 

49import astropy.time 

50import sqlalchemy 

51 

52from ..._named import NamedValueAbstractSet 

53from ..._timespan import TimespanDatabaseRepresentation 

54from .._exceptions import ConflictingDefinitionError 

55 

56 

class DatabaseInsertMode(enum.Enum):
    """Strategies for dealing with pre-existing rows when inserting records."""

    INSERT = enum.auto()
    """Plain insert; it is an error if a record already exists."""

    REPLACE = enum.auto()
    """Insert, overwriting any record that already exists."""

    ENSURE = enum.auto()
    """Insert only the records that are missing, leaving existing ones alone."""

68 

69 

70# TODO: method is called with list[ReflectedColumn] in SA 2, and 

71# ReflectedColumn does not exist in 1.4. 

def _checkExistingTableDefinition(name: str, spec: ddl.TableSpec, inspection: list) -> None:
    """Test that the definition of a table in a `ddl.TableSpec` and from
    database introspection are consistent.

    Only the *set* of column names is compared; column order, types and
    constraints are not checked.

    Parameters
    ----------
    name : `str`
        Name of the table (only used in error messages).
    spec : `ddl.TableSpec`
        Specification of the table.
    inspection : `list`
        List of per-column mappings, each with at least a ``"name"`` key,
        as returned by
        `sqlalchemy.engine.reflection.Inspector.get_columns`.

    Raises
    ------
    DatabaseConflictError
        Raised if the definitions are inconsistent.
    """
    columnNames = [column["name"] for column in inspection]
    if spec.fields.names != set(columnNames):
        raise DatabaseConflictError(
            f"Table '{name}' exists but is defined differently in the database; "
            f"specification has columns {list(spec.fields.names)}, while the "
            f"table in the database has {columnNames}."
        )

98 

99 

class ReadOnlyDatabaseError(RuntimeError):
    """Error raised when a write operation is attempted on a `Database`
    that is read-only.
    """

104 

105 

class DatabaseConflictError(ConflictingDefinitionError):
    """Error raised when what is found in the database (row values or
    schema entities) does not agree with what this client expects.
    """

110 

111 

class SchemaAlreadyDefinedError(RuntimeError):
    """Error raised on an attempt to initialize a database schema when
    some tables are already present.
    """

116 

117 

118class StaticTablesContext: 

119 """Helper class used to declare the static schema for a registry layer 

120 in a database. 

121 

122 An instance of this class is returned by `Database.declareStaticTables`, 

123 which should be the only way it should be constructed. 

124 """ 

125 

126 def __init__(self, db: Database, connection: sqlalchemy.engine.Connection): 

127 self._db = db 

128 self._foreignKeys: list[tuple[sqlalchemy.schema.Table, sqlalchemy.schema.ForeignKeyConstraint]] = [] 

129 self._inspector = sqlalchemy.inspect(connection) 

130 self._tableNames = frozenset(self._inspector.get_table_names(schema=self._db.namespace)) 

131 self._initializers: list[Callable[[Database], None]] = [] 

132 

133 def addTable(self, name: str, spec: ddl.TableSpec) -> sqlalchemy.schema.Table: 

134 """Add a new table to the schema, returning its sqlalchemy 

135 representation. 

136 

137 The new table may not actually be created until the end of the 

138 context created by `Database.declareStaticTables`, allowing tables 

139 to be declared in any order even in the presence of foreign key 

140 relationships. 

141 """ 

142 name = self._db._mangleTableName(name) 

143 metadata = self._db._metadata 

144 assert metadata is not None, "Guaranteed by context manager that returns this object." 

145 table = self._db._convertTableSpec(name, spec, metadata) 

146 for foreignKeySpec in spec.foreignKeys: 

147 self._foreignKeys.append((table, self._db._convertForeignKeySpec(name, foreignKeySpec, metadata))) 

148 return table 

149 

150 def addTableTuple(self, specs: tuple[ddl.TableSpec, ...]) -> tuple[sqlalchemy.schema.Table, ...]: 

151 """Add a named tuple of tables to the schema, returning their 

152 SQLAlchemy representations in a named tuple of the same type. 

153 

154 The new tables may not actually be created until the end of the 

155 context created by `Database.declareStaticTables`, allowing tables 

156 to be declared in any order even in the presence of foreign key 

157 relationships. 

158 

159 Notes 

160 ----- 

161 ``specs`` *must* be an instance of a type created by 

162 `collections.namedtuple`, not just regular tuple, and the returned 

163 object is guaranteed to be the same. Because `~collections.namedtuple` 

164 is just a factory for `type` objects, not an actual type itself, 

165 we cannot represent this with type annotations. 

166 """ 

167 return specs._make( # type: ignore 

168 self.addTable(name, spec) for name, spec in zip(specs._fields, specs, strict=True) # type: ignore 

169 ) 

170 

171 def addInitializer(self, initializer: Callable[[Database], None]) -> None: 

172 """Add a method that does one-time initialization of a database. 

173 

174 Initialization can mean anything that changes state of a database 

175 and needs to be done exactly once after database schema was created. 

176 An example for that could be population of schema attributes. 

177 

178 Parameters 

179 ---------- 

180 initializer : callable 

181 Method of a single argument which is a `Database` instance. 

182 """ 

183 self._initializers.append(initializer) 

184 

185 

186class Database(ABC): 

187 """An abstract interface that represents a particular database engine's 

188 representation of a single schema/namespace/database. 

189 

190 Parameters 

191 ---------- 

192 origin : `int` 

193 An integer ID that should be used as the default for any datasets, 

194 quanta, or other entities that use a (autoincrement, origin) compound 

195 primary key. 

196 engine : `sqlalchemy.engine.Engine` 

197 The SQLAlchemy engine for this `Database`. 

198 namespace : `str`, optional 

199 Name of the schema or namespace this instance is associated with. 

200 This is passed as the ``schema`` argument when constructing a 

201 `sqlalchemy.schema.MetaData` instance. We use ``namespace`` instead to 

202 avoid confusion between "schema means namespace" and "schema means 

203 table definitions". 

204 

205 Notes 

206 ----- 

207 `Database` requires all write operations to go through its special named 

208 methods. Our write patterns are sufficiently simple that we don't really 

209 need the full flexibility of SQL insert/update/delete syntax, and we need 

210 non-standard (but common) functionality in these operations sufficiently 

211 often that it seems worthwhile to provide our own generic API. 

212 

213 In contrast, `Database.query` allows arbitrary ``SELECT`` queries (via 

214 their SQLAlchemy representation) to be run, as we expect these to require 

215 significantly more sophistication while still being limited to standard 

216 SQL. 

217 

218 `Database` itself has several underscore-prefixed attributes: 

219 

220 - ``_engine``: SQLAlchemy object representing its engine. 

221 - ``_connection``: method returning a context manager for 

222 `sqlalchemy.engine.Connection` object. 

223 - ``_metadata``: the `sqlalchemy.schema.MetaData` object representing 

224 the tables and other schema entities. 

225 

226 These are considered protected (derived classes may access them, but other 

227 code should not), and read-only, aside from executing SQL via 

228 ``_connection``. 

229 """ 

230 

def __init__(self, *, origin: int, engine: sqlalchemy.engine.Engine, namespace: str | None = None):
    # Values supplied by the caller.
    self._engine = engine
    self.origin = origin
    self.namespace = namespace
    # State that starts out empty: no open session connection, no
    # declared schema and no known temporary tables.
    self._temp_tables: set[str] = set()
    self._metadata: sqlalchemy.schema.MetaData | None = None
    self._session_connection: sqlalchemy.engine.Connection | None = None

238 

def __repr__(self) -> str:
    """Return the class name together with the connection URL.

    A password in the URL is masked, and the namespace (when set) is
    appended after a ``#``.
    """
    # The connection URL is more useful to a reader than an attempt to
    # echo back every constructor argument.
    url = self._engine.url
    if url.password is not None:
        url = url.set(password="***")
    location = str(url)
    if self.namespace:
        location = f"{location}#{self.namespace}"
    return f'{type(self).__name__}("{location}")'

250 

@classmethod
def makeDefaultUri(cls, root: str) -> str | None:
    """Build the default connection URI for the given root directory.

    The base implementation has no default and always returns `None`.
    """
    return None

257 

@classmethod
def fromUri(
    cls,
    uri: str | sqlalchemy.engine.URL,
    *,
    origin: int,
    namespace: str | None = None,
    writeable: bool = True,
) -> Database:
    """Build a `Database` from a SQLAlchemy connection URI.

    Parameters
    ----------
    uri : `str` or `sqlalchemy.engine.URL`
        A SQLAlchemy URI connection string.
    origin : `int`
        Integer ID to use as the default for datasets, quanta, or other
        entities whose primary key is an (autoincrement, origin) pair.
    namespace : `str`, optional
        Database namespace (i.e. schema) to associate the new instance
        with. If `None` (default), the namespace (if any) is inferred
        from the URI.
    writeable : `bool`, optional
        If `True`, allow write operations on the database, including
        ``CREATE TABLE``.

    Returns
    -------
    db : `Database`
        A new `Database` instance.
    """
    # Two steps: first an engine from the URI, then a database from it.
    engine = cls.makeEngine(uri, writeable=writeable)
    return cls.fromEngine(engine, origin=origin, namespace=namespace, writeable=writeable)

293 

@classmethod
@abstractmethod
def makeEngine(
    cls, uri: str | sqlalchemy.engine.URL, *, writeable: bool = True
) -> sqlalchemy.engine.Engine:
    """Build a `sqlalchemy.engine.Engine` from a SQLAlchemy URI.

    Parameters
    ----------
    uri : `str` or `sqlalchemy.engine.URL`
        A SQLAlchemy URI connection string.
    writeable : `bool`, optional
        If `True`, allow write operations on the database, including
        ``CREATE TABLE``.

    Returns
    -------
    engine : `sqlalchemy.engine.Engine`
        A database engine.

    Notes
    -----
    Subclasses that can connect to a database in other ways are
    encouraged to add optional arguments to their implementation, as
    long as it stays compatible with the base class call signature.
    """
    raise NotImplementedError()

322 

@classmethod
@abstractmethod
def fromEngine(
    cls,
    engine: sqlalchemy.engine.Engine,
    *,
    origin: int,
    namespace: str | None = None,
    writeable: bool = True,
) -> Database:
    """Build a new `Database` on top of an existing
    `sqlalchemy.engine.Engine`.

    Parameters
    ----------
    engine : `sqlalchemy.engine.Engine`
        The engine for the database. May be shared between `Database`
        instances.
    origin : `int`
        Integer ID to use as the default for datasets, quanta, or other
        entities whose primary key is an (autoincrement, origin) pair.
    namespace : `str`, optional
        A different database namespace (i.e. schema) the new instance
        should be associated with. If `None` (default), the namespace
        (if any) is inferred from the connection.
    writeable : `bool`, optional
        If `True`, allow write operations on the database, including
        ``CREATE TABLE``.

    Returns
    -------
    db : `Database`
        A new `Database` instance.

    Notes
    -----
    This method lets several `Database` instances share one engine,
    which is desirable when they represent different namespaces that
    can be queried together.
    """
    raise NotImplementedError()

364 

@final
@contextmanager
def session(self) -> Iterator[None]:
    """Return a context manager representing a session, i.e. a
    persistent connection to the database.

    Returns
    -------
    context : `AbstractContextManager` [ `None` ]
        A context manager that does not return a value when entered.

    Notes
    -----
    Use this when a series of read-only SQL operations will be run in
    rapid succession *without* needing consistent results in the
    presence of concurrent writes (or, more rarely, when conflicting
    concurrent writes are rare/impossible and the session will stay
    open long enough that a transaction is inadvisable).
    """
    # The connection is deliberately hidden from callers; internal code
    # that needs it uses `_session` directly.
    with self._session() as _connection:
        yield None

386 

@final
@contextmanager
def transaction(
    self,
    *,
    interrupting: bool = False,
    savepoint: bool = False,
    lock: Iterable[sqlalchemy.schema.Table] = (),
    for_temp_tables: bool = False,
) -> Iterator[None]:
    """Return a context manager representing a transaction.

    Parameters
    ----------
    interrupting : `bool`, optional
        If `True` (`False` is default), this transaction block may not be
        nested within an outer one, and attempting to do so is a logic
        (i.e. assertion) error.
    savepoint : `bool`, optional
        If `True` (`False` is default), create a `SAVEPOINT`, so that
        exceptions raised by the database (e.g. due to constraint
        violations) inside this block can be caught outside it without
        also rolling back everything in an outer transaction block. If
        `False`, transactions may still be nested, but a rollback may be
        generated at any level and affects all levels, and commits are
        deferred until the outermost block completes. If any outer
        transaction block was created with ``savepoint=True``, all inner
        blocks will be as well (regardless of the value passed). This
        has no effect if this is the outermost transaction.
    lock : `~collections.abc.Iterable` [ `sqlalchemy.schema.Table` ], \
            optional
        Tables to lock for the duration of this transaction. These locks
        are guaranteed to prevent concurrent writes and allow this
        transaction (only) to acquire the same locks (others should
        block), but only prevent concurrent reads if the database engine
        requires that in order to block concurrent writes.
    for_temp_tables : `bool`, optional
        If `True`, this transaction may involve creating temporary tables.

    Returns
    -------
    context : `AbstractContextManager` [ `None` ]
        A context manager that commits the transaction when it is exited
        without error and rolls back the transaction when it is exited
        via an exception.

    Notes
    -----
    All transactions on a connection managed by one or more `Database`
    instances _must_ go through this method, or transaction state will
    not be correctly managed.
    """
    # Everything is delegated to the protected implementation; its
    # (is_new, connection) result is not exposed to callers.
    with self._transaction(
        interrupting=interrupting,
        savepoint=savepoint,
        lock=lock,
        for_temp_tables=for_temp_tables,
    ) as _state:
        yield None

443 

@contextmanager
def temporary_table(
    self, spec: ddl.TableSpec, name: str | None = None
) -> Iterator[sqlalchemy.schema.Table]:
    """Return a context manager that creates a temporary table on entry
    and drops it on exit.

    Parameters
    ----------
    spec : `ddl.TableSpec`
        Specification for the columns. Unique and foreign key constraints
        may be ignored.
    name : `str`, optional
        If provided, the name of the SQL construct. If not provided, an
        opaque but unique identifier is generated.

    Returns
    -------
    context : `AbstractContextManager` [ `sqlalchemy.schema.Table` ]
        A context manager that returns a SQLAlchemy representation of the
        temporary table when entered.

    Notes
    -----
    Temporary tables may be created, dropped, and written to even in
    read-only databases - at least as far as the Python-level protections
    in the `Database` classes are concerned. Server permissions may say
    otherwise, but in that case they probably need to be modified to
    support the full range of expected read-only butler behavior.
    """
    with self._session() as connection:
        table = self._make_temporary_table(connection, spec=spec, name=name)
        # Record the table so that it is treated as writeable (see
        # `isTableWriteable`) while it exists.
        self._temp_tables.add(table.key)
        try:
            yield table
        finally:
            # Drop inside a transaction, then forget the table.
            with self._transaction():
                table.drop(connection)
            self._temp_tables.remove(table.key)

483 

@contextmanager
def _session(self) -> Iterator[sqlalchemy.engine.Connection]:
    """Protected implementation of `session` that actually returns the
    connection.

    Intended for internal `Database` calls that need the SQLAlchemy
    connection object itself. Subclasses should override this method
    rather than `session`.

    Returns
    -------
    context : `AbstractContextManager` [ `sqlalchemy.engine.Connection` ]
        A context manager that returns a SQLAlchemy connection when
        entered.
    """
    active = self._session_connection
    if active is not None:
        # A session is already open: share its connection and leave the
        # closing to whoever opened it.
        yield active
        return
    try:
        # No session yet: open a connection and own it until exit.
        self._session_connection = self._engine.connect()
        yield self._session_connection
    finally:
        if self._session_connection is not None:
            self._session_connection.close()
            self._session_connection = None
            # Temporary tables do not outlive the session.
            self._temp_tables = set()

514 

@contextmanager
def _transaction(
    self,
    *,
    interrupting: bool = False,
    savepoint: bool = False,
    lock: Iterable[sqlalchemy.schema.Table] = (),
    for_temp_tables: bool = False,
) -> Iterator[tuple[bool, sqlalchemy.engine.Connection]]:
    """Protected implementation for `transaction` that actually returns the
    connection and whether this is a new outermost transaction.

    This method is for internal `Database` calls that need the actual
    SQLAlchemy connection object. It should be overridden by subclasses
    instead of `transaction` itself.

    Parameters
    ----------
    interrupting : `bool`, optional
        If `True` (`False` is default), this transaction block may not be
        nested within an outer one, and attempting to do so is a logic
        (i.e. assertion) error.
    savepoint : `bool`, optional
        If `True` (`False` is default), create a `SAVEPOINT`, allowing
        exceptions raised by the database (e.g. due to constraint
        violations) during this transaction's context to be caught outside
        it without also rolling back all operations in an outer transaction
        block. If `False`, transactions may still be nested, but a
        rollback may be generated at any level and affects all levels, and
        commits are deferred until the outermost block completes. If any
        outer transaction block was created with ``savepoint=True``, all
        inner blocks will be as well (regardless of the actual value
        passed). This has no effect if this is the outermost transaction.
    lock : `~collections.abc.Iterable` [ `sqlalchemy.schema.Table` ], \
            optional
        A list of tables to lock for the duration of this transaction.
        These locks are guaranteed to prevent concurrent writes and allow
        this transaction (only) to acquire the same locks (others should
        block), but only prevent concurrent reads if the database engine
        requires that in order to block concurrent writes.
    for_temp_tables : `bool`, optional
        If `True`, this transaction may involve creating temporary tables.
        This base implementation does not use the value.

    Returns
    -------
    context : `AbstractContextManager` [ `tuple` [ `bool`,
            `sqlalchemy.engine.Connection` ] ]
        A context manager that commits the transaction when it is exited
        without error and rolls back the transaction when it is exited via
        an exception. When entered, it returns a tuple of:

        - ``is_new`` (`bool`): whether this is a new (outermost)
          transaction;
        - ``connection`` (`sqlalchemy.engine.Connection`): the connection.
    """
    with self._session() as connection:
        already_in_transaction = connection.in_transaction()
        assert not (interrupting and already_in_transaction), (
            "Logic error in transaction nesting: an operation that would "
            "interrupt the active transaction context has been requested."
        )
        savepoint = savepoint or connection.in_nested_transaction()
        trans: sqlalchemy.engine.Transaction | None
        if not already_in_transaction:
            # Use a regular (non-savepoint) transaction always for the
            # outermost context.
            trans = connection.begin()
        elif savepoint:
            trans = connection.begin_nested()
        else:
            # Nested non-savepoint transactions don't do anything.
            trans = None
        try:
            # Locking happens inside the ``try`` so that a failure to
            # acquire a lock rolls back the transaction (or savepoint)
            # begun above instead of leaving it open on a connection that
            # may be reused by an enclosing session.
            self._lockTables(connection, lock)
            yield not already_in_transaction, connection
            if trans is not None:
                trans.commit()
        except BaseException:
            if trans is not None:
                trans.rollback()
            raise

597 

@abstractmethod
def _lockTables(
    self, connection: sqlalchemy.engine.Connection, tables: Iterable[sqlalchemy.schema.Table] = ()
) -> None:
    """Acquire locks on the given tables.

    Implementation hook for subclasses, invoked by `transaction`; other
    code should not call it directly.

    Parameters
    ----------
    connection : `sqlalchemy.engine.Connection`
        Database connection object. A transaction is guaranteed to be
        already in progress on this connection.
    tables : `~collections.abc.Iterable` [ `sqlalchemy.schema.Table` ], \
            optional
        Tables to lock for the duration of the transaction. These locks
        are guaranteed to prevent concurrent writes and allow this
        transaction (only) to acquire the same locks (others should
        block), but only prevent concurrent reads if the database engine
        requires that in order to block concurrent writes.
    """
    raise NotImplementedError()

621 

def isTableWriteable(self, table: sqlalchemy.schema.Table) -> bool:
    """Report whether a table can be written to, either because the
    database connection is read-write or because the table is a temporary
    table.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy table object to check.

    Returns
    -------
    writeable : `bool`
        Whether this table is writeable.
    """
    # The temporary-table registry is only consulted for read-only
    # databases.
    databaseWriteable = self.isWriteable()
    return databaseWriteable or table.key in self._temp_tables

637 

def assertTableWriteable(self, table: sqlalchemy.schema.Table, msg: str) -> None:
    """Raise if the given table is not writeable.

    A table is writeable when `isTableWriteable` returns `True` for it,
    i.e. the database connection is read-write or the table is a
    temporary table.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy table object to check.
    msg : `str`
        Message for the `ReadOnlyDatabaseError` raised when the table is
        not writeable.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if the table is not writeable.
    """
    if not self.isTableWriteable(table):
        raise ReadOnlyDatabaseError(msg)

652 

@contextmanager
def declareStaticTables(self, *, create: bool) -> Iterator[StaticTablesContext]:
    """Return a context manager inside which the database's static DDL
    schema can be declared.

    Parameters
    ----------
    create : `bool`
        If `True`, attempt to create all tables at the end of the context.
        If `False`, they will be assumed to already exist.

    Returns
    -------
    schema : `StaticTablesContext`
        A helper object that is used to add new tables.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if ``create`` is `True` and `Database.isWriteable` is
        `False`.
    SchemaAlreadyDefinedError
        Raised if ``create`` is `True` and the namespace already contains
        tables.

    Examples
    --------
    Given a `Database` instance ``db``::

        with db.declareStaticTables(create=True) as schema:
            schema.addTable("table1", TableSpec(...))
            schema.addTable("table2", TableSpec(...))

    Notes
    -----
    A database's static DDL schema must be declared before any dynamic
    tables are managed via calls to `ensureTableExists` or
    `getExistingTable`. The order in which static schema tables are added
    inside the context block is unimportant; they will automatically be
    sorted and added in an order consistent with their foreign key
    relationships.
    """
    if create and not self.isWriteable():
        raise ReadOnlyDatabaseError(f"Cannot create tables in read-only database {self}.")
    self._metadata = sqlalchemy.MetaData(schema=self.namespace)
    try:
        with self._transaction() as (_, connection):
            schema = StaticTablesContext(self, connection)
            if create and schema._tableNames:
                # The database looks initialized already; refuse to touch
                # it rather than risk modifying or destroying a valid
                # schema.
                raise SchemaAlreadyDefinedError(f"Cannot create tables in non-empty database {self}.")
            yield schema
            # Every table is known now, so the deferred foreign keys can
            # be attached.
            for table, foreignKey in schema._foreignKeys:
                table.append_constraint(foreignKey)
            if create:
                namespaceMissing = (
                    self.namespace is not None
                    and self.namespace not in schema._inspector.get_schema_names()
                )
                if namespaceMissing:
                    connection.execute(sqlalchemy.schema.CreateSchema(self.namespace))
                # Some columns use sqlalchemy Sequence objects, and
                # building the repr of a Sequence currently triggers a
                # deprecation warning inside sqlalchemy. Silence those
                # warnings while the tables are created.
                with warnings.catch_warnings():
                    warnings.simplefilter("ignore", category=sqlalchemy.exc.SADeprecationWarning)
                    self._metadata.create_all(connection)
                # Run the registered initializers one after another.
                for initializer in schema._initializers:
                    initializer(self)
    except BaseException:
        # Leave no half-declared schema behind on failure.
        self._metadata = None
        raise

727 

@abstractmethod
def isWriteable(self) -> bool:
    """Return `True` if this client is allowed to modify the database."""
    raise NotImplementedError()

732 

@abstractmethod
def __str__(self) -> str:
    """Return a human-readable identifier for this `Database`, including
    any namespace or schema that identifies its names within a
    `Registry`.
    """
    raise NotImplementedError()

739 

@property
def dialect(self) -> sqlalchemy.engine.Dialect:
    """The SQLAlchemy dialect of this database's engine
    (`sqlalchemy.engine.Dialect`).
    """
    engine = self._engine
    return engine.dialect

746 

def shrinkDatabaseEntityName(self, original: str) -> str:
    """Return a form of the given name that fits within this database
    engine's length limits for table, constraint, index, and sequence
    names.

    Implementations should not assume that simple truncation is safe,
    because multiple long names often begin with the same prefix.

    This default implementation returns the name unchanged.

    Parameters
    ----------
    original : `str`
        The original name.

    Returns
    -------
    shrunk : `str`
        The new, possibly shortened name.
    """
    return original

768 

def expandDatabaseEntityName(self, shrunk: str) -> str:
    """Retrieve the original name for a database entity that was too long
    to fit within the database engine's limits.

    This is the inverse of `shrinkDatabaseEntityName`. The default
    implementation simply returns the given name.

    Parameters
    ----------
    shrunk : `str`
        The possibly shortened name, as returned by
        `shrinkDatabaseEntityName`.

    Returns
    -------
    original : `str`
        The original name.
    """
    return shrunk

784 

def _mangleTableName(self, name: str) -> str:
    """Translate a logical, user-visible table name into the true table
    name used in the database.

    This default implementation returns the name unchanged.

    Parameters
    ----------
    name : `str`
        Input table name. Should not include a namespace (i.e. schema)
        prefix.

    Returns
    -------
    mangled : `str`
        Mangled version of the table name (still with no namespace prefix).

    Notes
    -----
    Reimplementations must be idempotent: mangling a name that is already
    mangled must have no effect.
    """
    return name

808 

def _makeColumnConstraints(self, table: str, spec: ddl.FieldSpec) -> list[sqlalchemy.CheckConstraint]:
    """Build the constraints implied by a field specification.

    Parameters
    ----------
    table : `str`
        Name of the table this column is being added to.
    spec : `FieldSpec`
        Specification for the field to be added.

    Returns
    -------
    constraint : `list` of `sqlalchemy.CheckConstraint`
        Constraints added for this column.
    """
    # The base class contributes no extra constraints.
    return []

826 

def _convertFieldSpec(
    self, table: str, spec: ddl.FieldSpec, metadata: sqlalchemy.MetaData, **kwargs: Any
) -> sqlalchemy.schema.Column:
    """Turn a `FieldSpec` into a `sqlalchemy.schema.Column`.

    Parameters
    ----------
    table : `str`
        Name of the table this column is being added to.
    spec : `FieldSpec`
        Specification for the field to be added.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema this field's table is
        being added to.
    **kwargs
        Additional keyword arguments to forward to the
        `sqlalchemy.schema.Column` constructor. This makes it easier for
        derived classes to delegate to ``super()`` while making only
        minor changes.

    Returns
    -------
    column : `sqlalchemy.schema.Column`
        SQLAlchemy representation of the field.
    """
    extraArgs = []
    if spec.autoincrement:
        # Databases without native autoincrement need an explicit
        # sequence; sqlalchemy ignores it for databases that have native
        # support.
        sequenceName = self.shrinkDatabaseEntityName(f"{table}_seq_{spec.name}")
        extraArgs.append(sqlalchemy.Sequence(sequenceName, metadata=metadata))
    assert spec.doc is None or isinstance(spec.doc, str), f"Bad doc for {table}.{spec.name}."
    return sqlalchemy.schema.Column(
        spec.name,
        spec.getSizedColumnType(),
        *extraArgs,
        nullable=spec.nullable,
        primary_key=spec.primaryKey,
        comment=spec.doc,
        server_default=spec.default,
        **kwargs,
    )

873 

def _convertForeignKeySpec(
    self, table: str, spec: ddl.ForeignKeySpec, metadata: sqlalchemy.MetaData, **kwargs: Any
) -> sqlalchemy.schema.ForeignKeyConstraint:
    """Convert a `ForeignKeySpec` to a
    `sqlalchemy.schema.ForeignKeyConstraint`.

    Parameters
    ----------
    table : `str`
        Name of the table this foreign key is being added to.
    spec : `ForeignKeySpec`
        Specification for the foreign key to be added.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema this constraint is
        being added to.  Not used by this base implementation.
    **kwargs
        Additional keyword arguments to forward to the
        `sqlalchemy.schema.ForeignKeyConstraint` constructor.  This is
        provided to make it easier for derived classes to delegate to
        ``super()`` while making only minor changes.

    Returns
    -------
    constraint : `sqlalchemy.schema.ForeignKeyConstraint`
        SQLAlchemy representation of the constraint.
    """
    # Mangle the referenced table name once; it is needed both for the
    # constraint name and for the fully-qualified target columns.
    target_table = self._mangleTableName(spec.table)
    name = self.shrinkDatabaseEntityName(
        "_".join(["fkey", table, target_table] + list(spec.target) + list(spec.source))
    )
    return sqlalchemy.schema.ForeignKeyConstraint(
        spec.source,
        [f"{target_table}.{col}" for col in spec.target],
        name=name,
        ondelete=spec.onDelete,
        # Forward caller-supplied options as the docstring promises;
        # previously these were accepted but silently discarded.
        **kwargs,
    )

911 

912 def _convertExclusionConstraintSpec( 

913 self, 

914 table: str, 

915 spec: tuple[str | type[TimespanDatabaseRepresentation], ...], 

916 metadata: sqlalchemy.MetaData, 

917 ) -> sqlalchemy.schema.Constraint: 

918 """Convert a `tuple` from `ddl.TableSpec.exclusion` into a SQLAlchemy 

919 constraint representation. 

920 

921 Parameters 

922 ---------- 

923 table : `str` 

924 Name of the table this constraint is being added to. 

925 spec : `tuple` [ `str` or `type` ] 

926 A tuple of `str` column names and the `type` object returned by 

927 `getTimespanRepresentation` (which must appear exactly once), 

928 indicating the order of the columns in the index used to back the 

929 constraint. 

930 metadata : `sqlalchemy.MetaData` 

931 SQLAlchemy representation of the DDL schema this constraint is 

932 being added to. 

933 

934 Returns 

935 ------- 

936 constraint : `sqlalchemy.schema.Constraint` 

937 SQLAlchemy representation of the constraint. 

938 

939 Raises 

940 ------ 

941 NotImplementedError 

942 Raised if this database does not support exclusion constraints. 

943 """ 

944 raise NotImplementedError(f"Database {self} does not support exclusion constraints.") 

945 

def _convertTableSpec(
    self, name: str, spec: ddl.TableSpec, metadata: sqlalchemy.MetaData, **kwargs: Any
) -> sqlalchemy.schema.Table:
    """Build the `sqlalchemy.schema.Table` described by a `TableSpec`.

    Parameters
    ----------
    name : `str`
        Name of the table; it is passed through `_mangleTableName` before
        being used for the table and for the derived constraint and index
        names.
    spec : `TableSpec`
        Specification for the table to be converted.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema the table is being
        added to.
    **kwargs
        Extra keyword arguments passed straight through to the
        `sqlalchemy.schema.Table` constructor, so that subclasses can
        delegate to ``super()`` while tweaking only a few settings.

    Returns
    -------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy representation of the table.

    Notes
    -----
    Foreign key *constraints* from ``spec.foreignKeys`` are deliberately not
    created here, to avoid circular dependencies; callers such as
    `ensureTableExists` and `getExistingTable` append them afterwards.
    Indexes backing those foreign keys are created here when requested via
    ``addIndex``.
    """
    name = self._mangleTableName(name)

    # Columns first, then any per-column constraints.
    schema_items: list[sqlalchemy.schema.SchemaItem] = []
    for field in spec.fields:
        schema_items.append(self._convertFieldSpec(name, field, metadata))
    for field in spec.fields:
        schema_items.extend(self._makeColumnConstraints(name, field))

    # Column tuples that already get an index through the primary key or a
    # unique constraint; used to avoid emitting duplicate explicit or
    # foreign-key indexes for them.
    primary_key_columns = tuple(field.name for field in spec.fields if field.primaryKey)
    covered = {primary_key_columns}

    for columns in spec.unique:
        constraint_name = self.shrinkDatabaseEntityName("_".join([name, "unq"] + list(columns)))
        schema_items.append(sqlalchemy.schema.UniqueConstraint(*columns, name=constraint_name))
    covered.update(spec.unique)

    for index in spec.indexes:
        if index.columns in covered:
            continue
        schema_items.append(
            sqlalchemy.schema.Index(
                self.shrinkDatabaseEntityName("_".join([name, "idx"] + list(index.columns))),
                *index.columns,
                unique=(index.columns in spec.unique),
                **index.kwargs,
            )
        )
    covered.update(index.columns for index in spec.indexes)

    for fk in spec.foreignKeys:
        if fk.addIndex and fk.source not in covered:
            schema_items.append(
                sqlalchemy.schema.Index(
                    self.shrinkDatabaseEntityName("_".join((name, "fkidx") + fk.source)),
                    *fk.source,
                )
            )

    for excl in spec.exclusion:
        schema_items.append(self._convertExclusionConstraintSpec(name, excl, metadata))

    assert spec.doc is None or isinstance(spec.doc, str), f"Bad doc for {name}."
    return sqlalchemy.schema.Table(
        name, metadata, *schema_items, comment=spec.doc, info={"spec": spec}, **kwargs
    )

1019 

def ensureTableExists(self, name: str, spec: ddl.TableSpec) -> sqlalchemy.schema.Table:
    """Ensure that a table with the given name and specification exists,
    creating it if necessary.

    Parameters
    ----------
    name : `str`
        Name of the table (not including namespace qualifiers).
    spec : `TableSpec`
        Specification for the table.  This will be used when creating the
        table, and *may* be used when obtaining an existing table to check
        for consistency, but no such check is guaranteed.

    Returns
    -------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy representation of the table.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False`, and the table does not
        already exist.
    DatabaseConflictError
        Raised if the table exists but ``spec`` is inconsistent with its
        definition.
    sqlalchemy.exc.DatabaseError
        Re-raised if creating the table fails and the table still does not
        exist in the database afterwards.

    Notes
    -----
    This method may not be called within transactions.  It may be called on
    read-only databases if and only if the table does in fact already
    exist.

    Subclasses may override this method, but usually should not need to.
    """
    # TODO: if _engine is used to make a table then it uses separate
    # connection and should not interfere with current transaction
    assert (
        self._session_connection is None or not self._session_connection.in_transaction()
    ), "Table creation interrupts transactions."
    assert self._metadata is not None, "Static tables must be declared before dynamic tables."
    table = self.getExistingTable(name, spec)
    if table is not None:
        return table
    if not self.isWriteable():
        raise ReadOnlyDatabaseError(
            f"Table {name} does not exist, and cannot be created because database {self} is read-only."
        )
    table = self._convertTableSpec(name, spec, self._metadata)
    for foreignKeySpec in spec.foreignKeys:
        table.append_constraint(self._convertForeignKeySpec(name, foreignKeySpec, self._metadata))
    try:
        with self._transaction() as (_, connection):
            table.create(connection)
    except sqlalchemy.exc.DatabaseError:
        # Some other process could have created the table meanwhile, which
        # usually causes OperationalError or ProgrammingError.  We cannot
        # use IF NOT EXISTS clause in this case due to PostgreSQL race
        # condition on server side which causes IntegrityError.  Instead we
        # catch these exceptions (they all inherit DatabaseError) and
        # re-check whether table is now there.
        #
        # The definition we just built is already registered in
        # self._metadata, and getExistingTable consults that registry
        # before looking at the database.  Drop it first so the re-check
        # actually inspects the database; otherwise any failure here would
        # be reported as "table exists".
        self._metadata.remove(table)
        table = self.getExistingTable(name, spec)
        if table is None:
            raise
    return table

1085 

def getExistingTable(self, name: str, spec: ddl.TableSpec) -> sqlalchemy.schema.Table | None:
    """Look up an already-existing table by name and specification.

    Parameters
    ----------
    name : `str`
        Name of the table (not including namespace qualifiers).
    spec : `TableSpec`
        Specification for the table.  It is used both to build the
        SQLAlchemy representation of the table and to verify that the
        existing definition is consistent with it.

    Returns
    -------
    table : `sqlalchemy.schema.Table` or `None`
        SQLAlchemy representation of the table, or `None` if it does not
        exist.

    Raises
    ------
    DatabaseConflictError
        Raised if the table exists but ``spec`` is inconsistent with its
        definition.

    Notes
    -----
    This method can be called within transactions and never modifies the
    database.

    Subclasses may override this method, but usually should not need to.
    """
    assert self._metadata is not None, "Static tables must be declared before dynamic tables."
    name = self._mangleTableName(name)
    if self.namespace is None:
        key = name
    else:
        key = f"{self.namespace}.{name}"

    # Fast path: the table is already known to the in-memory metadata.
    known = self._metadata.tables.get(key)
    if known is not None:
        if spec.fields.names != set(known.columns.keys()):
            raise DatabaseConflictError(
                f"Table '{name}' has already been defined differently; the new "
                f"specification has columns {list(spec.fields.names)}, while "
                f"the previous definition has {list(known.columns.keys())}."
            )
        return known

    # Slow path: ask the database itself, reusing the session connection
    # when there is one.
    if self._session_connection is not None:
        bind = self._session_connection
    else:
        bind = self._engine
    inspector = sqlalchemy.inspect(bind, raiseerr=True)
    if name not in inspector.get_table_names(schema=self.namespace):
        return None
    _checkExistingTableDefinition(name, spec, inspector.get_columns(name, schema=self.namespace))
    reflected = self._convertTableSpec(name, spec, self._metadata)
    for fk_spec in spec.foreignKeys:
        reflected.append_constraint(self._convertForeignKeySpec(name, fk_spec, self._metadata))
    return reflected

1138 

1139 def _make_temporary_table( 

1140 self, 

1141 connection: sqlalchemy.engine.Connection, 

1142 spec: ddl.TableSpec, 

1143 name: str | None = None, 

1144 **kwargs: Any, 

1145 ) -> sqlalchemy.schema.Table: 

1146 """Create a temporary table. 

1147 

1148 Parameters 

1149 ---------- 

1150 connection : `sqlalchemy.engine.Connection` 

1151 Connection to use when creating the table. 

1152 spec : `TableSpec` 

1153 Specification for the table. 

1154 name : `str`, optional 

1155 A unique (within this session/connetion) name for the table. 

1156 Subclasses may override to modify the actual name used. If not 

1157 provided, a unique name will be generated. 

1158 **kwargs 

1159 Additional keyword arguments to forward to the 

1160 `sqlalchemy.schema.Table` constructor. This is provided to make it 

1161 easier for derived classes to delegate to ``super()`` while making 

1162 only minor changes. 

1163 

1164 Returns 

1165 ------- 

1166 table : `sqlalchemy.schema.Table` 

1167 SQLAlchemy representation of the table. 

1168 """ 

1169 if name is None: 

1170 name = f"tmp_{uuid.uuid4().hex}" 

1171 metadata = self._metadata 

1172 if metadata is None: 

1173 raise RuntimeError("Cannot create temporary table before static schema is defined.") 

1174 table = self._convertTableSpec( 

1175 name, spec, metadata, prefixes=["TEMPORARY"], schema=sqlalchemy.schema.BLANK_SCHEMA, **kwargs 

1176 ) 

1177 if table.key in self._temp_tables and table.key != name: 

1178 raise ValueError( 

1179 f"A temporary table with name {name} (transformed to {table.key} by " 

1180 "Database) already exists." 

1181 ) 

1182 for foreignKeySpec in spec.foreignKeys: 

1183 table.append_constraint(self._convertForeignKeySpec(name, foreignKeySpec, metadata)) 

1184 with self._transaction(): 

1185 table.create(connection) 

1186 return table 

1187 

@classmethod
def getTimespanRepresentation(cls) -> type[TimespanDatabaseRepresentation]:
    """Return the `type` that encapsulates how `Timespan` objects are
    stored in this database.

    `Database` itself never uses the returned type elsewhere; callers are
    responsible for keeping their DDL and queries consistent with it.

    Returns
    -------
    TimespanReprClass : `type` (`TimespanDatabaseRepresentation` subclass)
        A type that encapsulates the way `Timespan` objects should be
        stored in this database.

    Notes
    -----
    Timespan-mangling logic is kept outside the `Database`
    implementations, even though the choice of representation ultimately
    belongs to them, for two main reasons:

    - Timespans appear in relatively few tables and queries in typical
      usage, and the code operating on them already knows it is dealing
      with timespans.  A timespan-representation-aware implementation of,
      say, `insert` would need extra logic to detect when mangling is
      required, which would usually be useless overhead.

    - SQLAlchemy's SELECT query expression system has no way to wrap
      multiple columns in a single expression object (the ORM does, but
      the ORM is not used here), so _much_ more of that code would have to
      be wrapped in custom interfaces to encapsulate timespan
      representations there.
    """
    representation: type[TimespanDatabaseRepresentation] = TimespanDatabaseRepresentation.Compound
    return representation

1223 

def sync(
    self,
    table: sqlalchemy.schema.Table,
    *,
    keys: dict[str, Any],
    compared: dict[str, Any] | None = None,
    extra: dict[str, Any] | None = None,
    returning: Sequence[str] | None = None,
    update: bool = False,
) -> tuple[dict[str, Any] | None, bool | dict[str, Any]]:
    """Insert into a table as necessary to ensure database contains
    values equivalent to the given ones.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table to be queried and possibly inserted into.
    keys : `dict`
        Column name-value pairs used to search for an existing row; must
        be a combination that can be used to select a single row if one
        exists.  If such a row does not exist, these values are used in
        the insert.
    compared : `dict`, optional
        Column name-value pairs that are compared to those in any existing
        row.  If such a row does not exist, these values are used in the
        insert.
    extra : `dict`, optional
        Column name-value pairs that are ignored if a matching row exists,
        but used in an insert if one is necessary.
    returning : `~collections.abc.Sequence` of `str`, optional
        The names of columns whose values should be returned.
    update : `bool`, optional
        If `True` (`False` is default), update the existing row with the
        values in ``compared`` instead of raising `DatabaseConflictError`.

    Returns
    -------
    row : `dict`, optional
        The value of the fields indicated by ``returning``, or `None` if
        ``returning`` is `None`.
    inserted_or_updated : `bool` or `dict`
        If `True`, a new row was inserted; if `False`, a matching row
        already existed.  If a `dict` (only possible if ``update=True``),
        then an existing row was updated, and the dict maps the names of
        the updated columns to their *old* values (new values can be
        obtained from ``compared``).

    Raises
    ------
    DatabaseConflictError
        Raised if the values in ``compared`` do not match the values in the
        database.
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False`, and no matching record
        already exists.
    ConflictingDefinitionError
        Raised if the table is writeable but no row matching ``keys`` is
        found after the attempted insert.
    RuntimeError
        Raised if more than one row matches ``keys``, or if a mismatch
        with ``compared`` is found right after a successful insert.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    It may be called on read-only databases if and only if the matching row
    does in fact already exist.
    """

    def check() -> tuple[int, dict[str, Any] | None, list | None]:
        """Query for a row that matches the ``keys`` argument, and compare
        to what was given by the caller.

        Returns
        -------
        n : `int`
            Number of matching rows.  ``n != 1`` is always an error, but
            it's a different kind of error depending on where `check` is
            being called.
        bad : `dict` or `None`
            The subset of the keys of ``compared`` for which the existing
            values did not match the given one, mapped to the existing
            values in the database.  A non-empty ``bad`` is a conflict
            unless ``update`` is `True`; how it is handled depends on
            the caller's context.  `None` if ``n != 1``.
        result : `list` or `None`
            Results in the database that correspond to the columns given
            in ``returning``, or `None` if ``returning is None`` or
            ``n != 1``.
        """
        toSelect: set[str] = set()
        if compared is not None:
            toSelect.update(compared.keys())
        if returning is not None:
            toSelect.update(returning)
        if not toSelect:
            # Need to select some column, even if we just want to see
            # how many rows we get back.
            toSelect.add(next(iter(keys.keys())))
        selectSql = (
            sqlalchemy.sql.select(*[table.columns[k].label(k) for k in toSelect])
            .select_from(table)
            .where(sqlalchemy.sql.and_(*[table.columns[k] == v for k, v in keys.items()]))
        )
        with self._transaction() as (_, connection):
            fetched = list(connection.execute(selectSql).mappings())
        if len(fetched) != 1:
            return len(fetched), None, None
        existing = fetched[0]
        if compared is not None:

            def safeNotEqual(a: Any, b: Any) -> bool:
                # astropy times are compared through TimeConverter rather
                # than with ``!=``.
                if isinstance(a, astropy.time.Time):
                    return not time_utils.TimeConverter().times_equal(a, b)
                return a != b

            inconsistencies = {
                k: existing[k] for k, v in compared.items() if safeNotEqual(existing[k], v)
            }
        else:
            inconsistencies = {}
        if returning is not None:
            toReturn: list | None = [existing[k] for k in returning]
        else:
            toReturn = None
        return 1, inconsistencies, toReturn

    def format_bad(inconsistencies: dict[str, Any]) -> str:
        """Format the 'bad' dictionary of existing values returned by
        ``check`` into a string suitable for an error message.
        """
        assert compared is not None, "Should not be able to get inconsistencies without comparing."
        return ", ".join(f"{k}: {v!r} != {compared[k]!r}" for k, v in inconsistencies.items())

    if self.isTableWriteable(table):
        # Try an insert first, but allow it to fail (in only specific
        # ways).
        row = keys.copy()
        if compared is not None:
            row.update(compared)
        if extra is not None:
            row.update(extra)
        with self.transaction():
            inserted = bool(self.ensure(table, row))
            inserted_or_updated: bool | dict[str, Any]
            # Need to perform check() for this branch inside the
            # transaction, so we roll back an insert that didn't do
            # what we expected.  That limits the extent to which we
            # can reduce duplication between this block and the other
            # ones that perform similar logic.
            n, bad, result = check()
            if n < 1:
                raise ConflictingDefinitionError(
                    f"Attempted to ensure {row} exists by inserting it with ON CONFLICT IGNORE, "
                    f"but a post-insert query on {keys} returned no results. "
                    f"Insert was {'' if inserted else 'not '}reported as successful. "
                    "This can occur if the insert violated a database constraint other than the "
                    "unique constraint or primary key used to identify the row in this call."
                )
            elif n > 1:
                raise RuntimeError(
                    f"Keys passed to sync {keys.keys()} do not comprise a "
                    f"unique constraint for table {table.name}."
                )
            elif bad:
                assert (
                    compared is not None
                ), "Should not be able to get inconsistencies without comparing."
                if inserted:
                    raise RuntimeError(
                        f"Conflict ({bad}) in sync after successful insert; this is "
                        "possible if the same table is being updated by a concurrent "
                        "process that isn't using sync, but it may also be a bug in "
                        "daf_butler."
                    )
                elif update:
                    # Overwrite only the mismatched columns; ``bad`` keeps
                    # the old values so they can be reported to the caller.
                    with self._transaction() as (_, connection):
                        connection.execute(
                            table.update()
                            .where(sqlalchemy.sql.and_(*[table.columns[k] == v for k, v in keys.items()]))
                            .values(**{k: compared[k] for k in bad})
                        )
                    inserted_or_updated = bad
                else:
                    raise DatabaseConflictError(
                        f"Conflict in sync for table {table.name} on column(s) {format_bad(bad)}."
                    )
            else:
                inserted_or_updated = inserted
    else:
        # Database is not writeable; just see if the row exists.
        n, bad, result = check()
        if n < 1:
            raise ReadOnlyDatabaseError("sync needs to insert, but database is read-only.")
        elif n > 1:
            raise RuntimeError("Keys passed to sync do not comprise a unique constraint.")
        elif bad:
            if update:
                raise ReadOnlyDatabaseError("sync needs to update, but database is read-only.")
            else:
                raise DatabaseConflictError(
                    f"Conflict in sync for table {table.name} on column(s) {format_bad(bad)}."
                )
        inserted_or_updated = False
    if returning is None:
        return None, inserted_or_updated
    else:
        # check() returns a non-None result whenever n == 1 and
        # ``returning`` was given, which is the only way to get here.
        assert result is not None
        return dict(zip(returning, result, strict=True)), inserted_or_updated

1427 

def insert(
    self,
    table: sqlalchemy.schema.Table,
    *rows: dict,
    returnIds: bool = False,
    select: sqlalchemy.sql.expression.SelectBase | None = None,
    names: Iterable[str] | None = None,
) -> list[int] | None:
    """Insert one or more rows into a table, optionally returning
    autoincrement primary key values.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    returnIds : `bool`
        If `True` (`False` is default), return the values of the table's
        autoincrement primary key field (which must exist).
    select : `sqlalchemy.sql.SelectBase`, optional
        A SELECT query expression to insert rows from.  Cannot be combined
        with either ``rows`` or ``returnIds=True``.
    names : `~collections.abc.Iterable` [ `str` ], optional
        Names of the columns in ``table`` to populate, in the order of the
        columns returned by ``select``.  Ignored if ``select`` is `None`.
        When omitted, the columns returned by ``select`` must already be
        named after the desired columns of ``table``.
    *rows
        Positional arguments are the rows to be inserted, as dictionaries
        mapping column name to value.  The keys in all dictionaries must
        be the same.

    Returns
    -------
    ids : `None`, or `list` of `int`
        If ``returnIds`` is `True`, a `list` containing the inserted
        values for the table's autoincrement primary key.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.
    TypeError
        Raised if ``select`` is combined with ``rows`` or
        ``returnIds=True``.

    Notes
    -----
    The default implementation uses bulk insert syntax when ``returnIds``
    is `False`, and a loop over single-row insert operations when it is
    `True`.

    Derived classes should reimplement when they can provide a more
    efficient implementation (especially for the latter case).

    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.
    """
    self.assertTableWriteable(table, f"Cannot insert into read-only table {table}.")
    if select is not None and (rows or returnIds):
        raise TypeError("'select' is incompatible with passing value rows or returnIds=True.")
    if select is None and not rows:
        # Nothing to insert at all.
        return [] if returnIds else None
    with self._transaction() as (_, connection):
        if returnIds:
            # One statement per row so each generated key can be read back.
            statement = table.insert()
            return [connection.execute(statement, row).inserted_primary_key[0] for row in rows]
        if select is None:
            connection.execute(table.insert(), rows)
            return None
        if names is None:
            # columns() is deprecated since 1.4, but
            # selected_columns() method did not exist in 1.3.
            if hasattr(select, "selected_columns"):
                names = select.selected_columns.keys()
            else:
                names = select.columns.keys()
        connection.execute(table.insert().from_select(list(names), select))
        return None

1507 

@abstractmethod
def replace(self, table: sqlalchemy.schema.Table, *rows: dict) -> None:
    """Insert rows into a table, overwriting any existing rows whose
    primary key would otherwise clash with a new row.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    *rows
        Positional arguments are the rows to be inserted, as dictionaries
        mapping column name to value.  The keys in all dictionaries must
        be the same.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    Implementations should raise a `sqlalchemy.exc.IntegrityError`
    exception when a constraint other than the primary key would be
    violated.

    Implementations are not required to support `replace` on tables
    with autoincrement keys.
    """
    raise NotImplementedError()

1541 

@abstractmethod
def ensure(self, table: sqlalchemy.schema.Table, *rows: dict, primary_key_only: bool = False) -> int:
    """Insert rows into a table, silently skipping any row whose insertion
    would violate a unique constraint.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    *rows
        Positional arguments are the rows to be inserted, as dictionaries
        mapping column name to value.  The keys in all dictionaries must
        be the same.
    primary_key_only : `bool`, optional
        If `True` (`False` is default), only skip rows that violate the
        primary key constraint, and raise an exception (and rollback
        transactions) for other constraint violations.

    Returns
    -------
    count : `int`
        The number of rows actually inserted.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.
        This is raised even if the operation would do nothing even on a
        writeable database.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    Implementations are not required to support `ensure` on tables
    with autoincrement keys.
    """
    raise NotImplementedError()

1581 

def delete(self, table: sqlalchemy.schema.Table, columns: Iterable[str], *rows: dict) -> int:
    """Delete one or more rows from a table.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table that rows should be deleted from.
    columns : `~collections.abc.Iterable` of `str`
        The names of columns that will be used to constrain the rows to
        be deleted; these will be combined via ``AND`` to form the
        ``WHERE`` clause of the delete query.  Any iterable is accepted,
        including one-shot iterators.  If empty, no ``WHERE`` clause is
        generated.
    *rows
        Positional arguments are the keys of rows to be deleted, as
        dictionaries mapping column name to value.  The keys in all
        dictionaries must be exactly the names in ``columns``.

    Returns
    -------
    count : `int`
        Number of rows deleted.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    The default implementation should be sufficient for most derived
    classes.
    """
    self.assertTableWriteable(table, f"Cannot delete from read-only table {table}.")
    # Force iterators to list *before* testing for emptiness: a generator
    # or iterator object is always truthy, so testing it directly would
    # treat an empty iterator as "columns were given".
    columns = list(columns)
    if columns and not rows:
        # If there are no columns, this operation is supposed to delete
        # everything (so we proceed as usual).  But if there are columns,
        # but no rows, it was a constrained bulk operation where the
        # constraint is that no rows match, and we should short-circuit
        # while reporting that no rows were affected.
        return 0
    sql = table.delete()

    # More efficient to use IN operator if there is only one
    # variable changing across all rows.
    content: dict[str, set] = defaultdict(set)
    if len(columns) == 1:
        # Nothing to calculate since we can always use IN
        column = columns[0]
        changing_columns = [column]
        content[column] = {row[column] for row in rows}
    else:
        for row in rows:
            for k, v in row.items():
                content[k].add(v)
        changing_columns = [col for col, values in content.items() if len(values) > 1]

    if len(changing_columns) != 1:
        # More than one column changes each time so do explicit bind
        # parameters and have each row processed separately.
        whereTerms = [table.columns[name] == sqlalchemy.sql.bindparam(name) for name in columns]
        if whereTerms:
            sql = sql.where(sqlalchemy.sql.and_(*whereTerms))
        with self._transaction() as (_, connection):
            return connection.execute(sql, rows).rowcount
    else:
        # One of the columns has changing values but any others are
        # fixed.  In this case we can use an IN operator and be more
        # efficient.
        name = changing_columns.pop()

        # Simple where clause for the unchanging columns
        clauses = []
        for k, v in content.items():
            if k == name:
                continue
            fixed_column = table.columns[k]
            # The set only has one element
            clauses.append(fixed_column == v.pop())

        # The IN operator will not work for "infinite" numbers of
        # rows so must batch it up into distinct calls.
        in_content = list(content[name])
        n_elements = len(in_content)

        rowcount = 0
        n_per_loop = 1_000  # Controls how many items to put in IN clause
        with self._transaction() as (_, connection):
            for iposn in range(0, n_elements, n_per_loop):
                endpos = iposn + n_per_loop
                in_clause = table.columns[name].in_(in_content[iposn:endpos])

                newsql = sql.where(sqlalchemy.sql.and_(*clauses, in_clause))
                rowcount += connection.execute(newsql).rowcount
        return rowcount

1680 

def deleteWhere(self, table: sqlalchemy.schema.Table, where: sqlalchemy.sql.ColumnElement) -> int:
    """Delete the rows of a table selected by a caller-built WHERE clause.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table that rows should be deleted from.
    where : `sqlalchemy.sql.ColumnElement`
        SQL expression that is used as-is as the ``WHERE`` clause of the
        delete query.

    Returns
    -------
    count : `int`
        Number of rows deleted.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    The default implementation should be sufficient for most derived
    classes.
    """
    self.assertTableWriteable(table, f"Cannot delete from read-only table {table}.")

    statement = table.delete().where(where)
    with self._transaction() as (_, connection):
        result = connection.execute(statement)
        return result.rowcount

1716 

def update(self, table: sqlalchemy.schema.Table, where: dict[str, str], *rows: dict) -> int:
    """Update one or more rows in a table.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table containing the rows to be updated.
    where : `dict` [`str`, `str`]
        A mapping from the names of columns that will be used to search for
        existing rows to the keys that will hold these values in the
        ``rows`` dictionaries.  Note that these may not be the same due to
        SQLAlchemy limitations.
    *rows
        Positional arguments are the rows to be updated.  The keys in all
        dictionaries must be the same, and may correspond to either a
        value in the ``where`` dictionary or the name of a column to be
        updated.

    Returns
    -------
    count : `int`
        Number of rows matched (regardless of whether the update actually
        modified them).

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    The default implementation should be sufficient for most derived
    classes.
    """
    self.assertTableWriteable(table, f"Cannot update read-only table {table}.")
    if not rows:
        # Nothing to do; skip building and executing a statement.
        return 0

    # One equality term per search column, each compared against a bind
    # parameter whose value is taken from the row dictionaries.
    conditions = [
        table.columns[column_name] == sqlalchemy.sql.bindparam(row_key)
        for column_name, row_key in where.items()
    ]
    statement = table.update().where(sqlalchemy.sql.and_(*conditions))
    with self._transaction() as (_, connection):
        result = connection.execute(statement, rows)
        return result.rowcount

1762 

@contextmanager
def query(
    self,
    sql: sqlalchemy.sql.expression.Executable | sqlalchemy.sql.expression.SelectBase,
    *args: Any,
    **kwargs: Any,
) -> Iterator[sqlalchemy.engine.CursorResult]:
    """Run a SELECT query against the database.

    Parameters
    ----------
    sql : `sqlalchemy.sql.expression.SelectBase`
        A SQLAlchemy representation of a ``SELECT`` query.
    *args
        Additional positional arguments are forwarded to
        `sqlalchemy.engine.Connection.execute`.
    **kwargs
        Additional keyword arguments are forwarded to
        `sqlalchemy.engine.Connection.execute`.

    Yields
    ------
    result : `sqlalchemy.engine.CursorResult`
        The query result object, available while the context is entered.
        These results are invalidated when the context is exited.

    Notes
    -----
    When there is no session connection, a new connection is opened for
    the duration of the context and closed on exit, including when
    executing the query itself raises.  An existing session connection is
    reused and left open.
    """
    if self._session_connection is None:
        connection = self._engine.connect()
    else:
        connection = self._session_connection
    try:
        # Execute inside the ``try`` so that a connection opened above is
        # still closed if the statement fails.
        # TODO: SelectBase is not good for execute(), but it used everywhere,
        # e.g. in daf_relation. We should switch to Executable at some point.
        result = connection.execute(cast(sqlalchemy.sql.expression.Executable, sql), *args, **kwargs)
        yield result
    finally:
        if connection is not self._session_connection:
            connection.close()

1801 

@abstractmethod
def constant_rows(
    self,
    fields: NamedValueAbstractSet[ddl.FieldSpec],
    *rows: dict,
    name: str | None = None,
) -> sqlalchemy.sql.FromClause:
    """Return a SQLAlchemy object that represents a small number of
    constant-valued rows.

    Parameters
    ----------
    fields : `NamedValueAbstractSet` [ `ddl.FieldSpec` ]
        The columns of the rows.  Unique and foreign key constraints are
        ignored.
    *rows : `dict`
        Values for the rows.
    name : `str`, optional
        If provided, the name of the SQL construct.  If not provided, an
        opaque but unique identifier is generated.

    Returns
    -------
    from_clause : `sqlalchemy.sql.FromClause`
        SQLAlchemy object representing the given rows.  This is guaranteed
        to be something that can be directly joined into a ``SELECT``
        query's ``FROM`` clause, and will not involve a temporary table
        that needs to be cleaned up later.

    Notes
    -----
    The default implementation uses the SQL-standard ``VALUES`` construct,
    but support for that construct is varied enough across popular RDBMSs
    that the method is still marked abstract to force explicit opt-in via
    delegation to `super`.
    """
    construct_name = f"tmp_{uuid.uuid4().hex}" if name is None else name
    columns = [sqlalchemy.Column(field.name, field.getSizedColumnType()) for field in fields]
    # Each row becomes a tuple ordered by ``fields.names``.
    data = [tuple(row[column_name] for column_name in fields.names) for row in rows]
    return sqlalchemy.sql.values(*columns, name=construct_name).data(data)

1844 

def get_constant_rows_max(self) -> int:
    """Return the maximum number of rows that should be passed to
    `constant_rows` for this backend.

    Returns
    -------
    max : `int`
        Maximum number of rows.

    Notes
    -----
    This should reflect typical performance profiles (or a guess at these),
    not just hard database engine limits.
    """
    max_rows = 100
    return max_rows

1860 

# Annotation-only declaration: no value is assigned in this block.
# NOTE(review): presumably set by concrete implementations - confirm.
origin: int
"""An integer ID that should be used as the default for any datasets,
quanta, or other entities that use a (autoincrement, origin) compound
primary key (`int`).
"""

# Annotation-only declaration: no value is assigned in this block; may be
# `None` according to the annotation.
namespace: str | None
"""The schema or namespace this database instance is associated with
(`str` or `None`).
"""

1870 """