Coverage for python/lsst/daf/butler/registry/interfaces/_database.py: 14%

413 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2023-03-23 02:06 -0700

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = [ 

24 "Database", 

25 "ReadOnlyDatabaseError", 

26 "DatabaseConflictError", 

27 "SchemaAlreadyDefinedError", 

28 "StaticTablesContext", 

29] 

30 

31import uuid 

32import warnings 

33from abc import ABC, abstractmethod 

34from collections import defaultdict 

35from contextlib import contextmanager 

36from typing import ( 

37 Any, 

38 Callable, 

39 Dict, 

40 Iterable, 

41 Iterator, 

42 List, 

43 Optional, 

44 Sequence, 

45 Set, 

46 Tuple, 

47 Type, 

48 Union, 

49 cast, 

50 final, 

51) 

52 

53import astropy.time 

54import sqlalchemy 

55 

56from ...core import TimespanDatabaseRepresentation, ddl, time_utils 

57from ...core.named import NamedValueAbstractSet 

58from .._exceptions import ConflictingDefinitionError 

59 

60 

61# TODO: method is called with list[ReflectedColumn] in SA 2, and 

62# ReflectedColumn does not exist in 1.4. 

63def _checkExistingTableDefinition(name: str, spec: ddl.TableSpec, inspection: list) -> None: 

64 """Test that the definition of a table in a `ddl.TableSpec` and from 

65 database introspection are consistent. 

66 

67 Parameters 

68 ---------- 

69 name : `str` 

70 Name of the table (only used in error messages). 

71 spec : `ddl.TableSpec` 

72 Specification of the table. 

73 inspection : `dict` 

74 Dictionary returned by 

75 `sqlalchemy.engine.reflection.Inspector.get_columns`. 

76 

77 Raises 

78 ------ 

79 DatabaseConflictError 

80 Raised if the definitions are inconsistent. 

81 """ 

82 columnNames = [c["name"] for c in inspection] 

83 if spec.fields.names != set(columnNames): 

84 raise DatabaseConflictError( 

85 f"Table '{name}' exists but is defined differently in the database; " 

86 f"specification has columns {list(spec.fields.names)}, while the " 

87 f"table in the database has {columnNames}." 

88 ) 

89 

90 

class ReadOnlyDatabaseError(RuntimeError):
    """Error signalling that a write operation was attempted through a
    `Database` that is read-only.
    """

95 

96 

class DatabaseConflictError(ConflictingDefinitionError):
    """Error signalling that what is stored in the database (row values or
    schema entities) is inconsistent with what this client expects.
    """

101 

102 

class SchemaAlreadyDefinedError(RuntimeError):
    """Error signalling an attempt to initialize the database schema while
    some tables already exist.
    """

107 

108 

class StaticTablesContext:
    """Collector for the static schema declarations of one registry layer
    in a database.

    Instances are handed out by `Database.declareStaticTables`, which should
    be the only code that constructs them.
    """

    def __init__(self, db: Database, connection: sqlalchemy.engine.Connection):
        self._db = db
        # Foreign keys are only recorded here; `Database.declareStaticTables`
        # appends them to their tables after the declaring block has finished.
        self._foreignKeys: List[Tuple[sqlalchemy.schema.Table, sqlalchemy.schema.ForeignKeyConstraint]] = []
        self._inspector = sqlalchemy.inspect(connection)
        # Snapshot of the tables present in the namespace at construction.
        self._tableNames = frozenset(self._inspector.get_table_names(schema=self._db.namespace))
        self._initializers: List[Callable[[Database], None]] = []

    def addTable(self, name: str, spec: ddl.TableSpec) -> sqlalchemy.schema.Table:
        """Declare one table and return its SQLAlchemy representation.

        Actual creation may be deferred until the end of the context
        created by `Database.declareStaticTables`, so tables can be declared
        in any order even when they reference each other via foreign keys.

        Parameters
        ----------
        name : `str`
            Logical table name; it is passed through the database's
            ``_mangleTableName`` before use.
        spec : `ddl.TableSpec`
            Specification of the table.

        Returns
        -------
        table : `sqlalchemy.schema.Table`
            SQLAlchemy representation of the table.
        """
        db = self._db
        name = db._mangleTableName(name)
        if name in self._tableNames:
            # The table was already present when this context was created:
            # make sure its columns agree with the specification.
            existing_columns = self._inspector.get_columns(name, schema=db.namespace)
            _checkExistingTableDefinition(name, spec, existing_columns)
        metadata = db._metadata
        assert metadata is not None, "Guaranteed by context manager that returns this object."
        table = db._convertTableSpec(name, spec, metadata)
        for fk_spec in spec.foreignKeys:
            constraint = db._convertForeignKeySpec(name, fk_spec, metadata)
            self._foreignKeys.append((table, constraint))
        return table

    def addTableTuple(self, specs: Tuple[ddl.TableSpec, ...]) -> Tuple[sqlalchemy.schema.Table, ...]:
        """Declare every table in a named tuple of specifications and return
        the SQLAlchemy tables in a named tuple of the same type.

        Actual creation may be deferred until the end of the context
        created by `Database.declareStaticTables`, so tables can be declared
        in any order even when they reference each other via foreign keys.

        Notes
        -----
        ``specs`` *must* be an instance of a type created by
        `collections.namedtuple`, not just a regular tuple, and the returned
        object is guaranteed to be of the same type. Because
        `~collections.namedtuple` is just a factory for `type` objects, not
        an actual type itself, this cannot be expressed with type
        annotations. The field names of the tuple are used as table names.
        """
        tables = (self.addTable(name, spec) for name, spec in zip(specs._fields, specs))  # type: ignore
        return specs._make(tables)  # type: ignore

    def addInitializer(self, initializer: Callable[[Database], None]) -> None:
        """Register a callable that performs one-time database
        initialization.

        Initialization can mean anything that changes the state of a
        database and needs to be done exactly once after the database schema
        was created, for example populating schema attributes.

        Parameters
        ----------
        initializer : callable
            Callable taking a single argument, a `Database` instance.
        """
        self._initializers.append(initializer)

179 

180 

181class Database(ABC): 

182 """An abstract interface that represents a particular database engine's 

183 representation of a single schema/namespace/database. 

184 

185 Parameters 

186 ---------- 

187 origin : `int` 

188 An integer ID that should be used as the default for any datasets, 

189 quanta, or other entities that use a (autoincrement, origin) compound 

190 primary key. 

191 engine : `sqlalchemy.engine.Engine` 

192 The SQLAlchemy engine for this `Database`. 

193 namespace : `str`, optional 

194 Name of the schema or namespace this instance is associated with. 

195 This is passed as the ``schema`` argument when constructing a 

196 `sqlalchemy.schema.MetaData` instance. We use ``namespace`` instead to 

197 avoid confusion between "schema means namespace" and "schema means 

198 table definitions". 

199 

200 Notes 

201 ----- 

202 `Database` requires all write operations to go through its special named 

203 methods. Our write patterns are sufficiently simple that we don't really 

204 need the full flexibility of SQL insert/update/delete syntax, and we need 

205 non-standard (but common) functionality in these operations sufficiently 

206 often that it seems worthwhile to provide our own generic API. 

207 

208 In contrast, `Database.query` allows arbitrary ``SELECT`` queries (via 

209 their SQLAlchemy representation) to be run, as we expect these to require 

210 significantly more sophistication while still being limited to standard 

211 SQL. 

212 

213 `Database` itself has several underscore-prefixed attributes: 

214 

215 - ``_engine``: SQLAlchemy object representing its engine. 

216 - ``_connection``: method returning a context manager for 

217 `sqlalchemy.engine.Connection` object. 

218 - ``_metadata``: the `sqlalchemy.schema.MetaData` object representing 

219 the tables and other schema entities. 

220 

221 These are considered protected (derived classes may access them, but other 

222 code should not), and read-only, aside from executing SQL via 

223 ``_connection``. 

224 """ 

225 

226 def __init__(self, *, origin: int, engine: sqlalchemy.engine.Engine, namespace: Optional[str] = None): 

227 self.origin = origin 

228 self.namespace = namespace 

229 self._engine = engine 

230 self._session_connection: Optional[sqlalchemy.engine.Connection] = None 

231 self._metadata: Optional[sqlalchemy.schema.MetaData] = None 

232 self._temp_tables: Set[str] = set() 

233 

234 def __repr__(self) -> str: 

235 # Rather than try to reproduce all the parameters used to create 

236 # the object, instead report the more useful information of the 

237 # connection URL. 

238 if self._engine.url.password is not None: 

239 uri = str(self._engine.url.set(password="***")) 

240 else: 

241 uri = str(self._engine.url) 

242 if self.namespace: 

243 uri += f"#{self.namespace}" 

244 return f'{type(self).__name__}("{uri}")' 

245 

246 @classmethod 

247 def makeDefaultUri(cls, root: str) -> Optional[str]: 

248 """Create a default connection URI appropriate for the given root 

249 directory, or `None` if there can be no such default. 

250 """ 

251 return None 

252 

253 @classmethod 

254 def fromUri( 

255 cls, uri: str, *, origin: int, namespace: Optional[str] = None, writeable: bool = True 

256 ) -> Database: 

257 """Construct a database from a SQLAlchemy URI. 

258 

259 Parameters 

260 ---------- 

261 uri : `str` 

262 A SQLAlchemy URI connection string. 

263 origin : `int` 

264 An integer ID that should be used as the default for any datasets, 

265 quanta, or other entities that use a (autoincrement, origin) 

266 compound primary key. 

267 namespace : `str`, optional 

268 A database namespace (i.e. schema) the new instance should be 

269 associated with. If `None` (default), the namespace (if any) is 

270 inferred from the URI. 

271 writeable : `bool`, optional 

272 If `True`, allow write operations on the database, including 

273 ``CREATE TABLE``. 

274 

275 Returns 

276 ------- 

277 db : `Database` 

278 A new `Database` instance. 

279 """ 

280 return cls.fromEngine( 

281 cls.makeEngine(uri, writeable=writeable), origin=origin, namespace=namespace, writeable=writeable 

282 ) 

283 

284 @classmethod 

285 @abstractmethod 

286 def makeEngine(cls, uri: str, *, writeable: bool = True) -> sqlalchemy.engine.Engine: 

287 """Create a `sqlalchemy.engine.Engine` from a SQLAlchemy URI. 

288 

289 Parameters 

290 ---------- 

291 uri : `str` 

292 A SQLAlchemy URI connection string. 

293 writeable : `bool`, optional 

294 If `True`, allow write operations on the database, including 

295 ``CREATE TABLE``. 

296 

297 Returns 

298 ------- 

299 engine : `sqlalchemy.engine.Engine` 

300 A database engine. 

301 

302 Notes 

303 ----- 

304 Subclasses that support other ways to connect to a database are 

305 encouraged to add optional arguments to their implementation of this 

306 method, as long as they maintain compatibility with the base class 

307 call signature. 

308 """ 

309 raise NotImplementedError() 

310 

311 @classmethod 

312 @abstractmethod 

313 def fromEngine( 

314 cls, 

315 engine: sqlalchemy.engine.Engine, 

316 *, 

317 origin: int, 

318 namespace: Optional[str] = None, 

319 writeable: bool = True, 

320 ) -> Database: 

321 """Create a new `Database` from an existing `sqlalchemy.engine.Engine`. 

322 

323 Parameters 

324 ---------- 

325 engine : `sqlalchemy.engine.Engine` 

326 The engine for the database. May be shared between `Database` 

327 instances. 

328 origin : `int` 

329 An integer ID that should be used as the default for any datasets, 

330 quanta, or other entities that use a (autoincrement, origin) 

331 compound primary key. 

332 namespace : `str`, optional 

333 A different database namespace (i.e. schema) the new instance 

334 should be associated with. If `None` (default), the namespace 

335 (if any) is inferred from the connection. 

336 writeable : `bool`, optional 

337 If `True`, allow write operations on the database, including 

338 ``CREATE TABLE``. 

339 

340 Returns 

341 ------- 

342 db : `Database` 

343 A new `Database` instance. 

344 

345 Notes 

346 ----- 

347 This method allows different `Database` instances to share the same 

348 engine, which is desirable when they represent different namespaces 

349 can be queried together. 

350 """ 

351 raise NotImplementedError() 

352 

353 @final 

354 @contextmanager 

355 def session(self) -> Iterator[None]: 

356 """Return a context manager that represents a session (persistent 

357 connection to a database). 

358 

359 Returns 

360 ------- 

361 context : `AbstractContextManager` [ `None` ] 

362 A context manager that does not return a value when entered. 

363 

364 Notes 

365 ----- 

366 This method should be used when a sequence of read-only SQL operations 

367 will be performed in rapid succession *without* a requirement that they 

368 yield consistent results in the presence of concurrent writes (or, more 

369 rarely, when conflicting concurrent writes are rare/impossible and the 

370 session will be open long enough that a transaction is inadvisable). 

371 """ 

372 with self._session(): 

373 yield 

374 

375 @final 

376 @contextmanager 

377 def transaction( 

378 self, 

379 *, 

380 interrupting: bool = False, 

381 savepoint: bool = False, 

382 lock: Iterable[sqlalchemy.schema.Table] = (), 

383 for_temp_tables: bool = False, 

384 ) -> Iterator[None]: 

385 """Return a context manager that represents a transaction. 

386 

387 Parameters 

388 ---------- 

389 interrupting : `bool`, optional 

390 If `True` (`False` is default), this transaction block may not be 

391 nested without an outer one, and attempting to do so is a logic 

392 (i.e. assertion) error. 

393 savepoint : `bool`, optional 

394 If `True` (`False` is default), create a `SAVEPOINT`, allowing 

395 exceptions raised by the database (e.g. due to constraint 

396 violations) during this transaction's context to be caught outside 

397 it without also rolling back all operations in an outer transaction 

398 block. If `False`, transactions may still be nested, but a 

399 rollback may be generated at any level and affects all levels, and 

400 commits are deferred until the outermost block completes. If any 

401 outer transaction block was created with ``savepoint=True``, all 

402 inner blocks will be as well (regardless of the actual value 

403 passed). This has no effect if this is the outermost transaction. 

404 lock : `Iterable` [ `sqlalchemy.schema.Table` ], optional 

405 A list of tables to lock for the duration of this transaction. 

406 These locks are guaranteed to prevent concurrent writes and allow 

407 this transaction (only) to acquire the same locks (others should 

408 block), but only prevent concurrent reads if the database engine 

409 requires that in order to block concurrent writes. 

410 for_temp_tables : `bool`, optional 

411 If `True`, this transaction may involve creating temporary tables. 

412 

413 Returns 

414 ------- 

415 context : `AbstractContextManager` [ `None` ] 

416 A context manager that commits the transaction when it is exited 

417 without error and rolls back the transactoin when it is exited via 

418 an exception. 

419 

420 Notes 

421 ----- 

422 All transactions on a connection managed by one or more `Database` 

423 instances _must_ go through this method, or transaction state will not 

424 be correctly managed. 

425 """ 

426 with self._transaction( 

427 interrupting=interrupting, savepoint=savepoint, lock=lock, for_temp_tables=for_temp_tables 

428 ): 

429 yield 

430 

431 @contextmanager 

432 def temporary_table( 

433 self, spec: ddl.TableSpec, name: Optional[str] = None 

434 ) -> Iterator[sqlalchemy.schema.Table]: 

435 """Return a context manager that creates and then drops a temporary 

436 table. 

437 

438 Parameters 

439 ---------- 

440 spec : `ddl.TableSpec` 

441 Specification for the columns. Unique and foreign key constraints 

442 may be ignored. 

443 name : `str`, optional 

444 If provided, the name of the SQL construct. If not provided, an 

445 opaque but unique identifier is generated. 

446 

447 Returns 

448 ------- 

449 context : `AbstractContextManager` [ `sqlalchemy.schema.Table` ] 

450 A context manager that returns a SQLAlchemy representation of the 

451 temporary table when entered. 

452 

453 Notes 

454 ----- 

455 Temporary tables may be created, dropped, and written to even in 

456 read-only databases - at least according to the Python-level 

457 protections in the `Database` classes. Server permissions may say 

458 otherwise, but in that case they probably need to be modified to 

459 support the full range of expected read-only butler behavior. 

460 """ 

461 with self._session() as connection: 

462 table = self._make_temporary_table(connection, spec=spec, name=name) 

463 self._temp_tables.add(table.key) 

464 try: 

465 yield table 

466 finally: 

467 with self._transaction(): 

468 table.drop(connection) 

469 self._temp_tables.remove(table.key) 

470 

471 @contextmanager 

472 def _session(self) -> Iterator[sqlalchemy.engine.Connection]: 

473 """Protected implementation for `session` that actually returns the 

474 connection. 

475 

476 This method is for internal `Database` calls that need the actual 

477 SQLAlchemy connection object. It should be overridden by subclasses 

478 instead of `session` itself. 

479 

480 Returns 

481 ------- 

482 context : `AbstractContextManager` [ `sqlalchemy.engine.Connection` ] 

483 A context manager that returns a SQLALchemy connection when 

484 entered. 

485 

486 """ 

487 if self._session_connection is not None: 

488 # session already started, just reuse that 

489 yield self._session_connection 

490 else: 

491 try: 

492 # open new connection and close it when done 

493 self._session_connection = self._engine.connect() 

494 yield self._session_connection 

495 finally: 

496 if self._session_connection is not None: 

497 self._session_connection.close() 

498 self._session_connection = None 

499 # Temporary tables only live within session 

500 self._temp_tables = set() 

501 

502 @contextmanager 

503 def _transaction( 

504 self, 

505 *, 

506 interrupting: bool = False, 

507 savepoint: bool = False, 

508 lock: Iterable[sqlalchemy.schema.Table] = (), 

509 for_temp_tables: bool = False, 

510 ) -> Iterator[tuple[bool, sqlalchemy.engine.Connection]]: 

511 """Protected implementation for `transaction` that actually returns the 

512 connection and whether this is a new outermost transaction. 

513 

514 This method is for internal `Database` calls that need the actual 

515 SQLAlchemy connection object. It should be overridden by subclasses 

516 instead of `transaction` itself. 

517 

518 Parameters 

519 ---------- 

520 interrupting : `bool`, optional 

521 If `True` (`False` is default), this transaction block may not be 

522 nested without an outer one, and attempting to do so is a logic 

523 (i.e. assertion) error. 

524 savepoint : `bool`, optional 

525 If `True` (`False` is default), create a `SAVEPOINT`, allowing 

526 exceptions raised by the database (e.g. due to constraint 

527 violations) during this transaction's context to be caught outside 

528 it without also rolling back all operations in an outer transaction 

529 block. If `False`, transactions may still be nested, but a 

530 rollback may be generated at any level and affects all levels, and 

531 commits are deferred until the outermost block completes. If any 

532 outer transaction block was created with ``savepoint=True``, all 

533 inner blocks will be as well (regardless of the actual value 

534 passed). This has no effect if this is the outermost transaction. 

535 lock : `Iterable` [ `sqlalchemy.schema.Table` ], optional 

536 A list of tables to lock for the duration of this transaction. 

537 These locks are guaranteed to prevent concurrent writes and allow 

538 this transaction (only) to acquire the same locks (others should 

539 block), but only prevent concurrent reads if the database engine 

540 requires that in order to block concurrent writes. 

541 for_temp_tables : `bool`, optional 

542 If `True`, this transaction may involve creating temporary tables. 

543 

544 Returns 

545 ------- 

546 context : `AbstractContextManager` [ `tuple` [ `bool`, 

547 `sqlalchemy.engine.Connection` ] ] 

548 A context manager that commits the transaction when it is exited 

549 without error and rolls back the transactoin when it is exited via 

550 an exception. When entered, it returns a tuple of: 

551 

552 - ``is_new`` (`bool`): whether this is a new (outermost) 

553 transaction; 

554 - ``connection`` (`sqlalchemy.engine.Connection`): the connection. 

555 """ 

556 with self._session() as connection: 

557 already_in_transaction = connection.in_transaction() 

558 assert not (interrupting and already_in_transaction), ( 

559 "Logic error in transaction nesting: an operation that would " 

560 "interrupt the active transaction context has been requested." 

561 ) 

562 savepoint = savepoint or connection.in_nested_transaction() 

563 trans: sqlalchemy.engine.Transaction | None 

564 if already_in_transaction: 

565 if savepoint: 

566 trans = connection.begin_nested() 

567 else: 

568 # Nested non-savepoint transactions don't do anything. 

569 trans = None 

570 else: 

571 # Use a regular (non-savepoint) transaction always for the 

572 # outermost context. 

573 trans = connection.begin() 

574 self._lockTables(connection, lock) 

575 try: 

576 yield not already_in_transaction, connection 

577 if trans is not None: 

578 trans.commit() 

579 except BaseException: 

580 if trans is not None: 

581 trans.rollback() 

582 raise 

583 

584 @abstractmethod 

585 def _lockTables( 

586 self, connection: sqlalchemy.engine.Connection, tables: Iterable[sqlalchemy.schema.Table] = () 

587 ) -> None: 

588 """Acquire locks on the given tables. 

589 

590 This is an implementation hook for subclasses, called by `transaction`. 

591 It should not be called directly by other code. 

592 

593 Parameters 

594 ---------- 

595 connection : `sqlalchemy.engine.Connection` 

596 Database connection object. It is guaranteed that transaction is 

597 already in a progress for this connection. 

598 tables : `Iterable` [ `sqlalchemy.schema.Table` ], optional 

599 A list of tables to lock for the duration of this transaction. 

600 These locks are guaranteed to prevent concurrent writes and allow 

601 this transaction (only) to acquire the same locks (others should 

602 block), but only prevent concurrent reads if the database engine 

603 requires that in order to block concurrent writes. 

604 """ 

605 raise NotImplementedError() 

606 

607 def isTableWriteable(self, table: sqlalchemy.schema.Table) -> bool: 

608 """Check whether a table is writeable, either because the database 

609 connection is read-write or the table is a temporary table. 

610 

611 Parameters 

612 ---------- 

613 table : `sqlalchemy.schema.Table` 

614 SQLAlchemy table object to check. 

615 

616 Returns 

617 ------- 

618 writeable : `bool` 

619 Whether this table is writeable. 

620 """ 

621 return self.isWriteable() or table.key in self._temp_tables 

622 

623 def assertTableWriteable(self, table: sqlalchemy.schema.Table, msg: str) -> None: 

624 """Raise if the given table is not writeable, either because the 

625 database connection is read-write or the table is a temporary table. 

626 

627 Parameters 

628 ---------- 

629 table : `sqlalchemy.schema.Table` 

630 SQLAlchemy table object to check. 

631 msg : `str`, optional 

632 If provided, raise `ReadOnlyDatabaseError` instead of returning 

633 `False`, with this message. 

634 """ 

635 if not self.isTableWriteable(table): 

636 raise ReadOnlyDatabaseError(msg) 

637 

638 @contextmanager 

639 def declareStaticTables(self, *, create: bool) -> Iterator[StaticTablesContext]: 

640 """Return a context manager in which the database's static DDL schema 

641 can be declared. 

642 

643 Parameters 

644 ---------- 

645 create : `bool` 

646 If `True`, attempt to create all tables at the end of the context. 

647 If `False`, they will be assumed to already exist. 

648 

649 Returns 

650 ------- 

651 schema : `StaticTablesContext` 

652 A helper object that is used to add new tables. 

653 

654 Raises 

655 ------ 

656 ReadOnlyDatabaseError 

657 Raised if ``create`` is `True`, `Database.isWriteable` is `False`, 

658 and one or more declared tables do not already exist. 

659 

660 Examples 

661 -------- 

662 Given a `Database` instance ``db``:: 

663 

664 with db.declareStaticTables(create=True) as schema: 

665 schema.addTable("table1", TableSpec(...)) 

666 schema.addTable("table2", TableSpec(...)) 

667 

668 Notes 

669 ----- 

670 A database's static DDL schema must be declared before any dynamic 

671 tables are managed via calls to `ensureTableExists` or 

672 `getExistingTable`. The order in which static schema tables are added 

673 inside the context block is unimportant; they will automatically be 

674 sorted and added in an order consistent with their foreign key 

675 relationships. 

676 """ 

677 if create and not self.isWriteable(): 

678 raise ReadOnlyDatabaseError(f"Cannot create tables in read-only database {self}.") 

679 self._metadata = sqlalchemy.MetaData(schema=self.namespace) 

680 try: 

681 with self._transaction() as (_, connection): 

682 context = StaticTablesContext(self, connection) 

683 if create and context._tableNames: 

684 # Looks like database is already initalized, to avoid 

685 # danger of modifying/destroying valid schema we refuse to 

686 # do anything in this case 

687 raise SchemaAlreadyDefinedError(f"Cannot create tables in non-empty database {self}.") 

688 yield context 

689 for table, foreignKey in context._foreignKeys: 

690 table.append_constraint(foreignKey) 

691 if create: 

692 if self.namespace is not None: 

693 if self.namespace not in context._inspector.get_schema_names(): 

694 connection.execute(sqlalchemy.schema.CreateSchema(self.namespace)) 

695 # In our tables we have columns that make use of sqlalchemy 

696 # Sequence objects. There is currently a bug in sqlalchemy 

697 # that causes a deprecation warning to be thrown on a 

698 # property of the Sequence object when the repr for the 

699 # sequence is created. Here a filter is used to catch these 

700 # deprecation warnings when tables are created. 

701 with warnings.catch_warnings(): 

702 warnings.simplefilter("ignore", category=sqlalchemy.exc.SADeprecationWarning) 

703 self._metadata.create_all(connection) 

704 # call all initializer methods sequentially 

705 for init in context._initializers: 

706 init(self) 

707 except BaseException: 

708 self._metadata = None 

709 raise 

710 

711 @abstractmethod 

712 def isWriteable(self) -> bool: 

713 """Return `True` if this database can be modified by this client.""" 

714 raise NotImplementedError() 

715 

716 @abstractmethod 

717 def __str__(self) -> str: 

718 """Return a human-readable identifier for this `Database`, including 

719 any namespace or schema that identifies its names within a `Registry`. 

720 """ 

721 raise NotImplementedError() 

722 

723 @property 

724 def dialect(self) -> sqlalchemy.engine.Dialect: 

725 """The SQLAlchemy dialect for this database engine 

726 (`sqlalchemy.engine.Dialect`). 

727 """ 

728 return self._engine.dialect 

729 

730 def shrinkDatabaseEntityName(self, original: str) -> str: 

731 """Return a version of the given name that fits within this database 

732 engine's length limits for table, constraint, indexes, and sequence 

733 names. 

734 

735 Implementations should not assume that simple truncation is safe, 

736 because multiple long names often begin with the same prefix. 

737 

738 The default implementation simply returns the given name. 

739 

740 Parameters 

741 ---------- 

742 original : `str` 

743 The original name. 

744 

745 Returns 

746 ------- 

747 shrunk : `str` 

748 The new, possibly shortened name. 

749 """ 

750 return original 

751 

752 def expandDatabaseEntityName(self, shrunk: str) -> str: 

753 """Retrieve the original name for a database entity that was too long 

754 to fit within the database engine's limits. 

755 

756 Parameters 

757 ---------- 

758 original : `str` 

759 The original name. 

760 

761 Returns 

762 ------- 

763 shrunk : `str` 

764 The new, possibly shortened name. 

765 """ 

766 return shrunk 

767 

768 def _mangleTableName(self, name: str) -> str: 

769 """Map a logical, user-visible table name to the true table name used 

770 in the database. 

771 

772 The default implementation returns the given name unchanged. 

773 

774 Parameters 

775 ---------- 

776 name : `str` 

777 Input table name. Should not include a namespace (i.e. schema) 

778 prefix. 

779 

780 Returns 

781 ------- 

782 mangled : `str` 

783 Mangled version of the table name (still with no namespace prefix). 

784 

785 Notes 

786 ----- 

787 Reimplementations of this method must be idempotent - mangling an 

788 already-mangled name must have no effect. 

789 """ 

790 return name 

791 

792 def _makeColumnConstraints(self, table: str, spec: ddl.FieldSpec) -> List[sqlalchemy.CheckConstraint]: 

793 """Create constraints based on this spec. 

794 

795 Parameters 

796 ---------- 

797 table : `str` 

798 Name of the table this column is being added to. 

799 spec : `FieldSpec` 

800 Specification for the field to be added. 

801 

802 Returns 

803 ------- 

804 constraint : `list` of `sqlalchemy.CheckConstraint` 

805 Constraint added for this column. 

806 """ 

807 # By default we return no additional constraints 

808 return [] 

809 

def _convertFieldSpec(
    self, table: str, spec: ddl.FieldSpec, metadata: sqlalchemy.MetaData, **kwargs: Any
) -> sqlalchemy.schema.Column:
    """Translate a `FieldSpec` into a `sqlalchemy.schema.Column`.

    Parameters
    ----------
    table : `str`
        Name of the table the column belongs to; used to name the
        auto-increment sequence and in the assertion message.
    spec : `FieldSpec`
        Specification for the field to be converted.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema the field's table is
        being added to.
    **kwargs
        Extra keyword arguments passed through to the
        `sqlalchemy.schema.Column` constructor, so that derived classes
        can delegate to ``super()`` with small adjustments.

    Returns
    -------
    column : `sqlalchemy.schema.Column`
        SQLAlchemy representation of the field.
    """
    # An explicit sequence provides auto-increment behavior for databases
    # that lack native support; sqlalchemy ignores it for databases that
    # do support it.
    sequences = (
        [
            sqlalchemy.Sequence(
                self.shrinkDatabaseEntityName(f"{table}_seq_{spec.name}"), metadata=metadata
            )
        ]
        if spec.autoincrement
        else []
    )
    assert spec.doc is None or isinstance(spec.doc, str), f"Bad doc for {table}.{spec.name}."
    column = sqlalchemy.schema.Column(
        spec.name,
        spec.getSizedColumnType(),
        *sequences,
        nullable=spec.nullable,
        primary_key=spec.primaryKey,
        comment=spec.doc,
        server_default=spec.default,
        **kwargs,
    )
    return column

856 

def _convertForeignKeySpec(
    self, table: str, spec: ddl.ForeignKeySpec, metadata: sqlalchemy.MetaData, **kwargs: Any
) -> sqlalchemy.schema.ForeignKeyConstraint:
    """Convert a `ForeignKeySpec` to a
    `sqlalchemy.schema.ForeignKeyConstraint`.

    Parameters
    ----------
    table : `str`
        Name of the table this foreign key is being added to.
    spec : `ForeignKeySpec`
        Specification for the foreign key to be added.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema this constraint is
        being added to.  Not used by this implementation.
    **kwargs
        Additional keyword arguments to forward to the
        `sqlalchemy.schema.ForeignKeyConstraint` constructor.  This is
        provided to make it easier for derived classes to delegate to
        ``super()`` while making only minor changes.  Passing ``name`` or
        ``ondelete`` here conflicts with the values set by this method.

    Returns
    -------
    constraint : `sqlalchemy.schema.ForeignKeyConstraint`
        SQLAlchemy representation of the constraint.
    """
    # Mangle the referenced table name once and reuse it for both the
    # constraint name and the fully-qualified target columns.
    target_table = self._mangleTableName(spec.table)
    name = self.shrinkDatabaseEntityName(
        "_".join(["fkey", table, target_table] + list(spec.target) + list(spec.source))
    )
    return sqlalchemy.schema.ForeignKeyConstraint(
        spec.source,
        [f"{target_table}.{col}" for col in spec.target],
        name=name,
        ondelete=spec.onDelete,
        # Previously accepted but silently dropped, contrary to the
        # documented contract.
        **kwargs,
    )

894 

895 def _convertExclusionConstraintSpec( 

896 self, 

897 table: str, 

898 spec: Tuple[Union[str, Type[TimespanDatabaseRepresentation]], ...], 

899 metadata: sqlalchemy.MetaData, 

900 ) -> sqlalchemy.schema.Constraint: 

901 """Convert a `tuple` from `ddl.TableSpec.exclusion` into a SQLAlchemy 

902 constraint representation. 

903 

904 Parameters 

905 ---------- 

906 table : `str` 

907 Name of the table this constraint is being added to. 

908 spec : `tuple` [ `str` or `type` ] 

909 A tuple of `str` column names and the `type` object returned by 

910 `getTimespanRepresentation` (which must appear exactly once), 

911 indicating the order of the columns in the index used to back the 

912 constraint. 

913 metadata : `sqlalchemy.MetaData` 

914 SQLAlchemy representation of the DDL schema this constraint is 

915 being added to. 

916 

917 Returns 

918 ------- 

919 constraint : `sqlalchemy.schema.Constraint` 

920 SQLAlchemy representation of the constraint. 

921 

922 Raises 

923 ------ 

924 NotImplementedError 

925 Raised if this database does not support exclusion constraints. 

926 """ 

927 raise NotImplementedError(f"Database {self} does not support exclusion constraints.") 

928 

def _convertTableSpec(
    self, name: str, spec: ddl.TableSpec, metadata: sqlalchemy.MetaData, **kwargs: Any
) -> sqlalchemy.schema.Table:
    """Translate a `TableSpec` into a `sqlalchemy.schema.Table`.

    Parameters
    ----------
    name : `str`
        Name of the table; it is passed through `_mangleTableName` before
        use.
    spec : `TableSpec`
        Specification for the table to be converted.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema the table is being
        added to.
    **kwargs
        Extra keyword arguments passed through to the
        `sqlalchemy.schema.Table` constructor, so that derived classes can
        delegate to ``super()`` with small adjustments.

    Returns
    -------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy representation of the table.

    Notes
    -----
    Foreign key *constraints* from ``spec.foreignKeys`` are deliberately
    not created here, in order to avoid circular dependencies; they are
    appended by higher-level logic in `ensureTableExists`,
    `getExistingTable`, and `declareStaticTables`.  Indexes on foreign key
    source columns are created here when requested by the spec.
    """
    name = self._mangleTableName(name)
    shrink = self.shrinkDatabaseEntityName

    # Columns come first, followed by any per-column constraints.
    items: list[sqlalchemy.schema.SchemaItem] = []
    for fieldSpec in spec.fields:
        items.append(self._convertFieldSpec(name, fieldSpec, metadata))
    for fieldSpec in spec.fields:
        items.extend(self._makeColumnConstraints(name, fieldSpec))

    # Column tuples that already have an index (via the primary key or a
    # unique constraint); used to avoid adding duplicate explicit or
    # foreign key indexes for them.
    primaryKey = tuple(fieldSpec.name for fieldSpec in spec.fields if fieldSpec.primaryKey)
    covered = {primaryKey}

    for columns in spec.unique:
        items.append(
            sqlalchemy.schema.UniqueConstraint(
                *columns, name=shrink("_".join([name, "unq"] + list(columns)))
            )
        )
    covered.update(spec.unique)

    # Explicit indexes are all checked against the same ``covered`` set;
    # it is only extended after every one of them has been considered.
    for index in spec.indexes:
        if index.columns in covered:
            continue
        items.append(
            sqlalchemy.schema.Index(
                shrink("_".join([name, "idx"] + list(index.columns))),
                *index.columns,
                unique=(index.columns in spec.unique),
                **index.kwargs,
            )
        )
    covered.update(index.columns for index in spec.indexes)

    for fk in spec.foreignKeys:
        if not fk.addIndex or fk.source in covered:
            continue
        items.append(
            sqlalchemy.schema.Index(
                shrink("_".join((name, "fkidx") + fk.source)),
                *fk.source,
            )
        )

    for excl in spec.exclusion:
        items.append(self._convertExclusionConstraintSpec(name, excl, metadata))

    assert spec.doc is None or isinstance(spec.doc, str), f"Bad doc for {name}."
    return sqlalchemy.schema.Table(name, metadata, *items, comment=spec.doc, info={"spec": spec}, **kwargs)

1002 

def ensureTableExists(self, name: str, spec: ddl.TableSpec) -> sqlalchemy.schema.Table:
    """Return the table with the given name and specification, creating
    it first if it does not exist yet.

    Parameters
    ----------
    name : `str`
        Name of the table (not including namespace qualifiers).
    spec : `TableSpec`
        Specification for the table.  It is used to create the table, and
        *may* be used to check an existing table for consistency, but no
        such check is guaranteed.

    Returns
    -------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy representation of the table.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False`, and the table does not
        already exist.
    DatabaseConflictError
        Raised if the table exists but ``spec`` is inconsistent with its
        definition.

    Notes
    -----
    This method may not be called within transactions.  It may be called
    on read-only databases if and only if the table does in fact already
    exist.

    Subclasses may override this method, but usually should not need to.
    """
    # TODO: if _engine is used to make a table then it uses separate
    # connection and should not interfere with current transaction
    session = self._session_connection
    assert session is None or not session.in_transaction(), "Table creation interrupts transactions."
    assert self._metadata is not None, "Static tables must be declared before dynamic tables."

    existing = self.getExistingTable(name, spec)
    if existing is not None:
        return existing
    if not self.isWriteable():
        raise ReadOnlyDatabaseError(
            f"Table {name} does not exist, and cannot be created because database {self} is read-only."
        )

    table = self._convertTableSpec(name, spec, self._metadata)
    for foreignKeySpec in spec.foreignKeys:
        constraint = self._convertForeignKeySpec(name, foreignKeySpec, self._metadata)
        table.append_constraint(constraint)
    try:
        with self._transaction() as (_, connection):
            table.create(connection)
    except sqlalchemy.exc.DatabaseError:
        # Another process may have created the table in the meantime,
        # which usually shows up as OperationalError or ProgrammingError.
        # IF NOT EXISTS cannot be used here because of a server-side race
        # condition in PostgreSQL that causes IntegrityError.  All of
        # these derive from DatabaseError, so catch that and look for the
        # table again; re-raise only if it is still missing.
        table = self.getExistingTable(name, spec)
        if table is None:
            raise
    return table

1068 

def getExistingTable(self, name: str, spec: ddl.TableSpec) -> Optional[sqlalchemy.schema.Table]:
    """Look up an existing table with the given name and specification.

    Parameters
    ----------
    name : `str`
        Name of the table (not including namespace qualifiers).
    spec : `TableSpec`
        Specification for the table.  It is used to build the SQLAlchemy
        representation of the table and to check that the actual table
        is consistent with it.

    Returns
    -------
    table : `sqlalchemy.schema.Table` or `None`
        SQLAlchemy representation of the table, or `None` if it does not
        exist.

    Raises
    ------
    DatabaseConflictError
        Raised if the table exists but ``spec`` is inconsistent with its
        definition.

    Notes
    -----
    This method can be called within transactions and never modifies the
    database.

    Subclasses may override this method, but usually should not need to.
    """
    assert self._metadata is not None, "Static tables must be declared before dynamic tables."
    name = self._mangleTableName(name)
    key = name if self.namespace is None else f"{self.namespace}.{name}"

    # Case 1: the table is already known to the in-memory metadata; only
    # the set of column names is compared.
    known = self._metadata.tables.get(key)
    if known is not None:
        if spec.fields.names != set(known.columns.keys()):
            raise DatabaseConflictError(
                f"Table '{name}' has already been defined differently; the new "
                f"specification has columns {list(spec.fields.names)}, while "
                f"the previous definition has {list(known.columns.keys())}."
            )
        return known

    # Case 2: not in the metadata, so ask the database itself, reusing the
    # session connection when there is one.
    bind = self._engine if self._session_connection is None else self._session_connection
    inspector = sqlalchemy.inspect(bind, raiseerr=True)
    if name not in inspector.get_table_names(schema=self.namespace):
        return None

    _checkExistingTableDefinition(name, spec, inspector.get_columns(name, schema=self.namespace))
    table = self._convertTableSpec(name, spec, self._metadata)
    for foreignKeySpec in spec.foreignKeys:
        constraint = self._convertForeignKeySpec(name, foreignKeySpec, self._metadata)
        table.append_constraint(constraint)
    return table

1121 

def _make_temporary_table(
    self,
    connection: sqlalchemy.engine.Connection,
    spec: ddl.TableSpec,
    name: Optional[str] = None,
    **kwargs: Any,
) -> sqlalchemy.schema.Table:
    """Create a temporary table.

    Parameters
    ----------
    connection : `sqlalchemy.engine.Connection`
        Connection to use when creating the table.
    spec : `TableSpec`
        Specification for the table.
    name : `str`, optional
        A unique (within this session/connection) name for the table.
        Subclasses may override to modify the actual name used.  If not
        provided, a unique name is generated from a random UUID.
    **kwargs
        Extra keyword arguments passed through to the
        `sqlalchemy.schema.Table` constructor, so that derived classes can
        delegate to ``super()`` with small adjustments.

    Returns
    -------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy representation of the table.

    Raises
    ------
    RuntimeError
        Raised if the static schema has not been defined yet.
    ValueError
        Raised if the transformed table name is already registered as a
        temporary table and differs from the requested name.
    """
    name = f"tmp_{uuid.uuid4().hex}" if name is None else name
    metadata = self._metadata
    if metadata is None:
        raise RuntimeError("Cannot create temporary table before static schema is defined.")
    table = self._convertTableSpec(
        name, spec, metadata, prefixes=["TEMPORARY"], schema=sqlalchemy.schema.BLANK_SCHEMA, **kwargs
    )
    if table.key in self._temp_tables and table.key != name:
        raise ValueError(
            f"A temporary table with name {name} (transformed to {table.key} by "
            "Database) already exists."
        )
    for foreignKeySpec in spec.foreignKeys:
        constraint = self._convertForeignKeySpec(name, foreignKeySpec, metadata)
        table.append_constraint(constraint)
    with self._transaction():
        table.create(connection)
    return table

1171 

@classmethod
def getTimespanRepresentation(cls) -> Type[TimespanDatabaseRepresentation]:
    """Return the `type` that describes how `Timespan` objects are stored
    in this database.

    `Database` itself does not use the returned type anywhere else; it is
    up to calling code to keep DDL and queries consistent with it.

    Returns
    -------
    TimespanReprClass : `type` (`TimespanDatabaseRepresentation` subclass)
        A type that encapsulates the way `Timespan` objects should be
        stored in this database.

    Notes
    -----
    The choice of representation ultimately belongs to a `Database`
    implementation, but the timespan-mangling logic is kept outside of it
    for two main reasons:

    - Timespans appear in relatively few tables and queries in our
      typical usage, and the code operating on them already knows it is
      dealing with timespans.  A timespan-representation-aware
      implementation of, say, `insert` would instead need extra logic to
      detect when mangling is required, which would usually be useless
      overhead.

    - SQLAlchemy's rich SELECT query expression system has no way to wrap
      multiple columns in a single expression object (the ORM does, but
      we are not using the ORM).  Encapsulating timespan representations
      there would mean wrapping _much_ more of that code in our own
      interfaces.
    """
    representation = TimespanDatabaseRepresentation.Compound
    return representation

1207 

def sync(
    self,
    table: sqlalchemy.schema.Table,
    *,
    keys: Dict[str, Any],
    compared: Optional[Dict[str, Any]] = None,
    extra: Optional[Dict[str, Any]] = None,
    returning: Optional[Sequence[str]] = None,
    update: bool = False,
) -> Tuple[Optional[Dict[str, Any]], Union[bool, Dict[str, Any]]]:
    """Insert into a table as necessary to ensure database contains
    values equivalent to the given ones.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table to be queried and possibly inserted into.
    keys : `dict`
        Column name-value pairs used to search for an existing row; must
        be a combination that can be used to select a single row if one
        exists.  If such a row does not exist, these values are used in
        the insert.
    compared : `dict`, optional
        Column name-value pairs that are compared to those in any existing
        row.  If such a row does not exist, these values are used in the
        insert.
    extra : `dict`, optional
        Column name-value pairs that are ignored if a matching row exists,
        but used in an insert if one is necessary.
    returning : `~collections.abc.Sequence` of `str`, optional
        The names of columns whose values should be returned.
    update : `bool`, optional
        If `True` (`False` is default), update the existing row with the
        values in ``compared`` instead of raising `DatabaseConflictError`.

    Returns
    -------
    row : `dict`, optional
        The value of the fields indicated by ``returning``, or `None` if
        ``returning`` is `None`.
    inserted_or_updated : `bool` or `dict`
        If `True`, a new row was inserted; if `False`, a matching row
        already existed.  If a `dict` (only possible if ``update=True``),
        then an existing row was updated, and the dict maps the names of
        the updated columns to their *old* values (new values can be
        obtained from ``compared``).

    Raises
    ------
    DatabaseConflictError
        Raised if the values in ``compared`` do not match the values in the
        database and ``update`` is `False`.
    ReadOnlyDatabaseError
        Raised if ``isTableWriteable(table)`` returns `False`, and either
        no matching record already exists or an update of the existing
        record would be needed.
    ConflictingDefinitionError
        Raised if the table is writeable but the query performed after the
        attempted insert finds no matching row.
    RuntimeError
        Raised if more than one row matches ``keys``, or if a conflict is
        detected right after an insert that was reported as successful.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    It may be called on read-only databases if and only if the matching row
    does in fact already exist.
    """

    def check() -> Tuple[int, Optional[Dict[str, Any]], Optional[List]]:
        """Query for a row that matches the ``keys`` argument, and compare
        to what was given by the caller.

        Returns
        -------
        n : `int`
            Number of matching rows.  ``n != 1`` is always an error, but
            it's a different kind of error depending on where `check` is
            being called.
        bad : `dict` or `None`
            The subset of the keys of ``compared`` for which the existing
            values did not match the given one, mapped to the existing
            values in the database.  A non-empty ``bad`` indicates a
            conflict, which is handled differently depending on context.
            `None` if ``n != 1``.
        result : `list` or `None`
            Results in the database that correspond to the columns given
            in ``returning``, or `None` if ``returning is None`` or
            ``n != 1``.
        """
        toSelect: Set[str] = set()
        if compared is not None:
            toSelect.update(compared.keys())
        if returning is not None:
            toSelect.update(returning)
        if not toSelect:
            # Need to select some column, even if we just want to see
            # how many rows we get back.
            toSelect.add(next(iter(keys.keys())))
        selectSql = (
            sqlalchemy.sql.select(*[table.columns[k].label(k) for k in toSelect])
            .select_from(table)
            .where(sqlalchemy.sql.and_(*[table.columns[k] == v for k, v in keys.items()]))
        )
        with self._transaction() as (_, connection):
            fetched = list(connection.execute(selectSql).mappings())
        if len(fetched) != 1:
            return len(fetched), None, None
        existing = fetched[0]
        if compared is not None:

            def safeNotEqual(a: Any, b: Any) -> bool:
                # astropy times fetched from the database are compared via
                # the TimeConverter helper instead of ``!=``.
                if isinstance(a, astropy.time.Time):
                    return not time_utils.TimeConverter().times_equal(a, b)
                return a != b

            inconsistencies = {
                k: existing[k] for k, v in compared.items() if safeNotEqual(existing[k], v)
            }
        else:
            inconsistencies = {}
        if returning is not None:
            toReturn: Optional[list] = [existing[k] for k in returning]
        else:
            toReturn = None
        return 1, inconsistencies, toReturn

    def format_bad(inconsistencies: Dict[str, Any]) -> str:
        """Format the 'bad' dictionary of existing values returned by
        ``check`` into a string suitable for an error message.
        """
        assert compared is not None, "Should not be able to get inconsistencies without comparing."
        return ", ".join(f"{k}: {v!r} != {compared[k]!r}" for k, v in inconsistencies.items())

    if self.isTableWriteable(table):
        # Try an insert first, but allow it to fail (in only specific
        # ways).
        row = keys.copy()
        if compared is not None:
            row.update(compared)
        if extra is not None:
            row.update(extra)
        with self.transaction():
            inserted = bool(self.ensure(table, row))
            inserted_or_updated: Union[bool, Dict[str, Any]]
            # Need to perform check() for this branch inside the
            # transaction, so we roll back an insert that didn't do
            # what we expected.  That limits the extent to which we
            # can reduce duplication between this block and the other
            # ones that perform similar logic.
            n, bad, result = check()
            if n < 1:
                raise ConflictingDefinitionError(
                    f"Attempted to ensure {row} exists by inserting it with ON CONFLICT IGNORE, "
                    f"but a post-insert query on {keys} returned no results. "
                    f"Insert was {'' if inserted else 'not '}reported as successful. "
                    "This can occur if the insert violated a database constraint other than the "
                    "unique constraint or primary key used to identify the row in this call."
                )
            elif n > 1:
                raise RuntimeError(
                    f"Keys passed to sync {keys.keys()} do not comprise a "
                    f"unique constraint for table {table.name}."
                )
            elif bad:
                assert (
                    compared is not None
                ), "Should not be able to get inconsistencies without comparing."
                if inserted:
                    raise RuntimeError(
                        f"Conflict ({bad}) in sync after successful insert; this is "
                        "possible if the same table is being updated by a concurrent "
                        "process that isn't using sync, but it may also be a bug in "
                        "daf_butler."
                    )
                elif update:
                    # Overwrite only the columns that differ; ``bad`` keeps
                    # their old values and is handed back to the caller.
                    with self._transaction() as (_, connection):
                        connection.execute(
                            table.update()
                            .where(sqlalchemy.sql.and_(*[table.columns[k] == v for k, v in keys.items()]))
                            .values(**{k: compared[k] for k in bad.keys()})
                        )
                    inserted_or_updated = bad
                else:
                    raise DatabaseConflictError(
                        f"Conflict in sync for table {table.name} on column(s) {format_bad(bad)}."
                    )
            else:
                inserted_or_updated = inserted
    else:
        # Database is not writeable; just see if the row exists.
        n, bad, result = check()
        if n < 1:
            raise ReadOnlyDatabaseError("sync needs to insert, but database is read-only.")
        elif n > 1:
            raise RuntimeError("Keys passed to sync do not comprise a unique constraint.")
        elif bad:
            if update:
                raise ReadOnlyDatabaseError("sync needs to update, but database is read-only.")
            else:
                raise DatabaseConflictError(
                    f"Conflict in sync for table {table.name} on column(s) {format_bad(bad)}."
                )
        inserted_or_updated = False
    if returning is None:
        return None, inserted_or_updated
    else:
        # ``result`` is only None when ``returning`` is None or n != 1, and
        # every n != 1 case has already raised above.
        assert result is not None
        return {k: v for k, v in zip(returning, result)}, inserted_or_updated

1411 

def insert(
    self,
    table: sqlalchemy.schema.Table,
    *rows: dict,
    returnIds: bool = False,
    select: Optional[sqlalchemy.sql.expression.SelectBase] = None,
    names: Optional[Iterable[str]] = None,
) -> Optional[List[int]]:
    """Insert rows into a table, optionally returning the generated
    autoincrement primary key values.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    returnIds : `bool`
        If `True` (`False` is default), return the values of the table's
        autoincrement primary key field (which must exist).
    select : `sqlalchemy.sql.SelectBase`, optional
        A SELECT query expression to insert rows from.  Cannot be provided
        with either ``rows`` or ``returnIds=True``.
    names : `Iterable` [ `str` ], optional
        Names of columns in ``table`` to be populated, ordered to match the
        columns returned by ``select``.  Ignored if ``select`` is `None`.
        If not provided, the columns returned by ``select`` must be named
        to match the desired columns of ``table``.
    *rows
        Positional arguments are the rows to be inserted, as dictionaries
        mapping column name to value.  The keys in all dictionaries must
        be the same.

    Returns
    -------
    ids : `None`, or `list` of `int`
        If ``returnIds`` is `True`, a `list` containing the inserted
        values for the table's autoincrement primary key; `None`
        otherwise.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.
    TypeError
        Raised if ``select`` is combined with ``rows`` or
        ``returnIds=True``.

    Notes
    -----
    The default implementation uses bulk insert syntax when ``returnIds``
    is `False`, and a loop over single-row insert operations when it is
    `True`.

    Derived classes should reimplement when they can provide a more
    efficient implementation (especially for the latter case).

    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.
    """
    self.assertTableWriteable(table, f"Cannot insert into read-only table {table}.")
    if select is not None and (rows or returnIds):
        raise TypeError("'select' is incompatible with passing value rows or returnIds=True.")
    if select is None and not rows:
        # Nothing to insert at all.
        return [] if returnIds else None

    with self._transaction() as (_, connection):
        if returnIds:
            # One statement per row so that each generated key can be
            # read back.
            statement = table.insert()
            ids = []
            for row in rows:
                ids.append(connection.execute(statement, row).inserted_primary_key[0])
            return ids

        if select is None:
            connection.execute(table.insert(), rows)
            return None

        if names is None:
            # columns() is deprecated since 1.4, but
            # selected_columns() method did not exist in 1.3.
            source = select.selected_columns if hasattr(select, "selected_columns") else select.columns
            names = source.keys()
        connection.execute(table.insert().from_select(list(names), select))
        return None

1491 

@abstractmethod
def replace(self, table: sqlalchemy.schema.Table, *rows: dict) -> None:
    """Insert rows into a table, overwriting any existing rows whose
    primary key would otherwise conflict with a new row.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    *rows
        Positional arguments are the rows to be inserted, as dictionaries
        mapping column name to value.  The keys in all dictionaries must
        be the same.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    Implementations should raise a `sqlalchemy.exc.IntegrityError`
    exception when a constraint other than the primary key would be
    violated.

    Implementations are not required to support `replace` on tables
    with autoincrement keys.
    """
    # Abstract: concrete databases must provide the implementation.
    raise NotImplementedError

1525 

@abstractmethod
def ensure(self, table: sqlalchemy.schema.Table, *rows: dict, primary_key_only: bool = False) -> int:
    """Insert rows into a table, skipping every row whose insertion would
    violate a unique constraint.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    *rows
        Positional arguments are the rows to be inserted, as dictionaries
        mapping column name to value.  The keys in all dictionaries must
        be the same.
    primary_key_only : `bool`, optional
        If `True` (`False` is default), only skip rows that violate the
        primary key constraint, and raise an exception (and rollback
        transactions) for other constraint violations.

    Returns
    -------
    count : `int`
        The number of rows actually inserted.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.
        This is raised even if the operation would do nothing even on a
        writeable database.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    Implementations are not required to support `ensure` on tables
    with autoincrement keys.
    """
    # Abstract: concrete databases must provide the implementation.
    raise NotImplementedError

1565 

def delete(self, table: sqlalchemy.schema.Table, columns: Iterable[str], *rows: dict) -> int:
    """Delete one or more rows from a table.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table that rows should be deleted from.
    columns : `~collections.abc.Iterable` of `str`
        The names of columns that will be used to constrain the rows to
        be deleted; these will be combined via ``AND`` to form the
        ``WHERE`` clause of the delete query.  May be a one-shot iterator;
        an empty iterable means "no constraint", i.e. delete everything.
    *rows
        Positional arguments are the keys of rows to be deleted, as
        dictionaries mapping column name to value.  The keys in all
        dictionaries must be exactly the names in ``columns``.

    Returns
    -------
    count : `int`
        Number of rows deleted.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    The default implementation should be sufficient for most derived
    classes.
    """
    self.assertTableWriteable(table, f"Cannot delete from read-only table {table}.")
    # Force iterators to list *before* testing for emptiness: an iterator
    # object is always truthy, so testing it directly would make an empty
    # iterator look like a non-empty set of columns.
    columns = list(columns)
    if columns and not rows:
        # If there are no columns, this operation is supposed to delete
        # everything (so we proceed as usual).  But if there are columns,
        # but no rows, it was a constrained bulk operation where the
        # constraint is that no rows match, and we should short-circuit
        # while reporting that no rows were affected.
        return 0
    sql = table.delete()

    # More efficient to use IN operator if there is only one
    # variable changing across all rows.
    content: Dict[str, Set] = defaultdict(set)
    if len(columns) == 1:
        # Nothing to calculate since we can always use IN
        column = columns[0]
        changing_columns = [column]
        content[column] = {row[column] for row in rows}
    else:
        for row in rows:
            for k, v in row.items():
                content[k].add(v)
        changing_columns = [col for col, values in content.items() if len(values) > 1]

    if len(changing_columns) != 1:
        # More than one column changes each time so do explicit bind
        # parameters and have each row processed separately.
        whereTerms = [table.columns[name] == sqlalchemy.sql.bindparam(name) for name in columns]
        if whereTerms:
            sql = sql.where(sqlalchemy.sql.and_(*whereTerms))
        with self._transaction() as (_, connection):
            return connection.execute(sql, rows).rowcount

    # One of the columns has changing values but any others are fixed.
    # In this case we can use an IN operator and be more efficient.
    name = changing_columns.pop()

    # Simple equality clauses for the unchanging columns; each of their
    # value sets holds exactly one element.
    clauses = [table.columns[k] == next(iter(values)) for k, values in content.items() if k != name]

    # The IN operator will not work for "infinite" numbers of
    # rows so must batch it up into distinct calls.
    in_content = list(content[name])
    n_per_loop = 1_000  # Controls how many items to put in IN clause
    rowcount = 0
    with self._transaction() as (_, connection):
        for iposn in range(0, len(in_content), n_per_loop):
            in_clause = table.columns[name].in_(in_content[iposn : iposn + n_per_loop])
            newsql = sql.where(sqlalchemy.sql.and_(*clauses, in_clause))
            rowcount += connection.execute(newsql).rowcount
    return rowcount

1664 

def deleteWhere(self, table: sqlalchemy.schema.Table, where: sqlalchemy.sql.ColumnElement) -> int:
    """Delete the rows of a table selected by a ready-made WHERE clause.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table that rows should be deleted from.
    where : `sqlalchemy.sql.ColumnElement`
        SQLAlchemy expression that is passed unchanged to the ``WHERE``
        clause of the delete query.

    Returns
    -------
    count : `int`
        Number of rows deleted, as reported by the executed statement.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    The default implementation should be sufficient for most derived
    classes.
    """
    self.assertTableWriteable(table, f"Cannot delete from read-only table {table}.")

    # Build the statement in two steps: plain DELETE, then the constraint.
    statement = table.delete()
    statement = statement.where(where)
    with self._transaction() as (_, connection):
        result = connection.execute(statement)
        return result.rowcount

1700 

def update(self, table: sqlalchemy.schema.Table, where: Dict[str, str], *rows: dict) -> int:
    """Update one or more rows in a table.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table containing the rows to be updated.
    where : `dict` [`str`, `str`]
        A mapping from the names of columns that will be used to search for
        existing rows to the keys that will hold these values in the
        ``rows`` dictionaries.  Note that these may not be the same due to
        SQLAlchemy limitations.
    *rows
        Positional arguments are the rows to be updated.  The keys in all
        dictionaries must be the same, and may correspond to either a
        value in the ``where`` dictionary or the name of a column to be
        updated.

    Returns
    -------
    count : `int`
        Number of rows matched (regardless of whether the update actually
        modified them).  Zero is returned without touching the database
        when no rows are given.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    If ``where`` is empty no ``WHERE`` clause is attached to the statement.

    The default implementation should be sufficient for most derived
    classes.
    """
    self.assertTableWriteable(table, f"Cannot update read-only table {table}.")
    if not rows:
        return 0
    sql = table.update()
    # One equality term per search column, each bound to the key under
    # which the rows carry the value to compare against.
    whereTerms = [table.columns[k] == sqlalchemy.sql.bindparam(v) for k, v in where.items()]
    # Calling and_() with no arguments is deprecated in SQLAlchemy, so only
    # add a WHERE clause when there is at least one term.
    if whereTerms:
        sql = sql.where(sqlalchemy.sql.and_(*whereTerms))
    with self._transaction() as (_, connection):
        return connection.execute(sql, rows).rowcount

1746 

@contextmanager
def query(
    self,
    sql: sqlalchemy.sql.expression.Executable | sqlalchemy.sql.expression.SelectBase,
    *args: Any,
    **kwargs: Any,
) -> Iterator[sqlalchemy.engine.CursorResult]:
    """Run a SELECT query against the database.

    Parameters
    ----------
    sql : `sqlalchemy.sql.expression.SelectBase`
        A SQLAlchemy representation of a ``SELECT`` query.
    *args
        Additional positional arguments are forwarded to
        `sqlalchemy.engine.Connection.execute`.
    **kwargs
        Additional keyword arguments are forwarded to
        `sqlalchemy.engine.Connection.execute`.

    Returns
    -------
    result_context : `sqlalchemy.engine.CursorResult`
        Context manager that returns the query result object when entered.
        These results are invalidated when the context is exited.

    Notes
    -----
    When there is no session connection, a new connection is opened from
    the engine and closed when the context exits, including when executing
    the query raises.  An existing session connection is reused and left
    open.
    """
    if self._session_connection is None:
        connection = self._engine.connect()
    else:
        connection = self._session_connection
    # The execute call lives inside the try block so that a connection
    # opened above is still closed if the query itself fails.
    try:
        # TODO: SelectBase is not good for execute(), but it is used
        # everywhere, e.g. in daf_relation. We should switch to Executable
        # at some point.
        result = connection.execute(cast(sqlalchemy.sql.expression.Executable, sql), *args, **kwargs)
        yield result
    finally:
        # Only close connections that do not belong to the session.
        if connection is not self._session_connection:
            connection.close()

1785 

@abstractmethod
def constant_rows(
    self,
    fields: NamedValueAbstractSet[ddl.FieldSpec],
    *rows: dict,
    name: Optional[str] = None,
) -> sqlalchemy.sql.FromClause:
    """Build a SQLAlchemy construct holding a few constant-valued rows.

    Parameters
    ----------
    fields : `NamedValueAbstractSet` [ `ddl.FieldSpec` ]
        The columns of the rows.  Unique and foreign key constraints are
        ignored.
    *rows : `dict`
        Values for the rows.
    name : `str`, optional
        If provided, the name of the SQL construct.  If not provided, an
        opaque but unique identifier is generated.

    Returns
    -------
    from_clause : `sqlalchemy.sql.FromClause`
        SQLAlchemy object representing the given rows.  This is guaranteed
        to be something that can be directly joined into a ``SELECT``
        query's ``FROM`` clause, and will not involve a temporary table
        that needs to be cleaned up later.

    Notes
    -----
    The default implementation uses the SQL-standard ``VALUES`` construct,
    but support for that construct is varied enough across popular RDBMSs
    that the method is still marked abstract to force explicit opt-in via
    delegation to `super`.
    """
    if name is None:
        # No name given: derive one from a random UUID.
        name = f"tmp_{uuid.uuid4().hex}"
    column_defs = [sqlalchemy.Column(field.name, field.getSizedColumnType()) for field in fields]
    values_clause = sqlalchemy.sql.values(*column_defs, name=name)
    # Each row becomes a tuple ordered like ``fields.names``.
    row_tuples = [tuple(row[field_name] for field_name in fields.names) for row in rows]
    return values_clause.data(row_tuples)

1828 

def get_constant_rows_max(self) -> int:
    """Report how many rows `constant_rows` should be given at most for
    this backend.

    Returns
    -------
    max : `int`
        Maximum number of rows.

    Notes
    -----
    This should reflect typical performance profiles (or a guess at these),
    not just hard database engine limits.
    """
    max_rows = 100
    return max_rows

1844 

# Attribute declarations: annotations only, no values are assigned here.
# Each declaration is followed by its attribute docstring.
origin: int
"""An integer ID that should be used as the default for any datasets,
quanta, or other entities that use a (autoincrement, origin) compound
primary key (`int`).
"""

# Annotated as ``Optional[str]``, so `None` is an allowed value.
namespace: Optional[str]
"""The schema or namespace this database instance is associated with
(`str` or `None`).
"""

1854 """