Coverage for python/lsst/daf/butler/registry/interfaces/_database.py: 14%

418 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2022-10-29 02:20 -0700

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

# Public API of this module: the names exported by a star-import.  Every
# entry is a class defined further down in this file.
__all__ = [
    "Database",
    "ReadOnlyDatabaseError",
    "DatabaseConflictError",
    "SchemaAlreadyDefinedError",
    "Session",
    "StaticTablesContext",
]

31 

32import uuid 

33import warnings 

34from abc import ABC, abstractmethod 

35from collections import defaultdict 

36from contextlib import contextmanager 

37from typing import Any, Callable, Dict, Iterable, Iterator, List, Optional, Sequence, Set, Tuple, Type, Union 

38 

39import astropy.time 

40import sqlalchemy 

41 

42from ...core import TimespanDatabaseRepresentation, ddl, time_utils 

43from ...core.named import NamedValueAbstractSet 

44from .._exceptions import ConflictingDefinitionError 

45 

# Key used by `Database.transaction` in a connection's ``info`` dict to record
# whether the currently open transaction context was started in SAVEPOINT
# mode.
_IN_SAVEPOINT_TRANSACTION = "IN_SAVEPOINT_TRANSACTION"

47 

48 

49def _checkExistingTableDefinition(name: str, spec: ddl.TableSpec, inspection: List[Dict[str, Any]]) -> None: 

50 """Test that the definition of a table in a `ddl.TableSpec` and from 

51 database introspection are consistent. 

52 

53 Parameters 

54 ---------- 

55 name : `str` 

56 Name of the table (only used in error messages). 

57 spec : `ddl.TableSpec` 

58 Specification of the table. 

59 inspection : `dict` 

60 Dictionary returned by 

61 `sqlalchemy.engine.reflection.Inspector.get_columns`. 

62 

63 Raises 

64 ------ 

65 DatabaseConflictError 

66 Raised if the definitions are inconsistent. 

67 """ 

68 columnNames = [c["name"] for c in inspection] 

69 if spec.fields.names != set(columnNames): 

70 raise DatabaseConflictError( 

71 f"Table '{name}' exists but is defined differently in the database; " 

72 f"specification has columns {list(spec.fields.names)}, while the " 

73 f"table in the database has {columnNames}." 

74 ) 

75 

76 

class ReadOnlyDatabaseError(RuntimeError):
    """Error signalling that a write was attempted on a `Database` that is
    read-only.
    """

81 

82 

class DatabaseConflictError(ConflictingDefinitionError):
    """Error signalling that what is stored in the database (row values or
    schema entities) does not agree with what this client expects.
    """

87 

88 

class SchemaAlreadyDefinedError(RuntimeError):
    """Error signalling an attempt to initialize a database schema in a
    database where some tables already exist.
    """

93 

94 

class StaticTablesContext:
    """Collector used to declare the static schema of one registry layer
    in a database.

    Instances are handed out by `Database.declareStaticTables`; client code
    should obtain them only that way and never construct them directly.
    """

    def __init__(self, db: Database):
        self._db = db
        # Callables to run against the database after the schema is created.
        self._initializers: List[Callable[[Database], None]] = []
        # Foreign keys are collected here rather than attached immediately.
        self._foreignKeys: List[Tuple[sqlalchemy.schema.Table, sqlalchemy.schema.ForeignKeyConstraint]] = []
        # Snapshot of the tables present in the namespace at construction.
        self._inspector = sqlalchemy.inspect(self._db._engine)
        self._tableNames = frozenset(self._inspector.get_table_names(schema=self._db.namespace))

    def addTable(self, name: str, spec: ddl.TableSpec) -> sqlalchemy.schema.Table:
        """Declare a table and return its SQLAlchemy representation.

        Actual creation may be postponed until the context opened by
        `Database.declareStaticTables` exits, so tables can be declared in
        any order even when they reference each other via foreign keys.

        Parameters
        ----------
        name : `str`
            Logical table name; it is mangled by the database before use.
        spec : `ddl.TableSpec`
            Specification of the table.

        Returns
        -------
        table : `sqlalchemy.schema.Table`
            SQLAlchemy representation of the table.
        """
        db = self._db
        name = db._mangleTableName(name)
        if name in self._tableNames:
            # The table is already in the database: make sure it matches.
            existingColumns = self._inspector.get_columns(name, schema=db.namespace)
            _checkExistingTableDefinition(name, spec, existingColumns)
        metadata = db._metadata
        assert metadata is not None, "Guaranteed by context manager that returns this object."
        table = db._convertTableSpec(name, spec, metadata)
        for foreignKeySpec in spec.foreignKeys:
            constraint = db._convertForeignKeySpec(name, foreignKeySpec, metadata)
            self._foreignKeys.append((table, constraint))
        return table

    def addTableTuple(self, specs: Tuple[ddl.TableSpec, ...]) -> Tuple[sqlalchemy.schema.Table, ...]:
        """Declare every table in a named tuple of specifications and return
        the SQLAlchemy tables in a named tuple of the same type.

        Actual creation may be postponed until the context opened by
        `Database.declareStaticTables` exits, so tables can be declared in
        any order even when they reference each other via foreign keys.

        Notes
        -----
        ``specs`` *must* be an instance of a type produced by
        `collections.namedtuple`, not a plain tuple; the field names are
        used as the table names and the result has the same type.  Because
        `~collections.namedtuple` is a factory for `type` objects rather
        than a type itself, this cannot be expressed in the annotations.
        """
        tables = [
            self.addTable(fieldName, tableSpec)
            for fieldName, tableSpec in zip(specs._fields, specs)  # type: ignore
        ]
        return specs._make(tables)  # type: ignore

    def addInitializer(self, initializer: Callable[[Database], None]) -> None:
        """Register a callable performing one-time database initialization.

        Initialization means anything that changes the state of the
        database and has to happen exactly once after the schema has been
        created, for example populating schema attributes.

        Parameters
        ----------
        initializer : callable
            Callable taking a single argument, the `Database` instance.
        """
        self._initializers.append(initializer)

165 

166 

class Session:
    """A persistent connection to a database.

    Parameters
    ----------
    db : `Database`
        Database instance.

    Notes
    -----
    Client code should not instantiate this class; use `Database.session`
    to obtain a session context instead::

        with db.session() as session:
            session.method()
            db.method()

    In the current implementation sessions may be nested, and transactions
    may be nested inside a session.  All nested sessions and transactions
    share one database connection.

    This class exposes the limited part of the database API that needs a
    persistent connection (e.g. temporary tables, whose lifetime is that of
    a session).  Potentially most of the database API could be associated
    with a Session class.
    """

    def __init__(self, db: Database):
        self._db = db

    def makeTemporaryTable(self, spec: ddl.TableSpec, name: Optional[str] = None) -> sqlalchemy.schema.Table:
        """Create a temporary table.

        Parameters
        ----------
        spec : `TableSpec`
            Specification for the table.
        name : `str`, optional
            A name for the table that is unique within this
            session/connection.  Subclasses may override to modify the
            actual name used.  If not provided, a unique name is generated.

        Returns
        -------
        table : `sqlalchemy.schema.Table`
            SQLAlchemy representation of the table.

        Raises
        ------
        RuntimeError
            Raised if the static schema has not been declared yet.
        ValueError
            Raised if the database transformed ``name`` into a key that is
            already used by another temporary table.

        Notes
        -----
        Temporary tables may be created, dropped, and written to even in
        read-only databases - at least according to the Python-level
        protections in the `Database` classes.  Server permissions may say
        otherwise, but in that case they probably need to be modified to
        support the full range of expected read-only butler behavior.

        Temporary table rows are guaranteed to be dropped when a connection
        is closed.  `Database` implementations are permitted to allow the
        table to remain as long as this is transparent to the user (i.e.
        "creating" the temporary table in a new session should not be an
        error, even if it does nothing).

        It may not be possible to use temporary tables within transactions
        with some database engines (or configurations thereof).
        """
        db = self._db
        if name is None:
            name = f"tmp_{uuid.uuid4().hex}"
        metadata = db._metadata
        if metadata is None:
            raise RuntimeError("Cannot create temporary table before static schema is defined.")
        table = db._convertTableSpec(
            name, spec, metadata, prefixes=["TEMPORARY"], schema=sqlalchemy.schema.BLANK_SCHEMA
        )
        # Only a clash caused by the database transforming the name is
        # reported here.
        if table.key in db._tempTables and table.key != name:
            raise ValueError(
                f"A temporary table with name {name} (transformed to {table.key} by "
                f"Database) already exists."
            )
        for foreignKeySpec in spec.foreignKeys:
            table.append_constraint(db._convertForeignKeySpec(name, foreignKeySpec, metadata))
        with db._connection() as connection:
            table.create(connection)
        db._tempTables.add(table.key)
        return table

    def dropTemporaryTable(self, table: sqlalchemy.schema.Table) -> None:
        """Drop a temporary table.

        Parameters
        ----------
        table : `sqlalchemy.schema.Table`
            A SQLAlchemy object returned by a previous call to
            `makeTemporaryTable`.

        Raises
        ------
        TypeError
            Raised if ``table`` is not registered as a temporary table of
            this database.
        """
        if table.key not in self._db._tempTables:
            raise TypeError(f"Table {table.key} was not created by makeTemporaryTable.")
        with self._db._connection() as connection:
            table.drop(connection)
        self._db._tempTables.remove(table.key)

    @contextmanager
    def temporary_table(
        self, spec: ddl.TableSpec, name: Optional[str] = None
    ) -> Iterator[sqlalchemy.schema.Table]:
        """Return a context manager that creates a temporary table on entry
        and drops it on exit.

        Parameters
        ----------
        spec : `ddl.TableSpec`
            Specification for the table; passed to `makeTemporaryTable`.
        name : `str`, optional
            If provided, the name of the table.  If not provided, an opaque
            but unique identifier is generated.

        Returns
        -------
        table : `sqlalchemy.schema.Table`
            SQLAlchemy representation of the table.
        """
        table = self.makeTemporaryTable(spec=spec, name=name)
        try:
            yield table
        finally:
            # Drop the table even when the managed block raised.
            self.dropTemporaryTable(table)

294 

295 

296class Database(ABC): 

297 """An abstract interface that represents a particular database engine's 

298 representation of a single schema/namespace/database. 

299 

300 Parameters 

301 ---------- 

302 origin : `int` 

303 An integer ID that should be used as the default for any datasets, 

304 quanta, or other entities that use a (autoincrement, origin) compound 

305 primary key. 

306 engine : `sqlalchemy.engine.Engine` 

307 The SQLAlchemy engine for this `Database`. 

308 namespace : `str`, optional 

309 Name of the schema or namespace this instance is associated with. 

310 This is passed as the ``schema`` argument when constructing a 

311 `sqlalchemy.schema.MetaData` instance. We use ``namespace`` instead to 

312 avoid confusion between "schema means namespace" and "schema means 

313 table definitions". 

314 

315 Notes 

316 ----- 

317 `Database` requires all write operations to go through its special named 

318 methods. Our write patterns are sufficiently simple that we don't really 

319 need the full flexibility of SQL insert/update/delete syntax, and we need 

320 non-standard (but common) functionality in these operations sufficiently 

321 often that it seems worthwhile to provide our own generic API. 

322 

323 In contrast, `Database.query` allows arbitrary ``SELECT`` queries (via 

324 their SQLAlchemy representation) to be run, as we expect these to require 

325 significantly more sophistication while still being limited to standard 

326 SQL. 

327 

328 `Database` itself has several underscore-prefixed attributes: 

329 

330 - ``_engine``: SQLAlchemy object representing its engine. 

331 - ``_connection``: method returning a context manager for 

332 `sqlalchemy.engine.Connection` object. 

333 - ``_metadata``: the `sqlalchemy.schema.MetaData` object representing 

334 the tables and other schema entities. 

335 

336 These are considered protected (derived classes may access them, but other 

337 code should not), and read-only, aside from executing SQL via 

338 ``_connection``. 

339 """ 

340 

341 def __init__(self, *, origin: int, engine: sqlalchemy.engine.Engine, namespace: Optional[str] = None): 

342 self.origin = origin 

343 self.namespace = namespace 

344 self._engine = engine 

345 self._session_connection: Optional[sqlalchemy.engine.Connection] = None 

346 self._metadata: Optional[sqlalchemy.schema.MetaData] = None 

347 self._tempTables: Set[str] = set() 

348 

349 def __repr__(self) -> str: 

350 # Rather than try to reproduce all the parameters used to create 

351 # the object, instead report the more useful information of the 

352 # connection URL. 

353 if self._engine.url.password is not None: 

354 uri = str(self._engine.url.set(password="***")) 

355 else: 

356 uri = str(self._engine.url) 

357 if self.namespace: 

358 uri += f"#{self.namespace}" 

359 return f'{type(self).__name__}("{uri}")' 

360 

361 @classmethod 

362 def makeDefaultUri(cls, root: str) -> Optional[str]: 

363 """Create a default connection URI appropriate for the given root 

364 directory, or `None` if there can be no such default. 

365 """ 

366 return None 

367 

368 @classmethod 

369 def fromUri( 

370 cls, uri: str, *, origin: int, namespace: Optional[str] = None, writeable: bool = True 

371 ) -> Database: 

372 """Construct a database from a SQLAlchemy URI. 

373 

374 Parameters 

375 ---------- 

376 uri : `str` 

377 A SQLAlchemy URI connection string. 

378 origin : `int` 

379 An integer ID that should be used as the default for any datasets, 

380 quanta, or other entities that use a (autoincrement, origin) 

381 compound primary key. 

382 namespace : `str`, optional 

383 A database namespace (i.e. schema) the new instance should be 

384 associated with. If `None` (default), the namespace (if any) is 

385 inferred from the URI. 

386 writeable : `bool`, optional 

387 If `True`, allow write operations on the database, including 

388 ``CREATE TABLE``. 

389 

390 Returns 

391 ------- 

392 db : `Database` 

393 A new `Database` instance. 

394 """ 

395 return cls.fromEngine( 

396 cls.makeEngine(uri, writeable=writeable), origin=origin, namespace=namespace, writeable=writeable 

397 ) 

398 

399 @classmethod 

400 @abstractmethod 

401 def makeEngine(cls, uri: str, *, writeable: bool = True) -> sqlalchemy.engine.Engine: 

402 """Create a `sqlalchemy.engine.Engine` from a SQLAlchemy URI. 

403 

404 Parameters 

405 ---------- 

406 uri : `str` 

407 A SQLAlchemy URI connection string. 

408 writeable : `bool`, optional 

409 If `True`, allow write operations on the database, including 

410 ``CREATE TABLE``. 

411 

412 Returns 

413 ------- 

414 engine : `sqlalchemy.engine.Engine` 

415 A database engine. 

416 

417 Notes 

418 ----- 

419 Subclasses that support other ways to connect to a database are 

420 encouraged to add optional arguments to their implementation of this 

421 method, as long as they maintain compatibility with the base class 

422 call signature. 

423 """ 

424 raise NotImplementedError() 

425 

426 @classmethod 

427 @abstractmethod 

428 def fromEngine( 

429 cls, 

430 engine: sqlalchemy.engine.Engine, 

431 *, 

432 origin: int, 

433 namespace: Optional[str] = None, 

434 writeable: bool = True, 

435 ) -> Database: 

436 """Create a new `Database` from an existing `sqlalchemy.engine.Engine`. 

437 

438 Parameters 

439 ---------- 

440 engine : `sqlalchemy.engine.Engine` 

441 The engine for the database. May be shared between `Database` 

442 instances. 

443 origin : `int` 

444 An integer ID that should be used as the default for any datasets, 

445 quanta, or other entities that use a (autoincrement, origin) 

446 compound primary key. 

447 namespace : `str`, optional 

448 A different database namespace (i.e. schema) the new instance 

449 should be associated with. If `None` (default), the namespace 

450 (if any) is inferred from the connection. 

451 writeable : `bool`, optional 

452 If `True`, allow write operations on the database, including 

453 ``CREATE TABLE``. 

454 

455 Returns 

456 ------- 

457 db : `Database` 

458 A new `Database` instance. 

459 

460 Notes 

461 ----- 

462 This method allows different `Database` instances to share the same 

463 engine, which is desirable when they represent different namespaces 

464 can be queried together. 

465 """ 

466 raise NotImplementedError() 

467 

@contextmanager
def session(self) -> Iterator:
    """Return a context manager representing a session, i.e. a persistent
    connection to the database.

    The context manager yields a `Session`.  If a session is already
    active its connection is reused and left open on exit; otherwise a new
    connection is opened and closed again when the context exits.
    """
    if self._session_connection is not None:
        # Nested use: the outermost session owns the connection.
        yield Session(self)
        return
    try:
        # Outermost session: open the connection and close it when done.
        self._session_connection = self._engine.connect()
        yield Session(self)
    finally:
        if self._session_connection is not None:
            self._session_connection.close()
            self._session_connection = None
            # Temporary tables only live within a session.
            self._tempTables = set()

487 

@contextmanager
def transaction(
    self,
    *,
    interrupting: bool = False,
    savepoint: bool = False,
    lock: Iterable[sqlalchemy.schema.Table] = (),
) -> Iterator:
    """Return a context manager that represents a transaction.

    Parameters
    ----------
    interrupting : `bool`, optional
        If `True` (`False` is default), this transaction block may not be
        nested within an outer one, and attempting to do so is a logic
        (i.e. assertion) error.
    savepoint : `bool`, optional
        If `True` (`False` is default), create a `SAVEPOINT`, allowing
        exceptions raised by the database (e.g. due to constraint
        violations) during this transaction's context to be caught outside
        it without also rolling back all operations in an outer
        transaction block.  If `False`, transactions may still be nested,
        but a rollback may be generated at any level and affects all
        levels, and commits are deferred until the outermost block
        completes.  If any outer transaction block was created with
        ``savepoint=True``, all inner blocks will be as well (regardless
        of the actual value passed).  This has no effect if this is the
        outermost transaction.
    lock : `Iterable` [ `sqlalchemy.schema.Table` ], optional
        A list of tables to lock for the duration of this transaction.
        These locks are guaranteed to prevent concurrent writes and allow
        this transaction (only) to acquire the same locks (others should
        block), but only prevent concurrent reads if the database engine
        requires that in order to block concurrent writes.

    Notes
    -----
    All transactions on a connection managed by one or more `Database`
    instances _must_ go through this method, or transaction state will not
    be correctly managed.

    Table locks are acquired inside the same ``try`` block that guards the
    caller's code.  If locking fails, the transaction or savepoint begun
    here is rolled back and the savepoint flag is cleaned up, exactly as
    for a failure in the managed block.
    """
    # A connection is needed; the session context manages its lifetime.
    with self.session():
        assert self._session_connection is not None
        connection = self._session_connection
        assert not (interrupting and connection.in_transaction()), (
            "Logic error in transaction nesting: an operation that would "
            "interrupt the active transaction context has been requested."
        )
        # Whether we are already in a SAVEPOINT transaction is remembered
        # in the connection object's 'info' dict, which is explicitly for
        # user information like this.  This is safer than a regular
        # `Database` instance attribute, because it guards against
        # multiple `Database` instances sharing the same connection.  The
        # need for our own flag should go away in SQLAlchemy 1.4, which
        # seems to have a `Connection.in_nested_transaction()` method.
        savepoint = savepoint or connection.info.get(_IN_SAVEPOINT_TRANSACTION, False)
        connection.info[_IN_SAVEPOINT_TRANSACTION] = savepoint
        trans: Optional[sqlalchemy.engine.Transaction]
        if connection.in_transaction() and savepoint:
            trans = connection.begin_nested()
        elif not connection.in_transaction():
            # Always use a regular (non-savepoint) transaction for the
            # outermost context.
            trans = connection.begin()
        else:
            # Nested non-savepoint transaction: nothing to begin; the
            # outer block commits or rolls back.
            trans = None
        try:
            # Locking is part of the guarded region so that a failure
            # here does not leave ``trans`` open.
            self._lockTables(connection, lock)
            yield
            if trans is not None:
                trans.commit()
        except BaseException:
            if trans is not None:
                trans.rollback()
            raise
        finally:
            # Clear the flag once no transaction remains on the
            # connection.
            if not connection.in_transaction():
                connection.info.pop(_IN_SAVEPOINT_TRANSACTION, None)

568 

569 @contextmanager 

570 def _connection(self) -> Iterator[sqlalchemy.engine.Connection]: 

571 """Return context manager for Connection.""" 

572 if self._session_connection is not None: 

573 # It means that we are in Session context, but we may not be in 

574 # transaction context. Start a short transaction in that case. 

575 if self._session_connection.in_transaction(): 

576 yield self._session_connection 

577 else: 

578 with self._session_connection.begin(): 

579 yield self._session_connection 

580 else: 

581 # Make new connection and transaction, transaction will be 

582 # committed on context exit. 

583 with self._engine.begin() as connection: 

584 yield connection 

585 

586 @abstractmethod 

587 def _lockTables( 

588 self, connection: sqlalchemy.engine.Connection, tables: Iterable[sqlalchemy.schema.Table] = () 

589 ) -> None: 

590 """Acquire locks on the given tables. 

591 

592 This is an implementation hook for subclasses, called by `transaction`. 

593 It should not be called directly by other code. 

594 

595 Parameters 

596 ---------- 

597 connection : `sqlalchemy.engine.Connection` 

598 Database connection object. It is guaranteed that transaction is 

599 already in a progress for this connection. 

600 tables : `Iterable` [ `sqlalchemy.schema.Table` ], optional 

601 A list of tables to lock for the duration of this transaction. 

602 These locks are guaranteed to prevent concurrent writes and allow 

603 this transaction (only) to acquire the same locks (others should 

604 block), but only prevent concurrent reads if the database engine 

605 requires that in order to block concurrent writes. 

606 """ 

607 raise NotImplementedError() 

608 

def isTableWriteable(self, table: sqlalchemy.schema.Table) -> bool:
    """Report whether a table can be written to.

    A table is writeable when the database itself is writeable, or when
    the table is one of this database's temporary tables.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy table object to check.

    Returns
    -------
    writeable : `bool`
        Whether this table is writeable.
    """
    writeable = self.isWriteable()
    if writeable:
        return writeable
    # Read-only database: only temporary tables may be modified.
    return table.key in self._tempTables

624 

def assertTableWriteable(self, table: sqlalchemy.schema.Table, msg: str) -> None:
    """Raise unless the given table is writeable.

    A table is writeable when `isTableWriteable` says so, i.e. when the
    database is read-write or the table is a temporary table.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy table object to check.
    msg : `str`
        Message for the exception raised when the table is not writeable.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if the table is not writeable.
    """
    if self.isTableWriteable(table):
        return
    raise ReadOnlyDatabaseError(msg)

639 

@contextmanager
def declareStaticTables(self, *, create: bool) -> Iterator[StaticTablesContext]:
    """Return a context manager in which the database's static DDL schema
    can be declared.

    Parameters
    ----------
    create : `bool`
        If `True`, attempt to create all tables at the end of the context.
        If `False`, they will be assumed to already exist.

    Returns
    -------
    schema : `StaticTablesContext`
        A helper object that is used to add new tables.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if ``create`` is `True` and `Database.isWriteable` returns
        `False`.
    SchemaAlreadyDefinedError
        Raised if ``create`` is `True` and the database namespace already
        contains tables.

    Examples
    --------
    Given a `Database` instance ``db``::

        with db.declareStaticTables(create=True) as schema:
            schema.addTable("table1", TableSpec(...))
            schema.addTable("table2", TableSpec(...))

    Notes
    -----
    A database's static DDL schema must be declared before any dynamic
    tables are managed via calls to `ensureTableExists` or
    `getExistingTable`.  The order in which static schema tables are added
    inside the context block is unimportant; they will automatically be
    sorted and added in an order consistent with their foreign key
    relationships.

    If anything raises, either in this method or in the managed block, the
    schema metadata of this `Database` is reset to `None` before the
    exception propagates.
    """
    if create and not self.isWriteable():
        raise ReadOnlyDatabaseError(f"Cannot create tables in read-only database {self}.")
    # Start from fresh metadata; it is discarded again on any failure.
    self._metadata = sqlalchemy.MetaData(schema=self.namespace)
    try:
        context = StaticTablesContext(self)
        if create and context._tableNames:
            # Looks like the database is already initialized; to avoid the
            # danger of modifying/destroying a valid schema we refuse to do
            # anything in this case.
            raise SchemaAlreadyDefinedError(f"Cannot create tables in non-empty database {self}.")
        yield context
        # All tables have been declared by now, so the foreign keys
        # collected by the context can be attached.
        for table, foreignKey in context._foreignKeys:
            table.append_constraint(foreignKey)
        if create:
            if self.namespace is not None:
                # Create the namespace (schema) itself if it is missing.
                if self.namespace not in context._inspector.get_schema_names():
                    with self._connection() as connection:
                        connection.execute(sqlalchemy.schema.CreateSchema(self.namespace))
            # In our tables we have columns that make use of sqlalchemy
            # Sequence objects.  There is currently a bug in sqlalchemy
            # that causes a deprecation warning to be thrown on a property
            # of the Sequence object when the repr for the sequence is
            # created.  Here a filter is used to catch these deprecation
            # warnings when tables are created.
            with warnings.catch_warnings():
                warnings.simplefilter("ignore", category=sqlalchemy.exc.SADeprecationWarning)
                self._metadata.create_all(self._engine)
            # Call all initializer methods sequentially.
            for init in context._initializers:
                init(self)
    except BaseException:
        # Leave no half-declared schema behind.
        self._metadata = None
        raise

712 

@abstractmethod
def isWriteable(self) -> bool:
    """Tell whether this client is allowed to modify the database.

    Abstract: the base implementation only raises.

    Returns
    -------
    writeable : `bool`
        `True` if this database can be modified by this client.
    """
    raise NotImplementedError()

717 

718 @abstractmethod 

719 def __str__(self) -> str: 

720 """Return a human-readable identifier for this `Database`, including 

721 any namespace or schema that identifies its names within a `Registry`. 

722 """ 

723 raise NotImplementedError() 

724 

725 @property 

726 def dialect(self) -> sqlalchemy.engine.Dialect: 

727 """The SQLAlchemy dialect for this database engine 

728 (`sqlalchemy.engine.Dialect`). 

729 """ 

730 return self._engine.dialect 

731 

def shrinkDatabaseEntityName(self, original: str) -> str:
    """Return a form of the given name that fits this database engine's
    length limits for table, constraint, index, and sequence names.

    Implementations should not assume that simple truncation is safe,
    because multiple long names often begin with the same prefix.

    This default implementation returns the name unchanged.

    Parameters
    ----------
    original : `str`
        The original name.

    Returns
    -------
    shrunk : `str`
        The new, possibly shortened name.
    """
    return original

753 

def expandDatabaseEntityName(self, shrunk: str) -> str:
    """Recover the original name of a database entity whose name was too
    long to fit within the database engine's limits.

    This default implementation returns the name unchanged.

    Parameters
    ----------
    shrunk : `str`
        The possibly shortened name.

    Returns
    -------
    original : `str`
        The original name.
    """
    return shrunk

769 

770 def _mangleTableName(self, name: str) -> str: 

771 """Map a logical, user-visible table name to the true table name used 

772 in the database. 

773 

774 The default implementation returns the given name unchanged. 

775 

776 Parameters 

777 ---------- 

778 name : `str` 

779 Input table name. Should not include a namespace (i.e. schema) 

780 prefix. 

781 

782 Returns 

783 ------- 

784 mangled : `str` 

785 Mangled version of the table name (still with no namespace prefix). 

786 

787 Notes 

788 ----- 

789 Reimplementations of this method must be idempotent - mangling an 

790 already-mangled name must have no effect. 

791 """ 

792 return name 

793 

794 def _makeColumnConstraints(self, table: str, spec: ddl.FieldSpec) -> List[sqlalchemy.CheckConstraint]: 

795 """Create constraints based on this spec. 

796 

797 Parameters 

798 ---------- 

799 table : `str` 

800 Name of the table this column is being added to. 

801 spec : `FieldSpec` 

802 Specification for the field to be added. 

803 

804 Returns 

805 ------- 

806 constraint : `list` of `sqlalchemy.CheckConstraint` 

807 Constraint added for this column. 

808 """ 

809 # By default we return no additional constraints 

810 return [] 

811 

def _convertFieldSpec(
    self, table: str, spec: ddl.FieldSpec, metadata: sqlalchemy.MetaData, **kwargs: Any
) -> sqlalchemy.schema.Column:
    """Build the `sqlalchemy.schema.Column` described by a `FieldSpec`.

    Parameters
    ----------
    table : `str`
        Name of the table this column is being added to.
    spec : `FieldSpec`
        Specification for the field to be added.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema this field's table is
        being added to.
    **kwargs
        Additional keyword arguments to forward to the
        `sqlalchemy.schema.Column` constructor.  This is provided to make
        it easier for derived classes to delegate to ``super()`` while
        making only minor changes.

    Returns
    -------
    column : `sqlalchemy.schema.Column`
        SQLAlchemy representation of the field.
    """
    columnArgs = [spec.name, spec.getSizedColumnType()]
    if spec.autoincrement:
        # Generate a sequence to use for auto incrementing for databases
        # that do not support it natively.  This will be ignored by
        # sqlalchemy for databases that do support it.
        sequenceName = self.shrinkDatabaseEntityName(f"{table}_seq_{spec.name}")
        columnArgs.append(sqlalchemy.Sequence(sequenceName, metadata=metadata))
    assert spec.doc is None or isinstance(spec.doc, str), f"Bad doc for {table}.{spec.name}."
    return sqlalchemy.schema.Column(
        *columnArgs,
        nullable=spec.nullable,
        primary_key=spec.primaryKey,
        comment=spec.doc,
        server_default=spec.default,
        **kwargs,
    )

856 

def _convertForeignKeySpec(
    self, table: str, spec: ddl.ForeignKeySpec, metadata: sqlalchemy.MetaData, **kwargs: Any
) -> sqlalchemy.schema.ForeignKeyConstraint:
    """Convert a `ForeignKeySpec` to a
    `sqlalchemy.schema.ForeignKeyConstraint`.

    Parameters
    ----------
    table : `str`
        Name of the table this foreign key is being added to.
    spec : `ForeignKeySpec`
        Specification for the foreign key to be added.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema this constraint is
        being added to.  Not used by this implementation.
    **kwargs
        Additional keyword arguments to forward to the
        `sqlalchemy.schema.ForeignKeyConstraint` constructor.  This is
        provided to make it easier for derived classes to delegate to
        ``super()`` while making only minor changes.

    Returns
    -------
    constraint : `sqlalchemy.schema.ForeignKeyConstraint`
        SQLAlchemy representation of the constraint.
    """
    # Mangle the referenced table name once; it is needed both for the
    # constraint name and for every fully-qualified target column.
    target_table = self._mangleTableName(spec.table)
    name = self.shrinkDatabaseEntityName(
        "_".join(["fkey", table, target_table] + list(spec.target) + list(spec.source))
    )
    # ``**kwargs`` used to be accepted but dropped, contradicting the
    # documented contract; it is now actually forwarded.
    return sqlalchemy.schema.ForeignKeyConstraint(
        spec.source,
        [f"{target_table}.{col}" for col in spec.target],
        name=name,
        ondelete=spec.onDelete,
        **kwargs,
    )

894 

895 def _convertExclusionConstraintSpec( 

896 self, 

897 table: str, 

898 spec: Tuple[Union[str, Type[TimespanDatabaseRepresentation]], ...], 

899 metadata: sqlalchemy.MetaData, 

900 ) -> sqlalchemy.schema.Constraint: 

901 """Convert a `tuple` from `ddl.TableSpec.exclusion` into a SQLAlchemy 

902 constraint representation. 

903 

904 Parameters 

905 ---------- 

906 table : `str` 

907 Name of the table this constraint is being added to. 

908 spec : `tuple` [ `str` or `type` ] 

909 A tuple of `str` column names and the `type` object returned by 

910 `getTimespanRepresentation` (which must appear exactly once), 

911 indicating the order of the columns in the index used to back the 

912 constraint. 

913 metadata : `sqlalchemy.MetaData` 

914 SQLAlchemy representation of the DDL schema this constraint is 

915 being added to. 

916 

917 Returns 

918 ------- 

919 constraint : `sqlalchemy.schema.Constraint` 

920 SQLAlchemy representation of the constraint. 

921 

922 Raises 

923 ------ 

924 NotImplementedError 

925 Raised if this database does not support exclusion constraints. 

926 """ 

927 raise NotImplementedError(f"Database {self} does not support exclusion constraints.") 

928 

def _convertTableSpec(
    self, name: str, spec: ddl.TableSpec, metadata: sqlalchemy.MetaData, **kwargs: Any
) -> sqlalchemy.schema.Table:
    """Translate a `TableSpec` into a `sqlalchemy.schema.Table`.

    Parameters
    ----------
    name : `str`
        Name of the table; it is passed through `_mangleTableName` before
        use.
    spec : `TableSpec`
        Specification of the table to build.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema the table is added to.
    **kwargs
        Extra keyword arguments passed straight through to the
        `sqlalchemy.schema.Table` constructor, so that derived classes can
        delegate to ``super()`` and tweak only a few details.

    Returns
    -------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy representation of the table.

    Notes
    -----
    Foreign key *constraints* from ``spec.foreignKeys`` are deliberately
    not created here, in order to avoid circular dependencies; only the
    indexes backing them are.  The constraints are added by higher-level
    logic in `ensureTableExists`, `getExistingTable`, and
    `declareStaticTables`.
    """
    name = self._mangleTableName(name)

    # Columns first, then any per-column constraints.
    elements = [self._convertFieldSpec(name, field, metadata) for field in spec.fields]
    for field in spec.fields:
        elements.extend(self._makeColumnConstraints(name, field))

    # Column tuples that already have an index (primary key, unique
    # constraints, explicit indexes), so that no duplicate explicit or
    # foreign-key index is created for them.
    primary_key = tuple(field.name for field in spec.fields if field.primaryKey)
    covered = {primary_key}

    for columns in spec.unique:
        unique_name = self.shrinkDatabaseEntityName("_".join([name, "unq"] + list(columns)))
        elements.append(sqlalchemy.schema.UniqueConstraint(*columns, name=unique_name))
    covered.update(spec.unique)

    for index in spec.indexes:
        if index.columns in covered:
            continue
        index_name = self.shrinkDatabaseEntityName("_".join([name, "idx"] + list(index.columns)))
        elements.append(
            sqlalchemy.schema.Index(
                index_name,
                *index.columns,
                unique=(index.columns in spec.unique),
                **index.kwargs,
            )
        )
    # Explicit indexes are registered only after all of them were
    # processed, exactly as the filter above expects.
    covered.update(index.columns for index in spec.indexes)

    for fk in spec.foreignKeys:
        if not fk.addIndex or fk.source in covered:
            continue
        fk_index_name = self.shrinkDatabaseEntityName("_".join((name, "fkidx") + fk.source))
        elements.append(sqlalchemy.schema.Index(fk_index_name, *fk.source))

    for excl in spec.exclusion:
        elements.append(self._convertExclusionConstraintSpec(name, excl, metadata))

    assert spec.doc is None or isinstance(spec.doc, str), f"Bad doc for {name}."
    return sqlalchemy.schema.Table(name, metadata, *elements, comment=spec.doc, info=spec, **kwargs)

1000 

def ensureTableExists(self, name: str, spec: ddl.TableSpec) -> sqlalchemy.schema.Table:
    """Return the table with the given name and specification, creating
    it first if it does not exist yet.

    Parameters
    ----------
    name : `str`
        Name of the table (not including namespace qualifiers).
    spec : `TableSpec`
        Specification for the table.  It is used to create the table, and
        *may* be used to check an existing table for consistency, but no
        such check is guaranteed.

    Returns
    -------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy representation of the table.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False`, and the table does not
        already exist.
    DatabaseConflictError
        Raised if the table exists but ``spec`` is inconsistent with its
        definition.

    Notes
    -----
    This method may not be called within transactions.  It may be called
    on read-only databases if and only if the table does in fact already
    exist.

    Subclasses may override this method, but usually should not need to.
    """
    # TODO: if _engine is used to make a table then it uses separate
    # connection and should not interfere with current transaction
    assert (
        self._session_connection is None or not self._session_connection.in_transaction()
    ), "Table creation interrupts transactions."
    assert self._metadata is not None, "Static tables must be declared before dynamic tables."

    existing = self.getExistingTable(name, spec)
    if existing is not None:
        return existing

    if not self.isWriteable():
        raise ReadOnlyDatabaseError(
            f"Table {name} does not exist, and cannot be created "
            f"because database {self} is read-only."
        )

    created = self._convertTableSpec(name, spec, self._metadata)
    for foreignKeySpec in spec.foreignKeys:
        constraint = self._convertForeignKeySpec(name, foreignKeySpec, self._metadata)
        created.append_constraint(constraint)

    try:
        with self._connection() as connection:
            created.create(connection)
    except sqlalchemy.exc.DatabaseError:
        # Another process may have created the table in the meantime,
        # which usually surfaces as OperationalError or ProgrammingError.
        # An IF NOT EXISTS clause is not usable here because of a
        # server-side PostgreSQL race condition that causes
        # IntegrityError.  All of these inherit from DatabaseError, so
        # catch that and look for the table again.
        raced = self.getExistingTable(name, spec)
        if raced is None:
            raise
        return raced
    return created

1067 

def getExistingTable(self, name: str, spec: ddl.TableSpec) -> Optional[sqlalchemy.schema.Table]:
    """Look up an already-existing table by name and specification.

    Parameters
    ----------
    name : `str`
        Name of the table (not including namespace qualifiers).
    spec : `TableSpec`
        Specification for the table.  It is used to build the SQLAlchemy
        representation of the table, and to check that the actual table
        is consistent with it.

    Returns
    -------
    table : `sqlalchemy.schema.Table` or `None`
        SQLAlchemy representation of the table, or `None` if it does not
        exist.

    Raises
    ------
    DatabaseConflictError
        Raised if the table exists but ``spec`` is inconsistent with its
        definition.

    Notes
    -----
    This method can be called within transactions and never modifies the
    database.

    Subclasses may override this method, but usually should not need to.
    """
    assert self._metadata is not None, "Static tables must be declared before dynamic tables."
    name = self._mangleTableName(name)
    if self.namespace is None:
        metadata_key = name
    else:
        metadata_key = f"{self.namespace}.{name}"

    known = self._metadata.tables.get(metadata_key)
    if known is not None:
        # Already present in our metadata: only the column names are
        # compared against the new specification.
        if spec.fields.names != set(known.columns.keys()):
            raise DatabaseConflictError(
                f"Table '{name}' has already been defined differently; the new "
                f"specification has columns {list(spec.fields.names)}, while "
                f"the previous definition has {list(known.columns.keys())}."
            )
        return known

    # Not in metadata yet; ask the database itself.
    inspector = sqlalchemy.inspect(self._engine)
    if name not in inspector.get_table_names(schema=self.namespace):
        return None
    _checkExistingTableDefinition(name, spec, inspector.get_columns(name, schema=self.namespace))
    reflected = self._convertTableSpec(name, spec, self._metadata)
    for foreignKeySpec in spec.foreignKeys:
        constraint = self._convertForeignKeySpec(name, foreignKeySpec, self._metadata)
        reflected.append_constraint(constraint)
    return reflected

1118 

@classmethod
def getTimespanRepresentation(cls) -> Type[TimespanDatabaseRepresentation]:
    """Return the `type` describing how `Timespan` objects are stored in
    this database.

    `Database` itself does not use the returned type anywhere else; it is
    up to calling code to keep DDL and queries consistent with it.

    Returns
    -------
    TimespanReprClass : `type` (`TimespanDatabaseRepresentation` subclass)
        A type that encapsulates the way `Timespan` objects should be
        stored in this database.

    Notes
    -----
    The choice of representation belongs to the `Database`
    implementation, but the timespan-mangling logic is kept outside of it
    for two main reasons:

    - Timespans appear in relatively few tables and queries in our
      typical usage, and the code that operates on them already knows it
      is dealing with timespans.  A timespan-representation-aware
      implementation of, say, `insert` would need extra logic to detect
      when timespan-mangling is required, which would usually be useless
      overhead.

    - SQLAlchemy's rich SELECT query expression system has no way to wrap
      multiple columns in a single expression object (the ORM does, but
      we are not using the ORM).  We would therefore have to wrap _much_
      more of that code in our own interfaces to encapsulate timespan
      representations there.
    """
    representation = TimespanDatabaseRepresentation.Compound
    return representation

1154 

def sync(
    self,
    table: sqlalchemy.schema.Table,
    *,
    keys: Dict[str, Any],
    compared: Optional[Dict[str, Any]] = None,
    extra: Optional[Dict[str, Any]] = None,
    returning: Optional[Sequence[str]] = None,
    update: bool = False,
) -> Tuple[Optional[Dict[str, Any]], Union[bool, Dict[str, Any]]]:
    """Insert into a table as necessary to ensure database contains
    values equivalent to the given ones.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table to be queried and possibly inserted into.
    keys : `dict`
        Column name-value pairs used to search for an existing row; must
        be a combination that can be used to select a single row if one
        exists.  If such a row does not exist, these values are used in
        the insert.
    compared : `dict`, optional
        Column name-value pairs that are compared to those in any existing
        row.  If such a row does not exist, these values are used in the
        insert.
    extra : `dict`, optional
        Column name-value pairs that are ignored if a matching row exists,
        but used in an insert if one is necessary.
    returning : `~collections.abc.Sequence` of `str`, optional
        The names of columns whose values should be returned.
    update : `bool`, optional
        If `True` (`False` is default), update the existing row with the
        values in ``compared`` instead of raising `DatabaseConflictError`.

    Returns
    -------
    row : `dict`, optional
        The value of the fields indicated by ``returning``, or `None` if
        ``returning`` is `None`.
    inserted_or_updated : `bool` or `dict`
        If `True`, a new row was inserted; if `False`, a matching row
        already existed.  If a `dict` (only possible if ``update=True``),
        then an existing row was updated, and the dict maps the names of
        the updated columns to their *old* values (new values can be
        obtained from ``compared``).

    Raises
    ------
    DatabaseConflictError
        Raised if the values in ``compared`` do not match the values in the
        database and ``update`` is `False`.
    ReadOnlyDatabaseError
        Raised if the table is not writeable and either no matching row
        already exists, or the matching row would need to be updated.
    ConflictingDefinitionError
        Raised if the table is writeable but no row matching ``keys`` can
        be found after the insert attempt.
    RuntimeError
        Raised if ``keys`` matches more than one row, or if a conflict
        with ``compared`` is found right after a successful insert.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    It may be called on read-only databases if and only if the matching row
    does in fact already exist.
    """

    def check() -> Tuple[int, Optional[Dict[str, Any]], Optional[List]]:
        """Query for a row that matches the ``keys`` argument, and compare
        to what was given by the caller.

        Returns
        -------
        n : `int`
            Number of matching rows.  ``n != 1`` is always an error, but
            it's a different kind of error depending on where `check` is
            being called.
        bad : `dict` or `None`
            The subset of the keys of ``compared`` for which the existing
            values did not match the given one, mapped to the existing
            values in the database.  A non-empty dict signals a conflict,
            which the caller handles differently depending on context.
            `None` if ``n != 1``.
        result : `list` or `None`
            Results in the database that correspond to the columns given
            in ``returning``, or `None` if ``returning is None`` or
            ``n != 1``.
        """
        toSelect: Set[str] = set()
        if compared is not None:
            toSelect.update(compared.keys())
        if returning is not None:
            toSelect.update(returning)
        if not toSelect:
            # Need to select some column, even if we just want to see
            # how many rows we get back.
            toSelect.add(next(iter(keys.keys())))
        selectSql = (
            sqlalchemy.sql.select(*[table.columns[k].label(k) for k in toSelect])
            .select_from(table)
            .where(sqlalchemy.sql.and_(*[table.columns[k] == v for k, v in keys.items()]))
        )
        with self._connection() as connection:
            fetched = list(connection.execute(selectSql).mappings())
        if len(fetched) != 1:
            return len(fetched), None, None
        existing = fetched[0]
        if compared is not None:

            def safeNotEqual(a: Any, b: Any) -> bool:
                # astropy times are compared through the project's time
                # converter rather than with ``!=``.
                if isinstance(a, astropy.time.Time):
                    return not time_utils.TimeConverter().times_equal(a, b)
                return a != b

            inconsistencies = {
                k: existing[k] for k, v in compared.items() if safeNotEqual(existing[k], v)
            }
        else:
            inconsistencies = {}
        if returning is not None:
            toReturn: Optional[list] = [existing[k] for k in returning]
        else:
            toReturn = None
        return 1, inconsistencies, toReturn

    def format_bad(inconsistencies: Dict[str, Any]) -> str:
        """Format the 'bad' dictionary of existing values returned by
        ``check`` into a string suitable for an error message.
        """
        assert compared is not None, "Should not be able to get inconsistencies without comparing."
        return ", ".join(f"{k}: {v!r} != {compared[k]!r}" for k, v in inconsistencies.items())

    if self.isTableWriteable(table):
        # Try an insert first, but allow it to fail (in only specific
        # ways).  The candidate row merges keys, compared and extra, with
        # later dicts overriding earlier ones on duplicate column names.
        row = keys.copy()
        if compared is not None:
            row.update(compared)
        if extra is not None:
            row.update(extra)
        with self.transaction():
            inserted = bool(self.ensure(table, row))
            inserted_or_updated: Union[bool, Dict[str, Any]]
            # Need to perform check() for this branch inside the
            # transaction, so we roll back an insert that didn't do
            # what we expected.  That limits the extent to which we
            # can reduce duplication between this block and the other
            # ones that perform similar logic.
            n, bad, result = check()
            if n < 1:
                raise ConflictingDefinitionError(
                    f"Attempted to ensure {row} exists by inserting it with ON CONFLICT IGNORE, "
                    f"but a post-insert query on {keys} returned no results. "
                    f"Insert was {'' if inserted else 'not '}reported as successful. "
                    "This can occur if the insert violated a database constraint other than the "
                    "unique constraint or primary key used to identify the row in this call."
                )
            elif n > 1:
                raise RuntimeError(
                    f"Keys passed to sync {keys.keys()} do not comprise a "
                    f"unique constraint for table {table.name}."
                )
            elif bad:
                assert (
                    compared is not None
                ), "Should not be able to get inconsistencies without comparing."
                if inserted:
                    raise RuntimeError(
                        f"Conflict ({bad}) in sync after successful insert; this is "
                        "possible if the same table is being updated by a concurrent "
                        "process that isn't using sync, but it may also be a bug in "
                        "daf_butler."
                    )
                elif update:
                    # Overwrite only the mismatching columns; ``bad`` keeps
                    # their old values and becomes the second return value.
                    with self._connection() as connection:
                        connection.execute(
                            table.update()
                            .where(sqlalchemy.sql.and_(*[table.columns[k] == v for k, v in keys.items()]))
                            .values(**{k: compared[k] for k in bad.keys()})
                        )
                    inserted_or_updated = bad
                else:
                    raise DatabaseConflictError(
                        f"Conflict in sync for table {table.name} on column(s) {format_bad(bad)}."
                    )
            else:
                inserted_or_updated = inserted
    else:
        # Database is not writeable; just see if the row exists.
        n, bad, result = check()
        if n < 1:
            raise ReadOnlyDatabaseError("sync needs to insert, but database is read-only.")
        elif n > 1:
            raise RuntimeError("Keys passed to sync do not comprise a unique constraint.")
        elif bad:
            if update:
                raise ReadOnlyDatabaseError("sync needs to update, but database is read-only.")
            else:
                raise DatabaseConflictError(
                    f"Conflict in sync for table {table.name} on column(s) {format_bad(bad)}."
                )
        inserted_or_updated = False
    if returning is None:
        return None, inserted_or_updated
    else:
        # ``result`` is only None when ``returning`` is None or n != 1, and
        # every n != 1 path above has already raised.
        assert result is not None
        return {k: v for k, v in zip(returning, result)}, inserted_or_updated

1358 

def insert(
    self,
    table: sqlalchemy.schema.Table,
    *rows: dict,
    returnIds: bool = False,
    select: Optional[sqlalchemy.sql.expression.SelectBase] = None,
    names: Optional[Iterable[str]] = None,
) -> Optional[List[int]]:
    """Insert rows into a table, optionally reporting the autoincrement
    primary key values that were generated.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    returnIds : `bool`
        If `True` (`False` is default), return the values of the table's
        autoincrement primary key field (which must exist).
    select : `sqlalchemy.sql.SelectBase`, optional
        A SELECT query expression to insert rows from.  Cannot be provided
        with either ``rows`` or ``returnIds=True``.
    names : `Iterable` [ `str` ], optional
        Names of columns in ``table`` to be populated, ordered to match the
        columns returned by ``select``.  Ignored if ``select`` is `None`.
        If not provided, the columns returned by ``select`` must be named
        to match the desired columns of ``table``.
    *rows
        Positional arguments are the rows to be inserted, as dictionaries
        mapping column name to value.  The keys in all dictionaries must
        be the same.

    Returns
    -------
    ids : `None`, or `list` of `int`
        If ``returnIds`` is `True`, a `list` containing the inserted
        values for the table's autoincrement primary key.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.
    TypeError
        Raised if ``select`` is combined with ``rows`` or
        ``returnIds=True``.

    Notes
    -----
    The default implementation uses bulk insert syntax when ``returnIds``
    is `False`, and a loop over single-row insert operations when it is
    `True`.

    Derived classes should reimplement when they can provide a more
    efficient implementation (especially for the latter case).

    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.
    """
    self.assertTableWriteable(table, f"Cannot insert into read-only table {table}.")
    if select is not None and (rows or returnIds):
        raise TypeError("'select' is incompatible with passing value rows or returnIds=True.")
    if not rows and select is None:
        # Nothing to insert at all.
        return [] if returnIds else None
    with self._connection() as connection:
        if returnIds:
            # One statement per row, so each generated key can be read back.
            statement = table.insert()
            return [connection.execute(statement, row).inserted_primary_key[0] for row in rows]
        if select is None:
            connection.execute(table.insert(), rows)
            return None
        if names is None:
            # columns() is deprecated since 1.4, but
            # selected_columns() method did not exist in 1.3.
            selected = select.selected_columns if hasattr(select, "selected_columns") else select.columns
            names = selected.keys()
        connection.execute(table.insert().from_select(names, select))
        return None

1438 

@abstractmethod
def replace(self, table: sqlalchemy.schema.Table, *rows: dict) -> None:
    """Insert rows into a table, overwriting any existing row whose
    primary key would otherwise collide with a new one.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    *rows
        Positional arguments are the rows to be inserted, as dictionaries
        mapping column name to value.  The keys in all dictionaries must
        be the same.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    Implementations should raise a `sqlalchemy.exc.IntegrityError`
    exception when a constraint other than the primary key would be
    violated.

    Implementations are not required to support `replace` on tables
    with autoincrement keys.
    """
    # Abstract: concrete database classes must supply the implementation.
    raise NotImplementedError()

1472 

@abstractmethod
def ensure(self, table: sqlalchemy.schema.Table, *rows: dict, primary_key_only: bool = False) -> int:
    """Insert rows into a table, silently skipping those whose insertion
    would violate a unique constraint.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    *rows
        Positional arguments are the rows to be inserted, as dictionaries
        mapping column name to value.  The keys in all dictionaries must
        be the same.
    primary_key_only : `bool`, optional
        If `True` (`False` is default), only skip rows that violate the
        primary key constraint, and raise an exception (and rollback
        transactions) for other constraint violations.

    Returns
    -------
    count : `int`
        The number of rows actually inserted.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.
        This is raised even if the operation would do nothing even on a
        writeable database.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    Implementations are not required to support `ensure` on tables
    with autoincrement keys.
    """
    # Abstract: concrete database classes must supply the implementation.
    raise NotImplementedError()

1512 

def delete(self, table: sqlalchemy.schema.Table, columns: Iterable[str], *rows: dict) -> int:
    """Delete rows from a table, selecting them by column values.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table that rows should be deleted from.
    columns : `~collections.abc.Iterable` of `str`
        The names of columns that will be used to constrain the rows to
        be deleted; these will be combined via ``AND`` to form the
        ``WHERE`` clause of the delete query.
    *rows
        Positional arguments are the keys of rows to be deleted, as
        dictionaries mapping column name to value.  The keys in all
        dictionaries must be exactly the names in ``columns``.

    Returns
    -------
    count : `int`
        Number of rows deleted.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    The default implementation should be sufficient for most derived
    classes.
    """
    self.assertTableWriteable(table, f"Cannot delete from read-only table {table}.")
    if columns and not rows:
        # With no columns the call means "delete everything" and proceeds
        # as usual.  With columns but no rows it is a constrained bulk
        # delete that matches nothing, so stop here and report zero
        # affected rows.
        return 0
    statement = table.delete()
    columns = list(columns)  # Force iterators to list

    # Collect the distinct values seen per column: if exactly one column
    # varies across the rows, an IN clause is more efficient.
    values_by_column: Dict[str, Set] = defaultdict(set)
    if len(columns) == 1:
        # A single column can always use IN; nothing to work out.
        only_column = columns[0]
        varying = [only_column]
        values_by_column[only_column] = {row[only_column] for row in rows}
    else:
        for row in rows:
            for key, value in row.items():
                values_by_column[key].add(value)
        varying = [key for key, values in values_by_column.items() if len(values) > 1]

    if len(varying) != 1:
        # Zero or several columns vary, so use explicit bind parameters
        # and let every row be processed separately.
        terms = [table.columns[column] == sqlalchemy.sql.bindparam(column) for column in columns]
        if terms:
            statement = statement.where(sqlalchemy.sql.and_(*terms))
        with self._connection() as connection:
            return connection.execute(statement, rows).rowcount

    # Exactly one column varies and all others are fixed: use IN for it.
    in_column = varying.pop()

    # Plain equality for the fixed columns (each set holds one element).
    fixed_terms = [
        table.columns[key] == values.pop()
        for key, values in values_by_column.items()
        if key != in_column
    ]

    # IN does not cope with an "infinite" number of values, so the work is
    # split into separate statements of bounded size.
    candidates = list(values_by_column[in_column])
    batch_size = 1_000  # Controls how many items to put in IN clause
    deleted = 0
    with self._connection() as connection:
        for start in range(0, len(candidates), batch_size):
            batch = candidates[start : start + batch_size]
            membership = table.columns[in_column].in_(batch)
            batched = statement.where(sqlalchemy.sql.and_(*fixed_terms, membership))
            deleted += connection.execute(batched).rowcount
    return deleted

1611 

def deleteWhere(self, table: sqlalchemy.schema.Table, where: sqlalchemy.sql.ClauseElement) -> int:
    """Delete the rows of a table selected by a ready-made WHERE clause.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table that rows should be deleted from.
    where : `sqlalchemy.sql.ClauseElement`
        Pre-constructed expression used as the ``WHERE`` clause of the
        delete query.

    Returns
    -------
    count : `int`
        Number of rows deleted.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    The default implementation should be sufficient for most derived
    classes.
    """
    self.assertTableWriteable(table, f"Cannot delete from read-only table {table}.")

    statement = table.delete().where(where)
    with self._connection() as connection:
        outcome = connection.execute(statement)
        return outcome.rowcount

1647 

def update(self, table: sqlalchemy.schema.Table, where: Dict[str, str], *rows: dict) -> int:
    """Update rows of a table in bulk.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table containing the rows to be updated.
    where : `dict` [`str`, `str`]
        A mapping from the names of columns that will be used to search for
        existing rows to the keys that will hold these values in the
        ``rows`` dictionaries.  Note that these may not be the same due to
        SQLAlchemy limitations.
    *rows
        Positional arguments are the rows to be updated.  The keys in all
        dictionaries must be the same, and may correspond to either a
        value in the ``where`` dictionary or the name of a column to be
        updated.

    Returns
    -------
    count : `int`
        Number of rows matched (regardless of whether the update actually
        modified them).

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    The default implementation should be sufficient for most derived
    classes.
    """
    self.assertTableWriteable(table, f"Cannot update read-only table {table}.")
    if not rows:
        return 0
    # One equality per search column, each bound to the row key named in
    # ``where``.
    conditions = [
        table.columns[column] == sqlalchemy.sql.bindparam(row_key) for column, row_key in where.items()
    ]
    statement = table.update().where(sqlalchemy.sql.and_(*conditions))
    with self._connection() as connection:
        outcome = connection.execute(statement, rows)
        return outcome.rowcount

1693 

def query(
    self, sql: sqlalchemy.sql.Selectable, *args: Any, **kwargs: Any
) -> sqlalchemy.engine.ResultProxy:
    """Run a SELECT query against the database.

    Parameters
    ----------
    sql : `sqlalchemy.sql.Selectable`
        A SQLAlchemy representation of a ``SELECT`` query.
    *args
        Additional positional arguments are forwarded to
        `sqlalchemy.engine.Connection.execute`.
    **kwargs
        Additional keyword arguments are forwarded to
        `sqlalchemy.engine.Connection.execute`.

    Returns
    -------
    result : `sqlalchemy.engine.ResultProxy`
        Query results.

    Notes
    -----
    The default implementation should be sufficient for most derived
    classes.
    """
    # The returned Result object outlives this call, so the lifetime of
    # the connection behind it has to be handled here. By default a
    # dedicated connection is opened that is closed together with the
    # result; when a session connection already exists (transaction
    # context) that one is reused instead.
    #
    # TODO: May be better approach would be to make this method return a
    # context manager, but this means big changes for callers of this
    # method.
    if self._session_connection is None:
        connection = self._engine.connect(close_with_result=True)
    else:
        connection = self._session_connection
    # TODO: should we guard against non-SELECT queries here?
    return connection.execute(sql, *args, **kwargs)

1734 

@abstractmethod
def constant_rows(
    self,
    fields: NamedValueAbstractSet[ddl.FieldSpec],
    *rows: dict,
    name: Optional[str] = None,
) -> sqlalchemy.sql.FromClause:
    """Return a SQLAlchemy object that represents a small number of
    constant-valued rows.

    Parameters
    ----------
    fields : `NamedValueAbstractSet` [ `ddl.FieldSpec` ]
        The columns of the rows. Unique and foreign key constraints are
        ignored.
    *rows : `dict`
        Values for the rows. Each dictionary is indexed with every name
        in ``fields.names``.
    name : `str`, optional
        If provided, the name of the SQL construct. If not provided, an
        opaque but unique identifier of the form ``tmp_<uuid4 hex>`` is
        generated.

    Returns
    -------
    from_clause : `sqlalchemy.sql.FromClause`
        SQLAlchemy object representing the given rows. This is guaranteed
        to be something that can be directly joined into a ``SELECT``
        query's ``FROM`` clause, and will not involve a temporary table
        that needs to be cleaned up later.

    Notes
    -----
    The default implementation uses the SQL-standard ``VALUES`` construct,
    but support for that construct is varied enough across popular RDBMSs
    that the method is still marked abstract to force explicit opt-in via
    delegation to `super`.
    """
    if name is None:
        name = f"tmp_{uuid.uuid4().hex}"

    # One column per field specification, typed via the field itself.
    columns = [sqlalchemy.Column(field.name, field.getSizedColumnType()) for field in fields]
    values_clause = sqlalchemy.sql.values(*columns, name=name)

    # Each row becomes a tuple whose entries follow the order of
    # ``fields.names``.
    data = [tuple(row[field_name] for field_name in fields.names) for row in rows]
    return values_clause.data(data)

1777 

def get_constant_rows_max(self) -> int:
    """Return the maximum number of rows that should be passed to
    `constant_rows` for this backend.

    Returns
    -------
    max : `int`
        Maximum number of rows; this implementation always returns 100.

    Notes
    -----
    This should reflect typical performance profiles (or a guess at these),
    not just hard database engine limits.
    """
    max_rows = 100
    return max_rows

1793 

# Annotation-only declaration: no value is assigned at this point.
# NOTE(review): presumably set by ``__init__`` or by concrete subclasses;
# confirm against the rest of the class.
origin: int
"""An integer ID that should be used as the default for any datasets,
quanta, or other entities that use a (autoincrement, origin) compound
primary key (`int`).
"""

# Annotation-only declaration: no value is assigned at this point.
# ``Optional`` because the value may be `None` as well as a `str`.
namespace: Optional[str]
"""The schema or namespace this database instance is associated with
(`str` or `None`).
"""

1803 """