Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = [ 

24 "Database", 

25 "ReadOnlyDatabaseError", 

26 "DatabaseConflictError", 

27 "SchemaAlreadyDefinedError", 

28 "StaticTablesContext", 

29] 

30 

31from abc import ABC, abstractmethod 

32from collections import defaultdict 

33from contextlib import contextmanager 

34from typing import ( 

35 Any, 

36 Callable, 

37 Dict, 

38 Iterable, 

39 Iterator, 

40 List, 

41 Optional, 

42 Sequence, 

43 Set, 

44 Tuple, 

45 Type, 

46 Union, 

47) 

48import uuid 

49import warnings 

50 

51import astropy.time 

52import sqlalchemy 

53 

54from ...core import SpatialRegionDatabaseRepresentation, TimespanDatabaseRepresentation, ddl, time_utils 

55from .._exceptions import ConflictingDefinitionError 

56 

57_IN_SAVEPOINT_TRANSACTION = "IN_SAVEPOINT_TRANSACTION" 

58 

59 

60def _checkExistingTableDefinition(name: str, spec: ddl.TableSpec, inspection: List[Dict[str, Any]]) -> None: 

61 """Test that the definition of a table in a `ddl.TableSpec` and from 

62 database introspection are consistent. 

63 

64 Parameters 

65 ---------- 

66 name : `str` 

67 Name of the table (only used in error messages). 

68 spec : `ddl.TableSpec` 

69 Specification of the table. 

70 inspection : `dict` 

71 Dictionary returned by 

72 `sqlalchemy.engine.reflection.Inspector.get_columns`. 

73 

74 Raises 

75 ------ 

76 DatabaseConflictError 

77 Raised if the definitions are inconsistent. 

78 """ 

79 columnNames = [c["name"] for c in inspection] 

80 if spec.fields.names != set(columnNames): 

81 raise DatabaseConflictError(f"Table '{name}' exists but is defined differently in the database; " 

82 f"specification has columns {list(spec.fields.names)}, while the " 

83 f"table in the database has {columnNames}.") 

84 

85 

class ReadOnlyDatabaseError(RuntimeError):
    """Error signalling that a write operation was attempted on a
    `Database` that is read-only.
    """

90 

91 

class DatabaseConflictError(ConflictingDefinitionError):
    """Error signalling that database content (row values or schema
    entities) is inconsistent with what this client expects.
    """

96 

97 

class SchemaAlreadyDefinedError(RuntimeError):
    """Error signalling an attempt to initialize a database schema when
    some tables already exist.
    """

102 

103 

class StaticTablesContext:
    """Collector for the static schema declarations of a registry layer
    in a database.

    Instances are handed out by `Database.declareStaticTables`; that method
    is intended to be the only place where this class is constructed.
    """

    def __init__(self, db: Database):
        self._db = db
        # Foreign key constraints are only recorded here, paired with the
        # table they belong to; they are not attached by this class.
        self._foreignKeys: List[Tuple[sqlalchemy.schema.Table, sqlalchemy.schema.ForeignKeyConstraint]] = []
        self._inspector = sqlalchemy.inspect(self._db._engine)
        # Snapshot of the tables present in the namespace at construction.
        self._tableNames = frozenset(self._inspector.get_table_names(schema=self._db.namespace))
        self._initializers: List[Callable[[Database], None]] = []

    def addTable(self, name: str, spec: ddl.TableSpec) -> sqlalchemy.schema.Table:
        """Declare a new table in the schema and return its SQLAlchemy
        representation.

        Actual creation of the table may be deferred until the context
        opened by `Database.declareStaticTables` ends, so tables can be
        declared in any order even when they are related by foreign keys.

        Parameters
        ----------
        name : `str`
            Logical table name; it is mangled by the database before use.
        spec : `ddl.TableSpec`
            Specification of the table.

        Returns
        -------
        table : `sqlalchemy.schema.Table`
            SQLAlchemy representation of the table.
        """
        db = self._db
        name = db._mangleTableName(name)
        if name in self._tableNames:
            # The table already exists: make sure its columns match.
            existingColumns = self._inspector.get_columns(name, schema=db.namespace)
            _checkExistingTableDefinition(name, spec, existingColumns)
        table = db._convertTableSpec(name, spec, db._metadata)
        self._foreignKeys.extend(
            (table, db._convertForeignKeySpec(name, foreignKeySpec, db._metadata))
            for foreignKeySpec in spec.foreignKeys
        )
        return table

    def addTableTuple(self, specs: Tuple[ddl.TableSpec, ...]) -> Tuple[sqlalchemy.schema.Table, ...]:
        """Declare every table in a named tuple of specifications and
        return their SQLAlchemy representations in a named tuple of the
        same type.

        Actual creation of the tables may be deferred until the context
        opened by `Database.declareStaticTables` ends, so tables can be
        declared in any order even when they are related by foreign keys.

        Notes
        -----
        ``specs`` *must* be an instance of a type created by
        `collections.namedtuple`, not just a regular tuple, and the returned
        object is guaranteed to be of the same type. Because
        `~collections.namedtuple` is a factory for `type` objects rather
        than a type itself, this cannot be expressed with type annotations.
        """
        namedSpecs = zip(specs._fields, specs)  # type: ignore
        return specs._make(  # type: ignore
            self.addTable(fieldName, tableSpec) for fieldName, tableSpec in namedSpecs
        )

    def addInitializer(self, initializer: Callable[[Database], None]) -> None:
        """Register a callable that performs one-time initialization of a
        database.

        Initialization can mean anything that changes the state of a
        database and needs to be done exactly once after the database
        schema was created, for example populating schema attributes.

        Parameters
        ----------
        initializer : callable
            Callable taking a single argument, a `Database` instance.
        """
        self._initializers.append(initializer)

172 

173 

class Session:
    """Class representing a persistent connection to a database.

    Parameters
    ----------
    db : `Database`
        Database instance.

    Notes
    -----
    Client code should not instantiate this class directly; use
    `Database.session` to obtain a session context instead::

        with db.session() as session:
            session.method()
            db.method()

    In the current implementation sessions can be nested, and transactions
    can be nested within a session. All nested sessions and transactions
    share the same database connection.

    This class exposes the limited subset of the database API that needs a
    persistent connection (e.g. temporary tables, whose lifetime is that of
    a session). Potentially most of the database API could be associated
    with a Session class.
    """

    def __init__(self, db: Database):
        self._db = db

    def makeTemporaryTable(self, spec: ddl.TableSpec, name: Optional[str] = None) -> sqlalchemy.schema.Table:
        """Create a temporary table.

        Parameters
        ----------
        spec : `TableSpec`
            Specification for the table.
        name : `str`, optional
            A unique (within this session/connection) name for the table.
            Subclasses may override to modify the actual name used. If not
            provided, a unique name will be generated.

        Returns
        -------
        table : `sqlalchemy.schema.Table`
            SQLAlchemy representation of the table.

        Raises
        ------
        ValueError
            Raised if the table key produced by the database differs from
            ``name`` and is already registered as a temporary table.

        Notes
        -----
        Temporary tables may be created, dropped, and written to even in
        read-only databases - at least according to the Python-level
        protections in the `Database` classes. Server permissions may say
        otherwise, but in that case they probably need to be modified to
        support the full range of expected read-only butler behavior.

        Temporary table rows are guaranteed to be dropped when a connection
        is closed. `Database` implementations are permitted to allow the
        table to remain as long as this is transparent to the user (i.e.
        "creating" the temporary table in a new session should not be an
        error, even if it does nothing).

        It may not be possible to use temporary tables within transactions
        with some database engines (or configurations thereof).
        """
        db = self._db
        if name is None:
            name = f"tmp_{uuid.uuid4().hex}"
        table = db._convertTableSpec(name, spec, db._metadata, prefixes=['TEMPORARY'],
                                     schema=sqlalchemy.schema.BLANK_SCHEMA)
        if table.key in db._tempTables and table.key != name:
            raise ValueError(f"A temporary table with name {name} (transformed to {table.key} by "
                             f"Database) already exists.")
        for foreignKeySpec in spec.foreignKeys:
            constraint = db._convertForeignKeySpec(name, foreignKeySpec, db._metadata)
            table.append_constraint(constraint)
        with db._connection() as connection:
            table.create(connection)
        # Remember the key so the table is recognized as temporary later.
        db._tempTables.add(table.key)
        return table

    def dropTemporaryTable(self, table: sqlalchemy.schema.Table) -> None:
        """Drop a temporary table.

        Parameters
        ----------
        table : `sqlalchemy.schema.Table`
            A SQLAlchemy object returned by a previous call to
            `makeTemporaryTable`.

        Raises
        ------
        TypeError
            Raised if ``table`` is not registered as a temporary table.
        """
        db = self._db
        if table.key not in db._tempTables:
            raise TypeError(f"Table {table.key} was not created by makeTemporaryTable.")
        with db._connection() as connection:
            table.drop(connection)
        db._tempTables.remove(table.key)

268 

269 

270class Database(ABC): 

271 """An abstract interface that represents a particular database engine's 

272 representation of a single schema/namespace/database. 

273 

274 Parameters 

275 ---------- 

276 origin : `int` 

277 An integer ID that should be used as the default for any datasets, 

278 quanta, or other entities that use a (autoincrement, origin) compound 

279 primary key. 

280 engine : `sqlalchemy.engine.Engine` 

281 The SQLAlchemy engine for this `Database`. 

282 namespace : `str`, optional 

283 Name of the schema or namespace this instance is associated with. 

284 This is passed as the ``schema`` argument when constructing a 

285 `sqlalchemy.schema.MetaData` instance. We use ``namespace`` instead to 

286 avoid confusion between "schema means namespace" and "schema means 

287 table definitions". 

288 

289 Notes 

290 ----- 

291 `Database` requires all write operations to go through its special named 

292 methods. Our write patterns are sufficiently simple that we don't really 

293 need the full flexibility of SQL insert/update/delete syntax, and we need 

294 non-standard (but common) functionality in these operations sufficiently 

295 often that it seems worthwhile to provide our own generic API. 

296 

297 In contrast, `Database.query` allows arbitrary ``SELECT`` queries (via 

298 their SQLAlchemy representation) to be run, as we expect these to require 

299 significantly more sophistication while still being limited to standard 

300 SQL. 

301 

302 `Database` itself has several underscore-prefixed attributes: 

303 

304 - ``_engine``: SQLAlchemy object representing its engine. 

305 - ``_connection``: method returning a context manager for 

306 `sqlalchemy.engine.Connection` object. 

307 - ``_metadata``: the `sqlalchemy.schema.MetaData` object representing 

308 the tables and other schema entities. 

309 

310 These are considered protected (derived classes may access them, but other 

311 code should not), and read-only, aside from executing SQL via 

312 ``_connection``. 

313 """ 

314 

315 def __init__(self, *, origin: int, engine: sqlalchemy.engine.Engine, 

316 namespace: Optional[str] = None): 

317 self.origin = origin 

318 self.namespace = namespace 

319 self._engine = engine 

320 self._session_connection: Optional[sqlalchemy.engine.Connection] = None 

321 self._metadata: Optional[sqlalchemy.schema.MetaData] = None 

322 self._tempTables: Set[str] = set() 

323 

324 def __repr__(self) -> str: 

325 # Rather than try to reproduce all the parameters used to create 

326 # the object, instead report the more useful information of the 

327 # connection URL. 

328 if self._engine.url.password is not None: 

329 uri = str(self._engine.url.set(password="***")) 

330 else: 

331 uri = str(self._engine.url) 

332 if self.namespace: 

333 uri += f"#{self.namespace}" 

334 return f'{type(self).__name__}("{uri}")' 

335 

336 @classmethod 

337 def makeDefaultUri(cls, root: str) -> Optional[str]: 

338 """Create a default connection URI appropriate for the given root 

339 directory, or `None` if there can be no such default. 

340 """ 

341 return None 

342 

343 @classmethod 

344 def fromUri(cls, uri: str, *, origin: int, namespace: Optional[str] = None, 

345 writeable: bool = True) -> Database: 

346 """Construct a database from a SQLAlchemy URI. 

347 

348 Parameters 

349 ---------- 

350 uri : `str` 

351 A SQLAlchemy URI connection string. 

352 origin : `int` 

353 An integer ID that should be used as the default for any datasets, 

354 quanta, or other entities that use a (autoincrement, origin) 

355 compound primary key. 

356 namespace : `str`, optional 

357 A database namespace (i.e. schema) the new instance should be 

358 associated with. If `None` (default), the namespace (if any) is 

359 inferred from the URI. 

360 writeable : `bool`, optional 

361 If `True`, allow write operations on the database, including 

362 ``CREATE TABLE``. 

363 

364 Returns 

365 ------- 

366 db : `Database` 

367 A new `Database` instance. 

368 """ 

369 return cls.fromEngine(cls.makeEngine(uri, writeable=writeable), 

370 origin=origin, 

371 namespace=namespace, 

372 writeable=writeable) 

373 

374 @classmethod 

375 @abstractmethod 

376 def makeEngine(cls, uri: str, *, writeable: bool = True) -> sqlalchemy.engine.Engine: 

377 """Create a `sqlalchemy.engine.Engine` from a SQLAlchemy URI. 

378 

379 Parameters 

380 ---------- 

381 uri : `str` 

382 A SQLAlchemy URI connection string. 

383 writeable : `bool`, optional 

384 If `True`, allow write operations on the database, including 

385 ``CREATE TABLE``. 

386 

387 Returns 

388 ------- 

389 engine : `sqlalchemy.engine.Engine` 

390 A database engine. 

391 

392 Notes 

393 ----- 

394 Subclasses that support other ways to connect to a database are 

395 encouraged to add optional arguments to their implementation of this 

396 method, as long as they maintain compatibility with the base class 

397 call signature. 

398 """ 

399 raise NotImplementedError() 

400 

401 @classmethod 

402 @abstractmethod 

403 def fromEngine(cls, engine: sqlalchemy.engine.Engine, *, origin: int, 

404 namespace: Optional[str] = None, writeable: bool = True) -> Database: 

405 """Create a new `Database` from an existing `sqlalchemy.engine.Engine`. 

406 

407 Parameters 

408 ---------- 

409 engine : `sqllachemy.engine.Engine` 

410 The engine for the database. May be shared between `Database` 

411 instances. 

412 origin : `int` 

413 An integer ID that should be used as the default for any datasets, 

414 quanta, or other entities that use a (autoincrement, origin) 

415 compound primary key. 

416 namespace : `str`, optional 

417 A different database namespace (i.e. schema) the new instance 

418 should be associated with. If `None` (default), the namespace 

419 (if any) is inferred from the connection. 

420 writeable : `bool`, optional 

421 If `True`, allow write operations on the database, including 

422 ``CREATE TABLE``. 

423 

424 Returns 

425 ------- 

426 db : `Database` 

427 A new `Database` instance. 

428 

429 Notes 

430 ----- 

431 This method allows different `Database` instances to share the same 

432 engine, which is desirable when they represent different namespaces 

433 can be queried together. 

434 """ 

435 raise NotImplementedError() 

436 

@contextmanager
def session(self) -> Iterator:
    """Return a context manager that represents a session (persistent
    connection to a database).

    The context yields a `Session` bound to this database. When a session
    is already active its connection is reused; otherwise a new connection
    is opened and closed again when the context exits.
    """
    if self._session_connection is not None:
        # A session is already running: share its connection and leave
        # the cleanup to whoever opened it.
        yield Session(self)
        return
    try:
        # Open a new connection; it is closed when the context ends.
        self._session_connection = self._engine.connect()
        yield Session(self)
    finally:
        openConnection = self._session_connection
        if openConnection is not None:
            openConnection.close()
            self._session_connection = None
            # Temporary tables only live within a session.
            self._tempTables = set()

456 

@contextmanager
def transaction(self, *, interrupting: bool = False, savepoint: bool = False,
                lock: Iterable[sqlalchemy.schema.Table] = ()) -> Iterator:
    """Return a context manager that represents a transaction.

    Parameters
    ----------
    interrupting : `bool`, optional
        If `True` (`False` is default), this transaction block may not be
        nested within an outer one, and attempting to do so is a logic
        (i.e. assertion) error.
    savepoint : `bool`, optional
        If `True` (`False` is default), create a `SAVEPOINT`, allowing
        exceptions raised by the database (e.g. due to constraint
        violations) during this transaction's context to be caught outside
        it without also rolling back all operations in an outer transaction
        block. If `False`, transactions may still be nested, but a
        rollback may be generated at any level and affects all levels, and
        commits are deferred until the outermost block completes. If any
        outer transaction block was created with ``savepoint=True``, all
        inner blocks will be as well (regardless of the actual value
        passed). This has no effect if this is the outermost transaction.
    lock : `Iterable` [ `sqlalchemy.schema.Table` ], optional
        A list of tables to lock for the duration of this transaction.
        These locks are guaranteed to prevent concurrent writes and allow
        this transaction (only) to acquire the same locks (others should
        block), but only prevent concurrent reads if the database engine
        requires that in order to block concurrent writes.

    Notes
    -----
    All transactions on a connection managed by one or more `Database`
    instances _must_ go through this method, or transaction state will not
    be correctly managed.

    Any exception raised inside the context, or while acquiring the
    requested table locks, rolls back the transaction object started by
    this call (if there is one) and is then re-raised.
    """
    # A connection is needed; the session machinery manages it.
    with self.session():
        assert self._session_connection is not None
        connection = self._session_connection
        assert not (interrupting and connection.in_transaction()), (
            "Logic error in transaction nesting: an operation that would "
            "interrupt the active transaction context has been requested."
        )
        # We remember whether we are already in a SAVEPOINT transaction via
        # the connection object's 'info' dict, which is explicitly for user
        # information like this. This is safer than a regular `Database`
        # instance attribute, because it guards against multiple `Database`
        # instances sharing the same connection. The need to use our own
        # flag here to track whether we're in a nested transaction should
        # go away in SQLAlchemy 1.4, which seems to have a
        # `Connection.in_nested_transaction()` method.
        savepoint = savepoint or connection.info.get(_IN_SAVEPOINT_TRANSACTION, False)
        connection.info[_IN_SAVEPOINT_TRANSACTION] = savepoint
        if connection.in_transaction() and savepoint:
            trans = connection.begin_nested()
        elif not connection.in_transaction():
            # Use a regular (non-savepoint) transaction always for the
            # outermost context.
            trans = connection.begin()
        else:
            # Nested non-savepoint transactions, don't do anything.
            trans = None
        try:
            # Lock acquisition is inside the ``try`` so that a failure to
            # lock rolls back the transaction begun above and clears the
            # savepoint flag, instead of leaving both dangling on a
            # connection that an outer session may keep open.
            self._lockTables(connection, lock)
            yield
            if trans is not None:
                trans.commit()
        except BaseException:
            if trans is not None:
                trans.rollback()
            raise
        finally:
            # Drop the flag once no transaction remains on the connection.
            if not connection.in_transaction():
                connection.info.pop(_IN_SAVEPOINT_TRANSACTION, None)

531 

532 @contextmanager 

533 def _connection(self) -> Iterator[sqlalchemy.engine.Connection]: 

534 """Return context manager for Connection. 

535 """ 

536 if self._session_connection is not None: 

537 # It means that we are in Session context, but we may not be in 

538 # transaction context. Start a short transaction in that case. 

539 if self._session_connection.in_transaction(): 

540 yield self._session_connection 

541 else: 

542 with self._session_connection.begin(): 

543 yield self._session_connection 

544 else: 

545 # Make new connection and transaction, transaction will be 

546 # committed on context exit. 

547 with self._engine.begin() as connection: 

548 yield connection 

549 

550 @abstractmethod 

551 def _lockTables(self, connection: sqlalchemy.engine.Connection, 

552 tables: Iterable[sqlalchemy.schema.Table] = ()) -> None: 

553 """Acquire locks on the given tables. 

554 

555 This is an implementation hook for subclasses, called by `transaction`. 

556 It should not be called directly by other code. 

557 

558 Parameters 

559 ---------- 

560 connection : `sqlalchemy.engine.Connection` 

561 Database connection object. It is guaranteed that transaction is 

562 already in a progress for this connection. 

563 tables : `Iterable` [ `sqlalchemy.schema.Table` ], optional 

564 A list of tables to lock for the duration of this transaction. 

565 These locks are guaranteed to prevent concurrent writes and allow 

566 this transaction (only) to acquire the same locks (others should 

567 block), but only prevent concurrent reads if the database engine 

568 requires that in order to block concurrent writes. 

569 """ 

570 raise NotImplementedError() 

571 

572 def isTableWriteable(self, table: sqlalchemy.schema.Table) -> bool: 

573 """Check whether a table is writeable, either because the database 

574 connection is read-write or the table is a temporary table. 

575 

576 Parameters 

577 ---------- 

578 table : `sqlalchemy.schema.Table` 

579 SQLAlchemy table object to check. 

580 

581 Returns 

582 ------- 

583 writeable : `bool` 

584 Whether this table is writeable. 

585 """ 

586 return self.isWriteable() or table.key in self._tempTables 

587 

588 def assertTableWriteable(self, table: sqlalchemy.schema.Table, msg: str) -> None: 

589 """Raise if the given table is not writeable, either because the 

590 database connection is read-write or the table is a temporary table. 

591 

592 Parameters 

593 ---------- 

594 table : `sqlalchemy.schema.Table` 

595 SQLAlchemy table object to check. 

596 msg : `str`, optional 

597 If provided, raise `ReadOnlyDatabaseError` instead of returning 

598 `False`, with this message. 

599 """ 

600 if not self.isTableWriteable(table): 

601 raise ReadOnlyDatabaseError(msg) 

602 

@contextmanager
def declareStaticTables(self, *, create: bool) -> Iterator[StaticTablesContext]:
    """Return a context manager in which the database's static DDL schema
    can be declared.

    Parameters
    ----------
    create : `bool`
        If `True`, attempt to create all tables at the end of the context.
        If `False`, they will be assumed to already exist.

    Returns
    -------
    schema : `StaticTablesContext`
        A helper object that is used to add new tables.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if ``create`` is `True` and `Database.isWriteable` is
        `False`.
    SchemaAlreadyDefinedError
        Raised if ``create`` is `True` and the database namespace already
        contains tables.

    Examples
    --------
    Given a `Database` instance ``db``::

        with db.declareStaticTables(create=True) as schema:
            schema.addTable("table1", TableSpec(...))
            schema.addTable("table2", TableSpec(...))

    Notes
    -----
    A database's static DDL schema must be declared before any dynamic
    tables are managed via calls to `ensureTableExists` or
    `getExistingTable`. The order in which static schema tables are added
    inside the context block is unimportant; they will automatically be
    sorted and added in an order consistent with their foreign key
    relationships.
    """
    if create and not self.isWriteable():
        raise ReadOnlyDatabaseError(f"Cannot create tables in read-only database {self}.")
    self._metadata = sqlalchemy.MetaData(schema=self.namespace)
    try:
        context = StaticTablesContext(self)
        if create and context._tableNames:
            # The database looks initialized already; refuse to do anything
            # rather than risk modifying or destroying a valid schema.
            raise SchemaAlreadyDefinedError(f"Cannot create tables in non-empty database {self}.")
        yield context
        # All tables are declared now, so the recorded foreign keys can be
        # attached to them.
        for table, foreignKey in context._foreignKeys:
            table.append_constraint(foreignKey)
        if create:
            namespace = self.namespace
            if namespace is not None and namespace not in context._inspector.get_schema_names():
                with self._connection() as connection:
                    connection.execute(sqlalchemy.schema.CreateSchema(namespace))
            # Our tables have columns that use sqlalchemy Sequence objects.
            # A sqlalchemy bug currently emits a deprecation warning on a
            # property of the Sequence object when its repr is created, so
            # those warnings are filtered out while tables are created.
            with warnings.catch_warnings():
                warnings.simplefilter("ignore", category=sqlalchemy.exc.SADeprecationWarning)
                self._metadata.create_all(self._engine)
            # Run the registered initializers one after another.
            for initializer in context._initializers:
                initializer(self)
    except BaseException:
        # Leave no half-declared schema behind on failure.
        self._metadata = None
        raise

675 

676 @abstractmethod 

677 def isWriteable(self) -> bool: 

678 """Return `True` if this database can be modified by this client. 

679 """ 

680 raise NotImplementedError() 

681 

682 @abstractmethod 

683 def __str__(self) -> str: 

684 """Return a human-readable identifier for this `Database`, including 

685 any namespace or schema that identifies its names within a `Registry`. 

686 """ 

687 raise NotImplementedError() 

688 

689 @property 

690 def dialect(self) -> sqlalchemy.engine.Dialect: 

691 """The SQLAlchemy dialect for this database engine 

692 (`sqlalchemy.engine.Dialect`). 

693 """ 

694 return self._engine.dialect 

695 

696 def shrinkDatabaseEntityName(self, original: str) -> str: 

697 """Return a version of the given name that fits within this database 

698 engine's length limits for table, constraint, indexes, and sequence 

699 names. 

700 

701 Implementations should not assume that simple truncation is safe, 

702 because multiple long names often begin with the same prefix. 

703 

704 The default implementation simply returns the given name. 

705 

706 Parameters 

707 ---------- 

708 original : `str` 

709 The original name. 

710 

711 Returns 

712 ------- 

713 shrunk : `str` 

714 The new, possibly shortened name. 

715 """ 

716 return original 

717 

718 def expandDatabaseEntityName(self, shrunk: str) -> str: 

719 """Retrieve the original name for a database entity that was too long 

720 to fit within the database engine's limits. 

721 

722 Parameters 

723 ---------- 

724 original : `str` 

725 The original name. 

726 

727 Returns 

728 ------- 

729 shrunk : `str` 

730 The new, possibly shortened name. 

731 """ 

732 return shrunk 

733 

734 def _mangleTableName(self, name: str) -> str: 

735 """Map a logical, user-visible table name to the true table name used 

736 in the database. 

737 

738 The default implementation returns the given name unchanged. 

739 

740 Parameters 

741 ---------- 

742 name : `str` 

743 Input table name. Should not include a namespace (i.e. schema) 

744 prefix. 

745 

746 Returns 

747 ------- 

748 mangled : `str` 

749 Mangled version of the table name (still with no namespace prefix). 

750 

751 Notes 

752 ----- 

753 Reimplementations of this method must be idempotent - mangling an 

754 already-mangled name must have no effect. 

755 """ 

756 return name 

757 

758 def _makeColumnConstraints(self, table: str, spec: ddl.FieldSpec) -> List[sqlalchemy.CheckConstraint]: 

759 """Create constraints based on this spec. 

760 

761 Parameters 

762 ---------- 

763 table : `str` 

764 Name of the table this column is being added to. 

765 spec : `FieldSpec` 

766 Specification for the field to be added. 

767 

768 Returns 

769 ------- 

770 constraint : `list` of `sqlalchemy.CheckConstraint` 

771 Constraint added for this column. 

772 """ 

773 # By default we return no additional constraints 

774 return [] 

775 

def _convertFieldSpec(self, table: str, spec: ddl.FieldSpec, metadata: sqlalchemy.MetaData,
                      **kwargs: Any) -> sqlalchemy.schema.Column:
    """Convert a `FieldSpec` to a `sqlalchemy.schema.Column`.

    Parameters
    ----------
    table : `str`
        Name of the table this column is being added to.
    spec : `FieldSpec`
        Specification for the field to be added.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema this field's table is
        being added to.
    **kwargs
        Additional keyword arguments to forward to the
        `sqlalchemy.schema.Column` constructor. This is provided to make
        it easier for derived classes to delegate to ``super()`` while
        making only minor changes.

    Returns
    -------
    column : `sqlalchemy.schema.Column`
        SQLAlchemy representation of the field.
    """
    columnArgs = [spec.name, spec.getSizedColumnType()]
    if spec.autoincrement:
        # Provide a sequence for auto-incrementing on databases that lack
        # native support; sqlalchemy ignores it on databases that have it.
        sequenceName = self.shrinkDatabaseEntityName(f"{table}_seq_{spec.name}")
        columnArgs.append(sqlalchemy.Sequence(sequenceName, metadata=metadata))
    assert spec.doc is None or isinstance(spec.doc, str), f"Bad doc for {table}.{spec.name}."
    return sqlalchemy.schema.Column(
        *columnArgs,
        nullable=spec.nullable,
        primary_key=spec.primaryKey,
        comment=spec.doc,
        server_default=spec.default,
        **kwargs,
    )

810 

def _convertForeignKeySpec(self, table: str, spec: ddl.ForeignKeySpec, metadata: sqlalchemy.MetaData,
                           **kwargs: Any) -> sqlalchemy.schema.ForeignKeyConstraint:
    """Convert a `ForeignKeySpec` to a
    `sqlalchemy.schema.ForeignKeyConstraint`.

    Parameters
    ----------
    table : `str`
        Name of the table this foreign key is being added to.
    spec : `ForeignKeySpec`
        Specification for the foreign key to be added.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema this constraint is
        being added to.
    **kwargs
        Additional keyword arguments to forward to the
        `sqlalchemy.schema.ForeignKeyConstraint` constructor. This is
        provided to make it easier for derived classes to delegate to
        ``super()`` while making only minor changes. ``name`` and
        ``ondelete`` are set by this method and must not be passed here.

    Returns
    -------
    constraint : `sqlalchemy.schema.ForeignKeyConstraint`
        SQLAlchemy representation of the constraint.
    """
    # Mangle the referenced table's name once; it is needed both for the
    # constraint name and for the referenced column names.
    targetTable = self._mangleTableName(spec.table)
    name = self.shrinkDatabaseEntityName(
        "_".join(["fkey", table, targetTable, *spec.target, *spec.source])
    )
    return sqlalchemy.schema.ForeignKeyConstraint(
        spec.source,
        [f"{targetTable}.{col}" for col in spec.target],
        name=name,
        ondelete=spec.onDelete,
        # Forward the extra arguments as documented; they used to be
        # accepted but silently discarded.
        **kwargs,
    )

846 

847 def _convertExclusionConstraintSpec(self, table: str, 

848 spec: Tuple[Union[str, Type[TimespanDatabaseRepresentation]], ...], 

849 metadata: sqlalchemy.MetaData) -> sqlalchemy.schema.Constraint: 

850 """Convert a `tuple` from `ddl.TableSpec.exclusion` into a SQLAlchemy 

851 constraint representation. 

852 

853 Parameters 

854 ---------- 

855 table : `str` 

856 Name of the table this constraint is being added to. 

857 spec : `tuple` [ `str` or `type` ] 

858 A tuple of `str` column names and the `type` object returned by 

859 `getTimespanRepresentation` (which must appear exactly once), 

860 indicating the order of the columns in the index used to back the 

861 constraint. 

862 metadata : `sqlalchemy.MetaData` 

863 SQLAlchemy representation of the DDL schema this constraint is 

864 being added to. 

865 

866 Returns 

867 ------- 

868 constraint : `sqlalchemy.schema.Constraint` 

869 SQLAlchemy representation of the constraint. 

870 

871 Raises 

872 ------ 

873 NotImplementedError 

874 Raised if this database does not support exclusion constraints. 

875 """ 

876 raise NotImplementedError(f"Database {self} does not support exclusion constraints.") 

877 

def _convertTableSpec(self, name: str, spec: ddl.TableSpec, metadata: sqlalchemy.MetaData,
                      **kwargs: Any) -> sqlalchemy.schema.Table:
    """Convert a `TableSpec` to a `sqlalchemy.schema.Table`.

    Parameters
    ----------
    name : `str`
        Name of the table, before mangling; `_mangleTableName` is applied
        here.
    spec : `TableSpec`
        Specification for the table to be added.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema this table is being
        added to.
    **kwargs
        Additional keyword arguments to forward to the
        `sqlalchemy.schema.Table` constructor.  This is provided to make it
        easier for derived classes to delegate to ``super()`` while making
        only minor changes.

    Returns
    -------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy representation of the table.

    Notes
    -----
    This method does not handle ``spec.foreignKeys`` at all, in order to
    avoid circular dependencies.  These are added by higher-level logic in
    `ensureTableExists`, `getExistingTable`, and `declareStaticTables`.
    Indexes backing foreign keys (``addIndex``) *are* created here.
    """
    name = self._mangleTableName(name)
    args = [self._convertFieldSpec(name, fieldSpec, metadata) for fieldSpec in spec.fields]

    # Add any column constraints
    for fieldSpec in spec.fields:
        args.extend(self._makeColumnConstraints(name, fieldSpec))

    # Track indexes added for primary key and unique constraints, to make
    # sure we don't add duplicate explicit or foreign key indexes for
    # those.
    allIndexes = {tuple(fieldSpec.name for fieldSpec in spec.fields if fieldSpec.primaryKey)}
    args.extend(
        sqlalchemy.schema.UniqueConstraint(
            *columns,
            name=self.shrinkDatabaseEntityName("_".join([name, "unq"] + list(columns)))
        )
        for columns in spec.unique
    )
    allIndexes.update(spec.unique)
    # Column sets that are also in ``spec.unique`` are already in
    # ``allIndexes`` and hence skipped, so every index created here is a
    # plain (non-unique) one.
    args.extend(
        sqlalchemy.schema.Index(
            self.shrinkDatabaseEntityName("_".join([name, "idx"] + list(columns))),
            *columns,
        )
        for columns in spec.indexes if columns not in allIndexes
    )
    allIndexes.update(spec.indexes)
    for fk in spec.foreignKeys:
        if fk.addIndex and fk.source not in allIndexes:
            args.append(
                sqlalchemy.schema.Index(
                    self.shrinkDatabaseEntityName("_".join((name, "fkidx") + fk.source)),
                    *fk.source,
                )
            )
            # Remember the source columns so that a second foreign key on
            # the same columns does not yield an identically-named index.
            allIndexes.add(fk.source)

    args.extend(self._convertExclusionConstraintSpec(name, excl, metadata) for excl in spec.exclusion)

    assert spec.doc is None or isinstance(spec.doc, str), f"Bad doc for {name}."
    return sqlalchemy.schema.Table(name, metadata, *args, comment=spec.doc, info=spec, **kwargs)

946 

def ensureTableExists(self, name: str, spec: ddl.TableSpec) -> sqlalchemy.schema.Table:
    """Return the table with the given name, creating it from ``spec`` when
    it is not there yet.

    Parameters
    ----------
    name : `str`
        Name of the table (not including namespace qualifiers).
    spec : `TableSpec`
        Specification for the table.  Used to create the table; it *may*
        also be used to check an existing table for consistency, but no
        such check is guaranteed.

    Returns
    -------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy representation of the table.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False`, and the table does not
        already exist.
    DatabaseConflictError
        Raised if the table exists but ``spec`` is inconsistent with its
        definition.

    Notes
    -----
    This method may not be called within transactions.  It may be called on
    read-only databases if and only if the table does in fact already
    exist.

    Subclasses may override this method, but usually should not need to.
    """
    # TODO: if _engine is used to make a table then it uses separate
    # connection and should not interfere with current transaction
    session = self._session_connection
    assert session is None or not session.in_transaction(), \
        "Table creation interrupts transactions."
    assert self._metadata is not None, "Static tables must be declared before dynamic tables."

    existing = self.getExistingTable(name, spec)
    if existing is not None:
        return existing

    if not self.isWriteable():
        raise ReadOnlyDatabaseError(
            f"Table {name} does not exist, and cannot be created "
            f"because database {self} is read-only."
        )

    created = self._convertTableSpec(name, spec, self._metadata)
    # Foreign keys are attached here because _convertTableSpec leaves them
    # out on purpose.
    for fkSpec in spec.foreignKeys:
        constraint = self._convertForeignKeySpec(name, fkSpec, self._metadata)
        created.append_constraint(constraint)
    with self._connection() as conn:
        created.create(conn)
    return created

1001 

def getExistingTable(self, name: str, spec: ddl.TableSpec) -> Optional[sqlalchemy.schema.Table]:
    """Obtain an existing table with the given name and specification.

    Parameters
    ----------
    name : `str`
        Name of the table (not including namespace qualifiers).
    spec : `TableSpec`
        Specification for the table.  This will be used when creating the
        SQLAlchemy representation of the table, and it is used to
        check that the actual table in the database is consistent.

    Returns
    -------
    table : `sqlalchemy.schema.Table` or `None`
        SQLAlchemy representation of the table, or `None` if it does not
        exist.

    Raises
    ------
    DatabaseConflictError
        Raised if the table exists but ``spec`` is inconsistent with its
        definition.

    Notes
    -----
    This method can be called within transactions and never modifies the
    database.

    Subclasses may override this method, but usually should not need to.
    """
    assert self._metadata is not None, "Static tables must be declared before dynamic tables."
    # Keep the caller's name intact: _convertTableSpec applies
    # _mangleTableName itself, and _convertForeignKeySpec is given the
    # unmangled name by ensureTableExists.  The mangled form is only for
    # looking the table up and for messages.
    mangled = self._mangleTableName(name)
    table = self._metadata.tables.get(mangled if self.namespace is None else f"{self.namespace}.{mangled}")
    if table is not None:
        # Already known to SQLAlchemy; only the column names are compared.
        if spec.fields.names != set(table.columns.keys()):
            raise DatabaseConflictError(f"Table '{mangled}' has already been defined differently; the new "
                                        f"specification has columns {list(spec.fields.names)}, while "
                                        f"the previous definition has {list(table.columns.keys())}.")
        return table

    # Not in the in-memory metadata; ask the database itself.
    inspector = sqlalchemy.inspect(self._engine)
    if mangled not in inspector.get_table_names(schema=self.namespace):
        return None
    _checkExistingTableDefinition(mangled, spec, inspector.get_columns(mangled, schema=self.namespace))
    table = self._convertTableSpec(name, spec, self._metadata)
    for foreignKeySpec in spec.foreignKeys:
        table.append_constraint(self._convertForeignKeySpec(name, foreignKeySpec, self._metadata))
    return table

1050 

@classmethod
def getTimespanRepresentation(cls) -> Type[TimespanDatabaseRepresentation]:
    """Return the `type` describing how `Timespan` objects are stored in
    this database.

    Nothing else in `Database` consults this method automatically; code
    that writes DDL or queries involving timespans must itself keep them
    consistent with the returned type.

    Returns
    -------
    TimespanReprClass : `type` (`TimespanDatabaseRepresentation` subclass)
        A type that encapsulates the way `Timespan` objects should be
        stored in this database.

    Notes
    -----
    The choice of representation belongs to the `Database` implementation,
    but the logic that manipulates timespans is deliberately kept outside
    of it, for two reasons:

    - Timespans appear in relatively few tables and queries in our typical
      usage, and the code handling them already knows it is dealing with
      timespans.  A timespan-aware implementation of, say, `insert` would
      need extra logic to detect when conversion is required, which would
      usually be useless overhead.

    - SQLAlchemy's SELECT expression system cannot wrap several columns in
      a single expression object (the ORM can, but the ORM is not used
      here), so much more of that code would have to be wrapped in our own
      interfaces to hide the representation.
    """
    representation = TimespanDatabaseRepresentation.Compound
    return representation

1086 

@classmethod
def getSpatialRegionRepresentation(cls) -> Type[SpatialRegionDatabaseRepresentation]:
    """Return the `type` describing how `lsst.sphgeom.Region` objects are
    stored in this database.

    Nothing else in `Database` consults this method automatically; code
    that writes DDL or queries involving regions must itself keep them
    consistent with the returned type.

    Returns
    -------
    RegionReprClass : `type` (`SpatialRegionDatabaseRepresentation` subclass)
        A type that encapsulates the way `lsst.sphgeom.Region` objects
        should be stored in this database.

    Notes
    -----
    See `getTimespanRepresentation` for comments on why this method is not
    more tightly integrated with the rest of the `Database` interface.
    """
    representation = SpatialRegionDatabaseRepresentation
    return representation

1108 

def sync(self, table: sqlalchemy.schema.Table, *,
         keys: Dict[str, Any],
         compared: Optional[Dict[str, Any]] = None,
         extra: Optional[Dict[str, Any]] = None,
         returning: Optional[Sequence[str]] = None,
         update: bool = False,
         ) -> Tuple[Optional[Dict[str, Any]], Union[bool, Dict[str, Any]]]:
    """Insert into a table as necessary to ensure database contains
    values equivalent to the given ones.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table to be queried and possibly inserted into.
    keys : `dict`
        Column name-value pairs used to search for an existing row; must
        be a combination that can be used to select a single row if one
        exists.  If such a row does not exist, these values are used in
        the insert.
    compared : `dict`, optional
        Column name-value pairs that are compared to those in any existing
        row.  If such a row does not exist, these values are used in the
        insert.
    extra : `dict`, optional
        Column name-value pairs that are ignored if a matching row exists,
        but used in an insert if one is necessary.
    returning : `~collections.abc.Sequence` of `str`, optional
        The names of columns whose values should be returned.
    update : `bool`, optional
        If `True` (`False` is default), update the existing row with the
        values in ``compared`` instead of raising `DatabaseConflictError`.

    Returns
    -------
    row : `dict`, optional
        The value of the fields indicated by ``returning``, or `None` if
        ``returning`` is `None`.
    inserted_or_updated : `bool` or `dict`
        If `True`, a new row was inserted; if `False`, a matching row
        already existed.  If a `dict` (only possible if ``update=True``),
        then an existing row was updated, and the dict maps the names of
        the updated columns to their *old* values (new values can be
        obtained from ``compared``).

    Raises
    ------
    DatabaseConflictError
        Raised if the values in ``compared`` do not match the values in the
        database (and ``update`` is `False`).
    ReadOnlyDatabaseError
        Raised if the table is not writeable and either no matching record
        exists, or a matching record would need to be updated.
    ConflictingDefinitionError
        Raised if no row matches ``keys`` even after the insert attempt.
    RuntimeError
        Raised if ``keys`` matches more than one row, or if a conflict is
        found right after an insert that was reported as successful.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    It may be called on read-only databases if and only if the matching row
    does in fact already exist.
    """

    def check() -> Tuple[int, Optional[Dict[str, Any]], Optional[List]]:
        """Query for a row that matches the ``keys`` argument, and compare
        to what was given by the caller.

        Returns
        -------
        n : `int`
            Number of matching rows.  ``n != 1`` is always an error, but
            it's a different kind of error depending on where `check` is
            being called.
        bad : `dict` or `None`
            The subset of the keys of ``compared`` for which the existing
            values did not match the given one, mapped to the existing
            values in the database.  A non-empty ``bad`` is a conflict,
            whose handling depends on the calling context.  `None` if
            ``n != 1``.
        result : `list` or `None`
            Results in the database that correspond to the columns given
            in ``returning``, or `None` if ``returning is None`` or
            ``n != 1``.
        """
        toSelect: Set[str] = set()
        if compared is not None:
            toSelect.update(compared.keys())
        if returning is not None:
            toSelect.update(returning)
        if not toSelect:
            # Need to select some column, even if we just want to see
            # how many rows we get back.
            toSelect.add(next(iter(keys.keys())))
        selectSql = sqlalchemy.sql.select(
            *[table.columns[k].label(k) for k in toSelect]
        ).select_from(table).where(
            sqlalchemy.sql.and_(*[table.columns[k] == v for k, v in keys.items()])
        )
        with self._connection() as connection:
            fetched = list(connection.execute(selectSql).mappings())
        if len(fetched) != 1:
            return len(fetched), None, None
        existing = fetched[0]
        if compared is not None:

            def safeNotEqual(a: Any, b: Any) -> bool:
                # astropy times are compared through TimeConverter rather
                # than with ``!=``.
                if isinstance(a, astropy.time.Time):
                    return not time_utils.TimeConverter().times_equal(a, b)
                return a != b

            inconsistencies = {
                k: existing[k]
                for k, v in compared.items()
                if safeNotEqual(existing[k], v)
            }
        else:
            inconsistencies = {}
        if returning is not None:
            toReturn: Optional[list] = [existing[k] for k in returning]
        else:
            toReturn = None
        return 1, inconsistencies, toReturn

    def format_bad(inconsistencies: Dict[str, Any]) -> str:
        """Format the 'bad' dictionary of existing values returned by
        ``check`` into a string suitable for an error message.
        """
        assert compared is not None, "Should not be able to get inconsistencies without comparing."
        return ", ".join(f"{k}: {v!r} != {compared[k]!r}" for k, v in inconsistencies.items())

    if self.isTableWriteable(table):
        # Try an insert first, but allow it to fail (in only specific
        # ways).
        row = keys.copy()
        if compared is not None:
            row.update(compared)
        if extra is not None:
            row.update(extra)
        with self.transaction():
            inserted = bool(self.ensure(table, row))
            inserted_or_updated: Union[bool, Dict[str, Any]]
            # Need to perform check() for this branch inside the
            # transaction, so we roll back an insert that didn't do
            # what we expected.  That limits the extent to which we
            # can reduce duplication between this block and the other
            # ones that perform similar logic.
            n, bad, result = check()
            if n < 1:
                raise ConflictingDefinitionError(
                    f"Attempted to ensure {row} exists by inserting it with ON CONFLICT IGNORE, "
                    f"but a post-insert query on {keys} returned no results. "
                    f"Insert was {'' if inserted else 'not '}reported as successful. "
                    "This can occur if the insert violated a database constraint other than the "
                    "unique constraint or primary key used to identify the row in this call."
                )
            elif n > 1:
                raise RuntimeError(f"Keys passed to sync {keys.keys()} do not comprise a "
                                   f"unique constraint for table {table.name}.")
            elif bad:
                assert compared is not None, \
                    "Should not be able to get inconsistencies without comparing."
                if inserted:
                    raise RuntimeError(
                        f"Conflict ({bad}) in sync after successful insert; this is "
                        "possible if the same table is being updated by a concurrent "
                        "process that isn't using sync, but it may also be a bug in "
                        "daf_butler."
                    )
                elif update:
                    # Overwrite only the columns that differ; ``bad`` still
                    # holds their old values for the caller.
                    with self._connection() as connection:
                        connection.execute(
                            table.update().where(
                                sqlalchemy.sql.and_(*[table.columns[k] == v for k, v in keys.items()])
                            ).values(
                                **{k: compared[k] for k in bad.keys()}
                            )
                        )
                    inserted_or_updated = bad
                else:
                    raise DatabaseConflictError(
                        f"Conflict in sync for table {table.name} on column(s) {format_bad(bad)}."
                    )
            else:
                inserted_or_updated = inserted
    else:
        # Database is not writeable; just see if the row exists.
        n, bad, result = check()
        if n < 1:
            raise ReadOnlyDatabaseError("sync needs to insert, but database is read-only.")
        elif n > 1:
            raise RuntimeError("Keys passed to sync do not comprise a unique constraint.")
        elif bad:
            if update:
                raise ReadOnlyDatabaseError("sync needs to update, but database is read-only.")
            else:
                raise DatabaseConflictError(
                    f"Conflict in sync for table {table.name} on column(s) {format_bad(bad)}."
                )
        inserted_or_updated = False
    if returning is None:
        return None, inserted_or_updated
    else:
        # check() only returns ``result=None`` when ``returning`` is None
        # or ``n != 1``, and both cases have been excluded by now.
        assert result is not None
        return {k: v for k, v in zip(returning, result)}, inserted_or_updated

1310 

def insert(self, table: sqlalchemy.schema.Table, *rows: dict, returnIds: bool = False,
           select: Optional[sqlalchemy.sql.Select] = None,
           names: Optional[Iterable[str]] = None,
           ) -> Optional[List[int]]:
    """Insert rows into a table, either from literal values or from a
    SELECT query, optionally reporting autoincrement primary key values.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    returnIds: `bool`
        If `True` (`False` is default), return the values of the table's
        autoincrement primary key field (which must exist).
    select : `sqlalchemy.sql.Select`, optional
        A SELECT query expression to insert rows from.  Cannot be provided
        with either ``rows`` or ``returnIds=True``.
    names : `Iterable` [ `str` ], optional
        Names of columns in ``table`` to be populated, ordered to match the
        columns returned by ``select``.  Ignored if ``select`` is `None`.
        If not provided, the columns returned by ``select`` must be named
        to match the desired columns of ``table``.
    *rows
        Positional arguments are the rows to be inserted, as dictionaries
        mapping column name to value.  The keys in all dictionaries must
        be the same.

    Returns
    -------
    ids : `None`, or `list` of `int`
        If ``returnIds`` is `True`, a `list` containing the inserted
        values for the table's autoincrement primary key.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.
    TypeError
        Raised if ``select`` is combined with ``rows`` or
        ``returnIds=True``.

    Notes
    -----
    The default implementation issues one bulk insert when ``returnIds``
    is `False`, and one single-row insert per row when it is `True`.

    Derived classes should reimplement when they can provide a more
    efficient implementation (especially for the latter case).

    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.
    """
    self.assertTableWriteable(table, f"Cannot insert into read-only table {table}.")
    if select is not None and (rows or returnIds):
        raise TypeError("'select' is incompatible with passing value rows or returnIds=True.")
    if select is None and not rows:
        # Nothing to insert at all.
        return [] if returnIds else None
    with self._connection() as conn:
        if returnIds:
            # One statement per row, so that each generated key can be
            # read back.
            statement = table.insert()
            ids = []
            for row in rows:
                ids.append(conn.execute(statement, row).inserted_primary_key[0])
            return ids
        if select is None:
            conn.execute(table.insert(), rows)
            return None
        if names is None:
            # columns() is deprecated since 1.4, but
            # selected_columns() method did not exist in 1.3.
            source = select.selected_columns if hasattr(select, "selected_columns") else select.columns
            names = source.keys()
        conn.execute(table.insert().from_select(names, select))
        return None

1386 

@abstractmethod
def replace(self, table: sqlalchemy.schema.Table, *rows: dict) -> None:
    """Insert rows into a table, overwriting any existing row whose primary
    key would otherwise clash with a new one.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    *rows
        Positional arguments are the rows to be inserted, as dictionaries
        mapping column name to value.  The keys in all dictionaries must
        be the same.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    Implementations should raise a `sqlalchemy.exc.IntegrityError`
    exception when a constraint other than the primary key would be
    violated.

    Implementations are not required to support `replace` on tables
    with autoincrement keys.
    """
    # Abstract: concrete databases must supply the implementation.
    raise NotImplementedError()

1420 

@abstractmethod
def ensure(self, table: sqlalchemy.schema.Table, *rows: dict) -> int:
    """Insert rows into a table, silently skipping every row whose
    insertion would violate any constraint.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    *rows
        Positional arguments are the rows to be inserted, as dictionaries
        mapping column name to value.  The keys in all dictionaries must
        be the same.

    Returns
    -------
    count : `int`
        The number of rows actually inserted.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.
        This is raised even if the operation would do nothing even on a
        writeable database.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    Implementations are not required to support `ensure` on tables
    with autoincrement keys.
    """
    # Abstract: concrete databases must supply the implementation.
    raise NotImplementedError()

1456 

def delete(self, table: sqlalchemy.schema.Table, columns: Iterable[str], *rows: dict) -> int:
    """Delete one or more rows from a table.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table that rows should be deleted from.
    columns: `~collections.abc.Iterable` of `str`
        The names of columns that will be used to constrain the rows to
        be deleted; these will be combined via ``AND`` to form the
        ``WHERE`` clause of the delete query.  If empty, every row of the
        table is deleted.
    *rows
        Positional arguments are the keys of rows to be deleted, as
        dictionaries mapping column name to value.  The keys in all
        dictionaries must be exactly the names in ``columns``.

    Returns
    -------
    count : `int`
        Number of rows deleted.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    The default implementation should be sufficient for most derived
    classes.
    """
    self.assertTableWriteable(table, f"Cannot delete from read-only table {table}.")
    # Force iterators to list *before* testing for emptiness: a one-shot
    # iterator is always truthy, so an empty one would otherwise be
    # mistaken for "columns given" and skip the delete-everything case.
    columns = list(columns)
    if columns and not rows:
        # If there are no columns, this operation is supposed to delete
        # everything (so we proceed as usual).  But if there are columns,
        # but no rows, it was a constrained bulk operation where the
        # constraint is that no rows match, and we should short-circuit
        # while reporting that no rows were affected.
        return 0
    sql = table.delete()

    # More efficient to use IN operator if there is only one
    # variable changing across all rows.
    content: Dict[str, Set] = defaultdict(set)
    if len(columns) == 1:
        # Nothing to calculate since we can always use IN
        column = columns[0]
        changing_columns = [column]
        content[column] = {row[column] for row in rows}
    else:
        for row in rows:
            for k, v in row.items():
                content[k].add(v)
        changing_columns = [col for col, values in content.items() if len(values) > 1]

    if len(changing_columns) != 1:
        # More than one column changes each time so do explicit bind
        # parameters and have each row processed separately.
        whereTerms = [table.columns[name] == sqlalchemy.sql.bindparam(name) for name in columns]
        if whereTerms:
            sql = sql.where(sqlalchemy.sql.and_(*whereTerms))
        with self._connection() as connection:
            return connection.execute(sql, rows).rowcount
    else:
        # One of the columns has changing values but any others are
        # fixed.  In this case we can use an IN operator and be more
        # efficient.
        name = changing_columns.pop()

        # Simple where clause for the unchanging columns
        clauses = []
        for k, v in content.items():
            if k == name:
                continue
            column = table.columns[k]
            # The set only has one element
            clauses.append(column == v.pop())

        # The IN operator will not work for "infinite" numbers of
        # rows so must batch it up into distinct calls.
        in_content = list(content[name])
        n_elements = len(in_content)

        rowcount = 0
        n_per_loop = 1_000  # Controls how many items to put in IN clause
        with self._connection() as connection:
            for iposn in range(0, n_elements, n_per_loop):
                endpos = iposn + n_per_loop
                in_clause = table.columns[name].in_(in_content[iposn:endpos])

                newsql = sql.where(sqlalchemy.sql.and_(*clauses, in_clause))
                rowcount += connection.execute(newsql).rowcount
        return rowcount

1555 

def update(self, table: sqlalchemy.schema.Table, where: Dict[str, str], *rows: dict) -> int:
    """Update one or more rows in a table.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table containing the rows to be updated.
    where : `dict` [`str`, `str`]
        Maps each column used to locate existing rows to the key under
        which its value is stored in the ``rows`` dictionaries.  The two
        may have to differ because of SQLAlchemy limitations.
    *rows
        Positional arguments are the rows to be updated.  The keys in all
        dictionaries must be the same, and may correspond to either a
        value in the ``where`` dictionary or the name of a column to be
        updated.

    Returns
    -------
    count : `int`
        Number of rows matched (regardless of whether the update actually
        modified them).

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    The default implementation should be sufficient for most derived
    classes.
    """
    self.assertTableWriteable(table, f"Cannot update read-only table {table}.")
    if not rows:
        return 0
    statement = table.update()
    # One bound parameter per search column, named after the key that
    # carries its value in each row dictionary.
    conditions = []
    for columnName, paramKey in where.items():
        conditions.append(table.columns[columnName] == sqlalchemy.sql.bindparam(paramKey))
    statement = statement.where(sqlalchemy.sql.and_(*conditions))
    with self._connection() as conn:
        result = conn.execute(statement, rows)
        return result.rowcount

1601 

def query(self, sql: sqlalchemy.sql.FromClause,
          *args: Any, **kwargs: Any) -> sqlalchemy.engine.ResultProxy:
    """Run a SELECT query against the database.

    Parameters
    ----------
    sql : `sqlalchemy.sql.FromClause`
        A SQLAlchemy representation of a ``SELECT`` query.
    *args
        Additional positional arguments are forwarded to
        `sqlalchemy.engine.Connection.execute`.
    **kwargs
        Additional keyword arguments are forwarded to
        `sqlalchemy.engine.Connection.execute`.

    Returns
    -------
    result : `sqlalchemy.engine.ResultProxy`
        Query results.

    Notes
    -----
    The default implementation should be sufficient for most derived
    classes.
    """
    # A Result object is handed back to the caller, so the connection must
    # outlive this call.  Inside a transaction context the existing session
    # connection is reused; otherwise a dedicated connection is opened that
    # is closed together with the result.
    #
    # TODO: May be better approach would be to make this method return a
    # context manager, but this means big changes for callers of this
    # method.
    session = self._session_connection
    connection = session if session is not None else self._engine.connect(close_with_result=True)
    # TODO: should we guard against non-SELECT queries here?
    return connection.execute(sql, *args, **kwargs)

1641 

# Annotation only: no value is assigned to ``origin`` at this point.
# NOTE(review): presumably set by constructors or subclasses -- confirm.
origin: int
"""An integer ID that should be used as the default for any datasets,
quanta, or other entities that use a (autoincrement, origin) compound
primary key (`int`).
"""

# Annotation only: no value is assigned to ``namespace`` at this point.
# Other methods pass it to SQLAlchemy as the ``schema`` argument and use it
# to qualify table names when it is not `None`.
namespace: Optional[str]
"""The schema or namespace this database instance is associated with
(`str` or `None`).
"""

1651 """