Coverage for python/lsst/daf/butler/registry/interfaces/_database.py: 16%

Shortcuts on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

401 statements  

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

# Public names exported by this module.
__all__ = [
    "Database",
    "ReadOnlyDatabaseError",
    "DatabaseConflictError",
    "SchemaAlreadyDefinedError",
    "StaticTablesContext",
]

30 

31import uuid 

32import warnings 

33from abc import ABC, abstractmethod 

34from collections import defaultdict 

35from contextlib import contextmanager 

36from typing import Any, Callable, Dict, Iterable, Iterator, List, Optional, Sequence, Set, Tuple, Type, Union 

37 

38import astropy.time 

39import sqlalchemy 

40 

41from ...core import SpatialRegionDatabaseRepresentation, TimespanDatabaseRepresentation, ddl, time_utils 

42from .._exceptions import ConflictingDefinitionError 

43 

# Key under which `Database.transaction` records, in a connection's ``info``
# dict, whether that connection is currently inside a SAVEPOINT transaction.
_IN_SAVEPOINT_TRANSACTION = "IN_SAVEPOINT_TRANSACTION"

45 

46 

def _checkExistingTableDefinition(name: str, spec: ddl.TableSpec, inspection: List[Dict[str, Any]]) -> None:
    """Verify that a reflected table has exactly the columns its
    specification declares.

    Parameters
    ----------
    name : `str`
        Name of the table; it only appears in the error message.
    spec : `ddl.TableSpec`
        Declared specification of the table.
    inspection : `list` [ `dict` ]
        Per-column dictionaries as returned by
        `sqlalchemy.engine.reflection.Inspector.get_columns`. Only the
        ``"name"`` entry of each dictionary is consulted.

    Raises
    ------
    DatabaseConflictError
        Raised if the set of declared field names differs from the set of
        reflected column names.
    """
    columnNames = []
    for column in inspection:
        columnNames.append(column["name"])
    # Only the names are compared; column order and column types are not
    # checked here.
    if spec.fields.names != set(columnNames):
        message = (
            f"Table '{name}' exists but is defined differently in the database; "
            f"specification has columns {list(spec.fields.names)}, while the "
            f"table in the database has {columnNames}."
        )
        raise DatabaseConflictError(message)

73 

74 

class ReadOnlyDatabaseError(RuntimeError):
    """Exception raised when a write operation is attempted on a read-only
    `Database`.
    """

79 

80 

class DatabaseConflictError(ConflictingDefinitionError):
    """Exception raised when database content (row values or schema entities)
    is inconsistent with what this client expects.
    """

85 

86 

class SchemaAlreadyDefinedError(RuntimeError):
    """Exception raised when trying to initialize a database schema when
    some tables already exist.
    """

91 

92 

class StaticTablesContext:
    """Collector for the static table declarations of a registry layer.

    `Database.declareStaticTables` creates and yields an instance of this
    class; client code is not expected to construct one directly.

    Parameters
    ----------
    db : `Database`
        Database whose static schema is being declared.
    """

    def __init__(self, db: Database):
        self._db = db
        # Foreign key constraints are only recorded here, paired with the
        # table that owns them; `Database.declareStaticTables` attaches them
        # once its context block has finished.
        self._foreignKeys: List[Tuple[sqlalchemy.schema.Table, sqlalchemy.schema.ForeignKeyConstraint]] = []
        self._initializers: List[Callable[[Database], None]] = []
        # Snapshot of the tables that already exist in the namespace.
        self._inspector = sqlalchemy.inspect(self._db._engine)
        self._tableNames = frozenset(self._inspector.get_table_names(schema=self._db.namespace))

    def addTable(self, name: str, spec: ddl.TableSpec) -> sqlalchemy.schema.Table:
        """Declare one table and return its SQLAlchemy representation.

        The table may not actually be created until the end of the context
        created by `Database.declareStaticTables`, so tables can be declared
        in any order even when foreign keys relate them.

        Parameters
        ----------
        name : `str`
            Logical name of the table; it is passed through the database's
            name mangling before use.
        spec : `ddl.TableSpec`
            Specification of the table.

        Returns
        -------
        table : `sqlalchemy.schema.Table`
            SQLAlchemy representation of the table.

        Raises
        ------
        DatabaseConflictError
            Raised if a table of that name already exists with a different
            set of column names.
        """
        db = self._db
        name = db._mangleTableName(name)
        if name in self._tableNames:
            existingColumns = self._inspector.get_columns(name, schema=db.namespace)
            _checkExistingTableDefinition(name, spec, existingColumns)
        table = db._convertTableSpec(name, spec, db._metadata)
        self._foreignKeys.extend(
            (table, db._convertForeignKeySpec(name, foreignKeySpec, db._metadata))
            for foreignKeySpec in spec.foreignKeys
        )
        return table

    def addTableTuple(self, specs: Tuple[ddl.TableSpec, ...]) -> Tuple[sqlalchemy.schema.Table, ...]:
        """Declare every table in a named tuple of specifications, returning
        the SQLAlchemy tables in a named tuple of the same type.

        Each field name of the tuple is used as the name of the corresponding
        table. As with `addTable`, actual creation may be deferred to the end
        of the context created by `Database.declareStaticTables`.

        Notes
        -----
        ``specs`` *must* be an instance of a type created by
        `collections.namedtuple`, not a plain tuple, and the returned object
        has that same type. Because `~collections.namedtuple` is a factory
        for `type` objects rather than a type itself, this cannot be
        expressed with type annotations.
        """
        tables = (
            self.addTable(fieldName, tableSpec)
            for fieldName, tableSpec in zip(specs._fields, specs)  # type: ignore
        )
        return specs._make(tables)  # type: ignore

    def addInitializer(self, initializer: Callable[[Database], None]) -> None:
        """Register a callable that performs one-time database initialization.

        Initialization can be anything that changes the state of a database
        and must happen exactly once after the schema has been created, for
        example populating schema attributes.

        Parameters
        ----------
        initializer : callable
            Callable taking a single argument, a `Database` instance.
        """
        self._initializers.append(initializer)

163 

164 

class Session:
    """A persistent connection to a database.

    Parameters
    ----------
    db : `Database`
        Database instance.

    Notes
    -----
    Client code should not instantiate this class; `Database.session` is the
    way to obtain a session context::

        with db.session() as session:
            session.method()
            db.method()

    In the current implementation sessions can be nested, and transactions
    can be nested within a session. All nested sessions and transactions
    share the same database connection.

    This class exposes the limited part of the database API that needs a
    persistent connection (e.g. temporary tables, whose lifetime is that of
    a session). Potentially most of the database API could be associated
    with it.
    """

    def __init__(self, db: Database):
        self._db = db

    def makeTemporaryTable(self, spec: ddl.TableSpec, name: Optional[str] = None) -> sqlalchemy.schema.Table:
        """Create a temporary table.

        Parameters
        ----------
        spec : `TableSpec`
            Specification for the table.
        name : `str`, optional
            A unique (within this session/connection) name for the table.
            Subclasses may override to modify the actual name used. If not
            provided, a unique name is generated.

        Returns
        -------
        table : `sqlalchemy.schema.Table`
            SQLAlchemy representation of the table.

        Raises
        ------
        ValueError
            Raised if the name was transformed by the database and the
            transformed name is already in use by another temporary table.

        Notes
        -----
        Temporary tables may be created, dropped, and written to even in
        read-only databases - at least according to the Python-level
        protections in the `Database` classes. Server permissions may say
        otherwise, but in that case they probably need to be modified to
        support the full range of expected read-only butler behavior.

        Temporary table rows are guaranteed to be dropped when a connection
        is closed. `Database` implementations are permitted to let the table
        remain as long as this is transparent to the user (i.e. "creating"
        the temporary table in a new session should not be an error, even if
        it does nothing).

        It may not be possible to use temporary tables within transactions
        with some database engines (or configurations thereof).
        """
        db = self._db
        if name is None:
            name = f"tmp_{uuid.uuid4().hex}"
        table = db._convertTableSpec(
            name, spec, db._metadata, prefixes=["TEMPORARY"], schema=sqlalchemy.schema.BLANK_SCHEMA
        )
        # Only a clash on a *transformed* name is reported here.
        if table.key in db._tempTables and table.key != name:
            raise ValueError(
                f"A temporary table with name {name} (transformed to {table.key} by "
                f"Database) already exists."
            )
        for foreignKeySpec in spec.foreignKeys:
            constraint = db._convertForeignKeySpec(name, foreignKeySpec, db._metadata)
            table.append_constraint(constraint)
        with db._connection() as connection:
            table.create(connection)
        db._tempTables.add(table.key)
        return table

    def dropTemporaryTable(self, table: sqlalchemy.schema.Table) -> None:
        """Drop a temporary table.

        Parameters
        ----------
        table : `sqlalchemy.schema.Table`
            A SQLAlchemy object returned by a previous call to
            `makeTemporaryTable`.

        Raises
        ------
        TypeError
            Raised if the table is not a currently registered temporary
            table.
        """
        db = self._db
        if table.key not in db._tempTables:
            raise TypeError(f"Table {table.key} was not created by makeTemporaryTable.")
        with db._connection() as connection:
            table.drop(connection)
        db._tempTables.remove(table.key)

262 

263 

264class Database(ABC): 

265 """An abstract interface that represents a particular database engine's 

266 representation of a single schema/namespace/database. 

267 

268 Parameters 

269 ---------- 

270 origin : `int` 

271 An integer ID that should be used as the default for any datasets, 

272 quanta, or other entities that use a (autoincrement, origin) compound 

273 primary key. 

274 engine : `sqlalchemy.engine.Engine` 

275 The SQLAlchemy engine for this `Database`. 

276 namespace : `str`, optional 

277 Name of the schema or namespace this instance is associated with. 

278 This is passed as the ``schema`` argument when constructing a 

279 `sqlalchemy.schema.MetaData` instance. We use ``namespace`` instead to 

280 avoid confusion between "schema means namespace" and "schema means 

281 table definitions". 

282 

283 Notes 

284 ----- 

285 `Database` requires all write operations to go through its special named 

286 methods. Our write patterns are sufficiently simple that we don't really 

287 need the full flexibility of SQL insert/update/delete syntax, and we need 

288 non-standard (but common) functionality in these operations sufficiently 

289 often that it seems worthwhile to provide our own generic API. 

290 

291 In contrast, `Database.query` allows arbitrary ``SELECT`` queries (via 

292 their SQLAlchemy representation) to be run, as we expect these to require 

293 significantly more sophistication while still being limited to standard 

294 SQL. 

295 

296 `Database` itself has several underscore-prefixed attributes: 

297 

298 - ``_engine``: SQLAlchemy object representing its engine. 

299 - ``_connection``: method returning a context manager for 

300 `sqlalchemy.engine.Connection` object. 

301 - ``_metadata``: the `sqlalchemy.schema.MetaData` object representing 

302 the tables and other schema entities. 

303 

304 These are considered protected (derived classes may access them, but other 

305 code should not), and read-only, aside from executing SQL via 

306 ``_connection``. 

307 """ 

308 

def __init__(self, *, origin: int, engine: sqlalchemy.engine.Engine, namespace: Optional[str] = None):
    # State that starts out empty: no session connection is open, no static
    # schema has been declared, and no temporary tables exist.
    self._tempTables: Set[str] = set()
    self._metadata: Optional[sqlalchemy.schema.MetaData] = None
    self._session_connection: Optional[sqlalchemy.engine.Connection] = None
    # Constructor arguments are stored unchanged.
    self._engine = engine
    self.namespace = namespace
    self.origin = origin

316 

def __repr__(self) -> str:
    # Report the connection URL instead of trying to reproduce every
    # constructor parameter; a password, if present, is masked.
    if self._engine.url.password is None:
        uri = str(self._engine.url)
    else:
        uri = str(self._engine.url.set(password="***"))
    suffix = f"#{self.namespace}" if self.namespace else ""
    return f'{type(self).__name__}("{uri + suffix}")'

328 

@classmethod
def makeDefaultUri(cls, root: str) -> Optional[str]:
    """Create a default connection URI appropriate for the given root
    directory, or `None` if there can be no such default.

    Parameters
    ----------
    root : `str`
        Root directory. It is not used by this base-class implementation.

    Returns
    -------
    uri : `str` or `None`
        Always `None` in this base-class implementation.
    """
    return None

335 

@classmethod
def fromUri(
    cls, uri: str, *, origin: int, namespace: Optional[str] = None, writeable: bool = True
) -> Database:
    """Build a `Database` from a SQLAlchemy URI.

    The URI is first turned into an engine with `makeEngine`, and that
    engine is then handed to `fromEngine`.

    Parameters
    ----------
    uri : `str`
        A SQLAlchemy URI connection string.
    origin : `int`
        An integer ID that should be used as the default for any datasets,
        quanta, or other entities that use a (autoincrement, origin)
        compound primary key.
    namespace : `str`, optional
        A database namespace (i.e. schema) the new instance should be
        associated with. If `None` (default), the namespace (if any) is
        inferred from the URI.
    writeable : `bool`, optional
        If `True`, allow write operations on the database, including
        ``CREATE TABLE``.

    Returns
    -------
    db : `Database`
        A new `Database` instance.
    """
    engine = cls.makeEngine(uri, writeable=writeable)
    return cls.fromEngine(engine, origin=origin, namespace=namespace, writeable=writeable)

366 

@classmethod
@abstractmethod
def makeEngine(cls, uri: str, *, writeable: bool = True) -> sqlalchemy.engine.Engine:
    """Create a `sqlalchemy.engine.Engine` from a SQLAlchemy URI.

    This base-class implementation is abstract and always raises
    `NotImplementedError`.

    Parameters
    ----------
    uri : `str`
        A SQLAlchemy URI connection string.
    writeable : `bool`, optional
        If `True`, allow write operations on the database, including
        ``CREATE TABLE``.

    Returns
    -------
    engine : `sqlalchemy.engine.Engine`
        A database engine.

    Notes
    -----
    Subclasses that support other ways to connect to a database are
    encouraged to add optional arguments to their implementation of this
    method, as long as they maintain compatibility with the base class
    call signature.
    """
    raise NotImplementedError()

393 

@classmethod
@abstractmethod
def fromEngine(
    cls,
    engine: sqlalchemy.engine.Engine,
    *,
    origin: int,
    namespace: Optional[str] = None,
    writeable: bool = True,
) -> Database:
    """Create a new `Database` from an existing `sqlalchemy.engine.Engine`.

    This base-class implementation is abstract and always raises
    `NotImplementedError`.

    Parameters
    ----------
    engine : `sqlalchemy.engine.Engine`
        The engine for the database. May be shared between `Database`
        instances.
    origin : `int`
        An integer ID that should be used as the default for any datasets,
        quanta, or other entities that use a (autoincrement, origin)
        compound primary key.
    namespace : `str`, optional
        A different database namespace (i.e. schema) the new instance
        should be associated with. If `None` (default), the namespace
        (if any) is inferred from the connection.
    writeable : `bool`, optional
        If `True`, allow write operations on the database, including
        ``CREATE TABLE``.

    Returns
    -------
    db : `Database`
        A new `Database` instance.

    Notes
    -----
    This method allows different `Database` instances to share the same
    engine, which is desirable when they represent different namespaces
    that can be queried together.
    """
    raise NotImplementedError()

435 

@contextmanager
def session(self) -> Iterator:
    """Return a context manager that represents a session (a persistent
    connection to a database).

    The context manager yields a `Session`. Entering it while a session is
    already active reuses the existing connection; only the outermost
    context opens the connection and closes it on exit.
    """
    if self._session_connection is not None:
        # A session is already active; share its connection and leave the
        # clean-up to whoever opened it.
        yield Session(self)
        return
    try:
        # Outermost session: open a new connection, closed again below.
        self._session_connection = self._engine.connect()
        yield Session(self)
    finally:
        if self._session_connection is not None:
            self._session_connection.close()
            self._session_connection = None
            # Temporary tables only live within a session.
            self._tempTables = set()

455 

@contextmanager
def transaction(
    self,
    *,
    interrupting: bool = False,
    savepoint: bool = False,
    lock: Iterable[sqlalchemy.schema.Table] = (),
) -> Iterator:
    """Return a context manager that represents a transaction.

    Parameters
    ----------
    interrupting : `bool`, optional
        If `True` (`False` is default), this transaction block may not be
        nested within an outer one, and attempting to do so is a logic
        (i.e. assertion) error.
    savepoint : `bool`, optional
        If `True` (`False` is default), create a `SAVEPOINT`, allowing
        exceptions raised by the database (e.g. due to constraint
        violations) during this transaction's context to be caught outside
        it without also rolling back all operations in an outer transaction
        block. If `False`, transactions may still be nested, but a
        rollback may be generated at any level and affects all levels, and
        commits are deferred until the outermost block completes. If any
        outer transaction block was created with ``savepoint=True``, all
        inner blocks will be as well (regardless of the actual value
        passed). This has no effect if this is the outermost transaction.
    lock : `Iterable` [ `sqlalchemy.schema.Table` ], optional
        A list of tables to lock for the duration of this transaction.
        These locks are guaranteed to prevent concurrent writes and allow
        this transaction (only) to acquire the same locks (others should
        block), but only prevent concurrent reads if the database engine
        requires that in order to block concurrent writes.

    Notes
    -----
    All transactions on a connection managed by one or more `Database`
    instances _must_ go through this method, or transaction state will not
    be correctly managed.

    Starting the transaction and acquiring the table locks happen inside
    the same ``try`` block as the body of the context. If either fails, any
    transaction started here is rolled back and the savepoint flag stored
    on the connection is cleared, exactly as for a failure in the body.
    """
    # need a connection, use session to manage it
    with self.session():
        assert self._session_connection is not None
        connection = self._session_connection
        assert not (interrupting and connection.in_transaction()), (
            "Logic error in transaction nesting: an operation that would "
            "interrupt the active transaction context has been requested."
        )
        # We remember whether we are already in a SAVEPOINT transaction via
        # the connection object's 'info' dict, which is explicitly for user
        # information like this. This is safer than a regular `Database`
        # instance attribute, because it guards against multiple `Database`
        # instances sharing the same connection. The need to use our own
        # flag here to track whether we're in a nested transaction should
        # go away in SQLAlchemy 1.4, which seems to have a
        # `Connection.in_nested_transaction()` method.
        savepoint = savepoint or connection.info.get(_IN_SAVEPOINT_TRANSACTION, False)
        connection.info[_IN_SAVEPOINT_TRANSACTION] = savepoint
        # `None` means "nested non-savepoint transaction": there is nothing
        # for this block to commit or roll back itself.
        trans = None
        try:
            if connection.in_transaction() and savepoint:
                trans = connection.begin_nested()
            elif not connection.in_transaction():
                # Use a regular (non-savepoint) transaction always for the
                # outermost context.
                trans = connection.begin()
            # Locking is done inside the ``try`` so that a failure here is
            # cleaned up instead of leaving the transaction open.
            self._lockTables(connection, lock)
            yield
            if trans is not None:
                trans.commit()
        except BaseException:
            if trans is not None:
                trans.rollback()
            raise
        finally:
            if not connection.in_transaction():
                connection.info.pop(_IN_SAVEPOINT_TRANSACTION, None)

535 

@contextmanager
def _connection(self) -> Iterator[sqlalchemy.engine.Connection]:
    """Return a context manager yielding a connection that is inside a
    transaction.
    """
    sessionConnection = self._session_connection
    if sessionConnection is None:
        # No session is active: make a new connection and transaction; the
        # transaction is committed on context exit.
        with self._engine.begin() as connection:
            yield connection
    elif sessionConnection.in_transaction():
        # Session and transaction are both active already; just reuse them.
        yield sessionConnection
    else:
        # Inside a session but outside any transaction: start a short
        # transaction around the caller's block.
        with sessionConnection.begin():
            yield sessionConnection

552 

@abstractmethod
def _lockTables(
    self, connection: sqlalchemy.engine.Connection, tables: Iterable[sqlalchemy.schema.Table] = ()
) -> None:
    """Acquire locks on the given tables.

    This is an implementation hook for subclasses, called by `transaction`.
    It should not be called directly by other code. This base-class
    implementation is abstract and always raises `NotImplementedError`.

    Parameters
    ----------
    connection : `sqlalchemy.engine.Connection`
        Database connection object. It is guaranteed that a transaction is
        already in progress for this connection.
    tables : `Iterable` [ `sqlalchemy.schema.Table` ], optional
        A list of tables to lock for the duration of this transaction.
        These locks are guaranteed to prevent concurrent writes and allow
        this transaction (only) to acquire the same locks (others should
        block), but only prevent concurrent reads if the database engine
        requires that in order to block concurrent writes.
    """
    raise NotImplementedError()

575 

def isTableWriteable(self, table: sqlalchemy.schema.Table) -> bool:
    """Report whether rows of the given table may be modified.

    A table is writeable when the database itself is writeable or when the
    table is one of the currently registered temporary tables.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy table object to check.

    Returns
    -------
    writeable : `bool`
        Whether this table is writeable.
    """
    databaseWriteable = self.isWriteable()
    # The temporary-table lookup only happens for read-only databases.
    return databaseWriteable or table.key in self._tempTables

591 

def assertTableWriteable(self, table: sqlalchemy.schema.Table, msg: str) -> None:
    """Raise unless the given table is writeable, either because the
    database connection is read-write or because the table is a temporary
    table.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy table object to check.
    msg : `str`
        Message for the exception raised when the table is not writeable.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isTableWriteable` reports that the table is not
        writeable.
    """
    if self.isTableWriteable(table):
        return
    raise ReadOnlyDatabaseError(msg)

606 

@contextmanager
def declareStaticTables(self, *, create: bool) -> Iterator[StaticTablesContext]:
    """Return a context manager in which the database's static DDL schema
    can be declared.

    Parameters
    ----------
    create : `bool`
        If `True`, attempt to create all tables at the end of the context.
        If `False`, they will be assumed to already exist.

    Returns
    -------
    schema : `StaticTablesContext`
        A helper object that is used to add new tables.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if ``create`` is `True` and `Database.isWriteable` returns
        `False`.
    SchemaAlreadyDefinedError
        Raised if ``create`` is `True` and tables already exist in this
        database's namespace.

    Examples
    --------
    Given a `Database` instance ``db``::

        with db.declareStaticTables(create=True) as schema:
            schema.addTable("table1", TableSpec(...))
            schema.addTable("table2", TableSpec(...))

    Notes
    -----
    A database's static DDL schema must be declared before any dynamic
    tables are managed via calls to `ensureTableExists` or
    `getExistingTable`. The order in which static schema tables are added
    inside the context block is unimportant; they will automatically be
    sorted and added in an order consistent with their foreign key
    relationships.

    If anything raises, inside the context block or while the schema is
    being created, the stored metadata is reset to `None` before the
    exception propagates.
    """
    if create and not self.isWriteable():
        raise ReadOnlyDatabaseError(f"Cannot create tables in read-only database {self}.")
    self._metadata = sqlalchemy.MetaData(schema=self.namespace)
    try:
        schema = StaticTablesContext(self)
        if create and schema._tableNames:
            # The database looks initialized already; refuse to do anything
            # rather than risk modifying or destroying a valid schema.
            raise SchemaAlreadyDefinedError(f"Cannot create tables in non-empty database {self}.")
        yield schema
        # Foreign keys are attached only now, after every table has been
        # declared.
        for ownerTable, constraint in schema._foreignKeys:
            ownerTable.append_constraint(constraint)
        if create:
            if self.namespace is not None and self.namespace not in schema._inspector.get_schema_names():
                with self._connection() as connection:
                    connection.execute(sqlalchemy.schema.CreateSchema(self.namespace))
            # Some of our columns use sqlalchemy Sequence objects. A bug in
            # sqlalchemy currently makes building the repr of a sequence
            # emit a deprecation warning from one of its properties, so
            # those warnings are filtered out while tables are created.
            with warnings.catch_warnings():
                warnings.simplefilter("ignore", category=sqlalchemy.exc.SADeprecationWarning)
                self._metadata.create_all(self._engine)
            # Run the registered initializers, in registration order.
            for initializer in schema._initializers:
                initializer(self)
    except BaseException:
        self._metadata = None
        raise

679 

@abstractmethod
def isWriteable(self) -> bool:
    """Return `True` if this database can be modified by this client.

    This base-class implementation is abstract and always raises
    `NotImplementedError`.
    """
    raise NotImplementedError()

684 

@abstractmethod
def __str__(self) -> str:
    """Return a human-readable identifier for this `Database`, including
    any namespace or schema that identifies its names within a `Registry`.

    This base-class implementation is abstract and always raises
    `NotImplementedError`.
    """
    raise NotImplementedError()

691 

@property
def dialect(self) -> sqlalchemy.engine.Dialect:
    """The SQLAlchemy dialect for this database engine
    (`sqlalchemy.engine.Dialect`).

    The value is read from the ``dialect`` attribute of the engine.
    """
    return self._engine.dialect

698 

def shrinkDatabaseEntityName(self, original: str) -> str:
    """Return a version of the given name that fits within this database
    engine's length limits for table, constraint, index, and sequence
    names.

    Implementations should not assume that simple truncation is safe,
    because multiple long names often begin with the same prefix.

    The default implementation simply returns the given name.

    Parameters
    ----------
    original : `str`
        The original name.

    Returns
    -------
    shrunk : `str`
        The new, possibly shortened name.
    """
    return original

720 

def expandDatabaseEntityName(self, shrunk: str) -> str:
    """Retrieve the original name for a database entity that was too long
    to fit within the database engine's limits.

    The default implementation simply returns the given name.

    Parameters
    ----------
    shrunk : `str`
        The possibly shortened name.

    Returns
    -------
    original : `str`
        The original name.
    """
    return shrunk

736 

def _mangleTableName(self, name: str) -> str:
    """Map a logical, user-visible table name to the true table name used
    in the database.

    The default implementation returns the given name unchanged.

    Parameters
    ----------
    name : `str`
        Input table name. Should not include a namespace (i.e. schema)
        prefix.

    Returns
    -------
    mangled : `str`
        Mangled version of the table name (still with no namespace prefix).

    Notes
    -----
    Reimplementations of this method must be idempotent - mangling an
    already-mangled name must have no effect.
    """
    return name

760 

def _makeColumnConstraints(self, table: str, spec: ddl.FieldSpec) -> List[sqlalchemy.CheckConstraint]:
    """Create constraints based on this spec.

    The default implementation ignores both arguments and returns an empty
    list.

    Parameters
    ----------
    table : `str`
        Name of the table this column is being added to.
    spec : `FieldSpec`
        Specification for the field to be added.

    Returns
    -------
    constraint : `list` of `sqlalchemy.CheckConstraint`
        Constraints added for this column.
    """
    # By default we return no additional constraints
    return []

778 

def _convertFieldSpec(
    self, table: str, spec: ddl.FieldSpec, metadata: sqlalchemy.MetaData, **kwargs: Any
) -> sqlalchemy.schema.Column:
    """Build the `sqlalchemy.schema.Column` described by a `FieldSpec`.

    Parameters
    ----------
    table : `str`
        Name of the table this column is being added to.
    spec : `FieldSpec`
        Specification for the field to be added.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema this field's table is
        being added to.
    **kwargs
        Additional keyword arguments to forward to the
        `sqlalchemy.schema.Column` constructor. This is provided to make
        it easier for derived classes to delegate to ``super()`` while
        making only minor changes.

    Returns
    -------
    column : `sqlalchemy.schema.Column`
        SQLAlchemy representation of the field.
    """
    # Positional arguments of the column: its name, its sized type and,
    # for autoincrement fields, a sequence.
    columnArgs = [spec.name, spec.getSizedColumnType()]
    if spec.autoincrement:
        # The sequence provides auto-incrementing for databases that do not
        # support it natively; sqlalchemy ignores it for those that do.
        sequenceName = self.shrinkDatabaseEntityName(f"{table}_seq_{spec.name}")
        columnArgs.append(sqlalchemy.Sequence(sequenceName, metadata=metadata))
    assert spec.doc is None or isinstance(spec.doc, str), f"Bad doc for {table}.{spec.name}."
    return sqlalchemy.schema.Column(
        *columnArgs,
        nullable=spec.nullable,
        primary_key=spec.primaryKey,
        comment=spec.doc,
        server_default=spec.default,
        **kwargs,
    )

823 

def _convertForeignKeySpec(
    self, table: str, spec: ddl.ForeignKeySpec, metadata: sqlalchemy.MetaData, **kwargs: Any
) -> sqlalchemy.schema.ForeignKeyConstraint:
    """Convert a `ForeignKeySpec` to a
    `sqlalchemy.schema.ForeignKeyConstraint`.

    Parameters
    ----------
    table : `str`
        Name of the table this foreign key is being added to.
    spec : `ForeignKeySpec`
        Specification for the foreign key to be added.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema this constraint is
        being added to. It is not used by this implementation.
    **kwargs
        Additional keyword arguments to forward to the
        `sqlalchemy.schema.ForeignKeyConstraint` constructor. This is
        provided to make it easier for derived classes to delegate to
        ``super()`` while making only minor changes. ``name`` and
        ``ondelete`` are always supplied by this method and must not be
        passed.

    Returns
    -------
    constraint : `sqlalchemy.schema.ForeignKeyConstraint`
        SQLAlchemy representation of the constraint.
    """
    # The mangled name of the referenced table is needed both for the
    # constraint name and for every target column, so compute it once.
    targetTable = self._mangleTableName(spec.table)
    name = self.shrinkDatabaseEntityName(
        "_".join(["fkey", table, targetTable] + list(spec.target) + list(spec.source))
    )
    return sqlalchemy.schema.ForeignKeyConstraint(
        spec.source,
        [f"{targetTable}.{col}" for col in spec.target],
        name=name,
        ondelete=spec.onDelete,
        # Forward the extra keyword arguments, as documented.
        **kwargs,
    )

861 

862 def _convertExclusionConstraintSpec( 

863 self, 

864 table: str, 

865 spec: Tuple[Union[str, Type[TimespanDatabaseRepresentation]], ...], 

866 metadata: sqlalchemy.MetaData, 

867 ) -> sqlalchemy.schema.Constraint: 

868 """Convert a `tuple` from `ddl.TableSpec.exclusion` into a SQLAlchemy 

869 constraint representation. 

870 

871 Parameters 

872 ---------- 

873 table : `str` 

874 Name of the table this constraint is being added to. 

875 spec : `tuple` [ `str` or `type` ] 

876 A tuple of `str` column names and the `type` object returned by 

877 `getTimespanRepresentation` (which must appear exactly once), 

878 indicating the order of the columns in the index used to back the 

879 constraint. 

880 metadata : `sqlalchemy.MetaData` 

881 SQLAlchemy representation of the DDL schema this constraint is 

882 being added to. 

883 

884 Returns 

885 ------- 

886 constraint : `sqlalchemy.schema.Constraint` 

887 SQLAlchemy representation of the constraint. 

888 

889 Raises 

890 ------ 

891 NotImplementedError 

892 Raised if this database does not support exclusion constraints. 

893 """ 

894 raise NotImplementedError(f"Database {self} does not support exclusion constraints.") 

895 

def _convertTableSpec(
    self, name: str, spec: ddl.TableSpec, metadata: sqlalchemy.MetaData, **kwargs: Any
) -> sqlalchemy.schema.Table:
    """Convert a `TableSpec` to a `sqlalchemy.schema.Table`.

    Parameters
    ----------
    name : `str`
        Name of the table; it is passed through `_mangleTableName` before
        being used.
    spec : `TableSpec`
        Specification for the table to be converted.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema this table is being
        added to.
    **kwargs
        Additional keyword arguments to forward to the
        `sqlalchemy.schema.Table` constructor.  This is provided to make it
        easier for derived classes to delegate to ``super()`` while making
        only minor changes.

    Returns
    -------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy representation of the table.

    Notes
    -----
    Foreign key *constraints* from ``spec.foreignKeys`` are deliberately
    not created here, in order to avoid circular dependencies; they are
    added by higher-level logic in `ensureTableExists`,
    `getExistingTable`, and `declareStaticTables`.  Indexes on foreign key
    source columns are created here when requested by the spec.
    """
    name = self._mangleTableName(name)

    # Positional arguments for the Table constructor: columns first, then
    # per-column constraints, then table-level constraints and indexes.
    args: List[Any] = []
    for fieldSpec in spec.fields:
        args.append(self._convertFieldSpec(name, fieldSpec, metadata))
    for fieldSpec in spec.fields:
        args.extend(self._makeColumnConstraints(name, fieldSpec))

    # Remember every column combination that is already covered by the
    # primary key, a unique constraint, or an explicit index, so that no
    # duplicate explicit or foreign key index is emitted for it.
    primaryKeyColumns = tuple(fieldSpec.name for fieldSpec in spec.fields if fieldSpec.primaryKey)
    allIndexes = {primaryKeyColumns}

    for columns in spec.unique:
        uniqueName = self.shrinkDatabaseEntityName("_".join([name, "unq"] + list(columns)))
        args.append(sqlalchemy.schema.UniqueConstraint(*columns, name=uniqueName))
    allIndexes.update(spec.unique)

    for columns in spec.indexes:
        if columns in allIndexes:
            continue
        indexName = self.shrinkDatabaseEntityName("_".join([name, "idx"] + list(columns)))
        args.append(sqlalchemy.schema.Index(indexName, *columns, unique=(columns in spec.unique)))
    allIndexes.update(spec.indexes)

    for fk in spec.foreignKeys:
        if not fk.addIndex or fk.source in allIndexes:
            continue
        fkIndexName = self.shrinkDatabaseEntityName("_".join((name, "fkidx") + fk.source))
        args.append(sqlalchemy.schema.Index(fkIndexName, *fk.source))

    for excl in spec.exclusion:
        args.append(self._convertExclusionConstraintSpec(name, excl, metadata))

    assert spec.doc is None or isinstance(spec.doc, str), f"Bad doc for {name}."
    return sqlalchemy.schema.Table(name, metadata, *args, comment=spec.doc, info=spec, **kwargs)

966 

def ensureTableExists(self, name: str, spec: ddl.TableSpec) -> sqlalchemy.schema.Table:
    """Ensure that a table with the given name and specification exists,
    creating it if necessary.

    Parameters
    ----------
    name : `str`
        Name of the table (not including namespace qualifiers).
    spec : `TableSpec`
        Specification for the table.  This will be used when creating the
        table, and *may* be used when obtaining an existing table to check
        for consistency, but no such check is guaranteed.

    Returns
    -------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy representation of the table.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False`, and the table does not
        already exist.
    DatabaseConflictError
        Raised if the table exists but ``spec`` is inconsistent with its
        definition.
    sqlalchemy.exc.DatabaseError
        Re-raised if creating the table failed and the table still does
        not exist in the database afterwards.

    Notes
    -----
    This method may not be called within transactions.  It may be called on
    read-only databases if and only if the table does in fact already
    exist.

    Subclasses may override this method, but usually should not need to.
    """
    # TODO: if _engine is used to make a table then it uses separate
    # connection and should not interfere with current transaction
    assert (
        self._session_connection is None or not self._session_connection.in_transaction()
    ), "Table creation interrupts transactions."
    assert self._metadata is not None, "Static tables must be declared before dynamic tables."
    table = self.getExistingTable(name, spec)
    if table is not None:
        return table
    if not self.isWriteable():
        raise ReadOnlyDatabaseError(
            f"Table {name} does not exist, and cannot be created "
            f"because database {self} is read-only."
        )
    table = self._convertTableSpec(name, spec, self._metadata)
    for foreignKeySpec in spec.foreignKeys:
        table.append_constraint(self._convertForeignKeySpec(name, foreignKeySpec, self._metadata))
    try:
        with self._connection() as connection:
            table.create(connection)
    except sqlalchemy.exc.DatabaseError:
        # Some other process could have created the table meanwhile, which
        # usually causes OperationalError or ProgrammingError.  We cannot
        # use IF NOT EXISTS clause in this case due to PostgreSQL race
        # condition on server side which causes IntegrityError.  Instead we
        # catch these exceptions (they all inherit DatabaseError) and
        # re-check whether table is now there.
        #
        # The Table built above is already registered in self._metadata, so
        # it has to be dropped from there first; otherwise getExistingTable
        # would find it in the in-memory cache without ever looking at the
        # database, and a genuine creation failure would be swallowed.
        self._metadata.remove(table)
        table = self.getExistingTable(name, spec)
        if table is None:
            raise
    return table

1033 

def getExistingTable(self, name: str, spec: ddl.TableSpec) -> Optional[sqlalchemy.schema.Table]:
    """Obtain an existing table with the given name and specification.

    Parameters
    ----------
    name : `str`
        Name of the table (not including namespace qualifiers).
    spec : `TableSpec`
        Specification for the table.  This will be used when creating the
        SQLAlchemy representation of the table, and it is used to
        check that the actual table in the database is consistent.

    Returns
    -------
    table : `sqlalchemy.schema.Table` or `None`
        SQLAlchemy representation of the table, or `None` if it does not
        exist.

    Raises
    ------
    DatabaseConflictError
        Raised if the table exists but ``spec`` is inconsistent with its
        definition.

    Notes
    -----
    This method can be called within transactions and never modifies the
    database.

    Subclasses may override this method, but usually should not need to.
    """
    assert self._metadata is not None, "Static tables must be declared before dynamic tables."
    # Only lookups against the metadata cache and the database use the
    # mangled name.  _convertTableSpec mangles its ``name`` argument itself,
    # so it must be given the user-facing name or the table would be
    # mangled twice.
    mangledName = self._mangleTableName(name)
    key = mangledName if self.namespace is None else f"{self.namespace}.{mangledName}"
    table = self._metadata.tables.get(key)
    if table is not None:
        # Already known in memory: only check that the column names agree.
        if spec.fields.names != set(table.columns.keys()):
            raise DatabaseConflictError(
                f"Table '{mangledName}' has already been defined differently; the new "
                f"specification has columns {list(spec.fields.names)}, while "
                f"the previous definition has {list(table.columns.keys())}."
            )
        return table
    # Not known in memory: ask the database whether the table is there.
    inspector = sqlalchemy.inspect(self._engine)
    if mangledName not in inspector.get_table_names(schema=self.namespace):
        return None
    _checkExistingTableDefinition(
        mangledName, spec, inspector.get_columns(mangledName, schema=self.namespace)
    )
    table = self._convertTableSpec(name, spec, self._metadata)
    for foreignKeySpec in spec.foreignKeys:
        # Pass the user-facing name, exactly as ensureTableExists does, so
        # both code paths generate identical constraint names.
        table.append_constraint(self._convertForeignKeySpec(name, foreignKeySpec, self._metadata))
    return table

1084 

@classmethod
def getTimespanRepresentation(cls) -> Type[TimespanDatabaseRepresentation]:
    """Return the `type` describing how `Timespan` objects are stored in
    this database.

    `Database` itself does not use the returned type anywhere else; it is
    up to calling code to keep DDL and queries consistent with it.

    Returns
    -------
    TimespanReprClass : `type` (`TimespanDatabaseRepresentation` subclass)
        A type that encapsulates the way `Timespan` objects should be
        stored in this database.

    Notes
    -----
    Timespan-mangling logic is kept outside the `Database` implementations,
    even though the choice of representation ultimately belongs to a
    `Database` implementation, for two main reasons:

    - Timespans appear in relatively few tables and queries in our
      typical usage, and the code operating on them already knows it is
      dealing with timespans.  A timespan-representation-aware
      implementation of, say, `insert` would instead need extra logic to
      detect when timespan-mangling is required, which would usually be
      useless overhead.

    - SQLAlchemy's rich SELECT query expression system has no way to wrap
      multiple columns in a single expression object (the ORM does, but
      we are not using the ORM).  Encapsulating timespan representations
      there would mean wrapping _much_ more of that code in our own
      interfaces.
    """
    return TimespanDatabaseRepresentation.Compound

1120 

@classmethod
def getSpatialRegionRepresentation(cls) -> Type[SpatialRegionDatabaseRepresentation]:
    """Return the `type` describing how `lsst.sphgeom.Region` objects are
    stored in this database.

    `Database` itself does not use the returned type anywhere else; it is
    up to calling code to keep DDL and queries consistent with it.

    Returns
    -------
    RegionReprClass : `type` (`SpatialRegionDatabaseRepresentation` subclass)
        A type that encapsulates the way `lsst.sphgeom.Region` objects
        should be stored in this database.

    Notes
    -----
    See `getTimespanRepresentation` for comments on why this method is not
    more tightly integrated with the rest of the `Database` interface.
    """
    return SpatialRegionDatabaseRepresentation

1142 

def sync(
    self,
    table: sqlalchemy.schema.Table,
    *,
    keys: Dict[str, Any],
    compared: Optional[Dict[str, Any]] = None,
    extra: Optional[Dict[str, Any]] = None,
    returning: Optional[Sequence[str]] = None,
    update: bool = False,
) -> Tuple[Optional[Dict[str, Any]], Union[bool, Dict[str, Any]]]:
    """Insert into a table as necessary to ensure database contains
    values equivalent to the given ones.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table to be queried and possibly inserted into.
    keys : `dict`
        Column name-value pairs used to search for an existing row; must
        be a combination that can be used to select a single row if one
        exists.  If such a row does not exist, these values are used in
        the insert.
    compared : `dict`, optional
        Column name-value pairs that are compared to those in any existing
        row.  If such a row does not exist, these values are used in the
        insert.
    extra : `dict`, optional
        Column name-value pairs that are ignored if a matching row exists,
        but used in an insert if one is necessary.
    returning : `~collections.abc.Sequence` of `str`, optional
        The names of columns whose values should be returned.
    update : `bool`, optional
        If `True` (`False` is default), update the existing row with the
        values in ``compared`` instead of raising `DatabaseConflictError`.

    Returns
    -------
    row : `dict`, optional
        The value of the fields indicated by ``returning``, or `None` if
        ``returning`` is `None`.
    inserted_or_updated : `bool` or `dict`
        If `True`, a new row was inserted; if `False`, a matching row
        already existed.  If a `dict` (only possible if ``update=True``),
        then an existing row was updated, and the dict maps the names of
        the updated columns to their *old* values (new values can be
        obtained from ``compared``).

    Raises
    ------
    DatabaseConflictError
        Raised if the values in ``compared`` do not match the values in the
        database (and ``update`` is `False`).
    ReadOnlyDatabaseError
        Raised if the table is not writeable (`isTableWriteable` returns
        `False`) and either no matching row exists, or ``update`` is `True`
        and the existing row differs from ``compared``.
    ConflictingDefinitionError
        Raised if the table is writeable but the query performed after the
        insert attempt finds no row matching ``keys``.
    RuntimeError
        Raised if more than one row matches ``keys``, or if a conflict with
        ``compared`` is found right after an insert that was reported as
        successful.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    It may be called on read-only databases if and only if the matching row
    does in fact already exist.
    """

    def check() -> Tuple[int, Optional[Dict[str, Any]], Optional[List]]:
        """Query for a row that matches the ``keys`` argument, and compare
        to what was given by the caller.

        Returns
        -------
        n : `int`
            Number of matching rows.  ``n != 1`` is always an error, but
            it's a different kind of error depending on where `check` is
            being called.
        bad : `dict` or `None`
            The subset of the keys of ``compared`` for which the existing
            values did not match the given one, mapped to the existing
            values in the database.  A non-empty ``bad`` is a conflict;
            how it is handled depends on the calling context.  `None` if
            ``n != 1``.
        result : `list` or `None`
            Results in the database that correspond to the columns given
            in ``returning``, or `None` if ``returning is None`` or
            ``n != 1``.
        """
        # Fetch every column that has to be either compared or returned.
        toSelect: Set[str] = set()
        if compared is not None:
            toSelect.update(compared.keys())
        if returning is not None:
            toSelect.update(returning)
        if not toSelect:
            # Need to select some column, even if we just want to see
            # how many rows we get back.
            toSelect.add(next(iter(keys.keys())))
        selectSql = (
            sqlalchemy.sql.select(*[table.columns[k].label(k) for k in toSelect])
            .select_from(table)
            .where(sqlalchemy.sql.and_(*[table.columns[k] == v for k, v in keys.items()]))
        )
        with self._connection() as connection:
            fetched = list(connection.execute(selectSql).mappings())
        if len(fetched) != 1:
            return len(fetched), None, None
        existing = fetched[0]
        if compared is not None:

            def safeNotEqual(a: Any, b: Any) -> bool:
                # astropy times are compared via
                # TimeConverter.times_equal rather than the ``!=``
                # operator; everything else uses ``!=`` directly.
                if isinstance(a, astropy.time.Time):
                    return not time_utils.TimeConverter().times_equal(a, b)
                return a != b

            inconsistencies = {
                k: existing[k] for k, v in compared.items() if safeNotEqual(existing[k], v)
            }
        else:
            inconsistencies = {}
        if returning is not None:
            toReturn: Optional[list] = [existing[k] for k in returning]
        else:
            toReturn = None
        return 1, inconsistencies, toReturn

    def format_bad(inconsistencies: Dict[str, Any]) -> str:
        """Format the 'bad' dictionary of existing values returned by
        ``check`` into a string suitable for an error message.

        Each entry is rendered as ``column: existing != expected``.
        """
        assert compared is not None, "Should not be able to get inconsistencies without comparing."
        return ", ".join(f"{k}: {v!r} != {compared[k]!r}" for k, v in inconsistencies.items())

    if self.isTableWriteable(table):
        # Try an insert first, but allow it to fail (in only specific
        # ways).
        row = keys.copy()
        if compared is not None:
            row.update(compared)
        if extra is not None:
            row.update(extra)
        with self.transaction():
            inserted = bool(self.ensure(table, row))
            inserted_or_updated: Union[bool, Dict[str, Any]]
            # Need to perform check() for this branch inside the
            # transaction, so we roll back an insert that didn't do
            # what we expected.  That limits the extent to which we
            # can reduce duplication between this block and the other
            # ones that perform similar logic.
            n, bad, result = check()
            if n < 1:
                raise ConflictingDefinitionError(
                    f"Attempted to ensure {row} exists by inserting it with ON CONFLICT IGNORE, "
                    f"but a post-insert query on {keys} returned no results. "
                    f"Insert was {'' if inserted else 'not '}reported as successful. "
                    "This can occur if the insert violated a database constraint other than the "
                    "unique constraint or primary key used to identify the row in this call."
                )
            elif n > 1:
                raise RuntimeError(
                    f"Keys passed to sync {keys.keys()} do not comprise a "
                    f"unique constraint for table {table.name}."
                )
            elif bad:
                assert (
                    compared is not None
                ), "Should not be able to get inconsistencies without comparing."
                if inserted:
                    raise RuntimeError(
                        f"Conflict ({bad}) in sync after successful insert; this is "
                        "possible if the same table is being updated by a concurrent "
                        "process that isn't using sync, but it may also be a bug in "
                        "daf_butler."
                    )
                elif update:
                    # Overwrite only the columns that actually differ.
                    # NOTE: ``result`` was fetched by check() before this
                    # UPDATE runs, so a ``returning`` column that is also
                    # in ``bad`` carries its pre-update value.
                    with self._connection() as connection:
                        connection.execute(
                            table.update()
                            .where(sqlalchemy.sql.and_(*[table.columns[k] == v for k, v in keys.items()]))
                            .values(**{k: compared[k] for k in bad.keys()})
                        )
                    inserted_or_updated = bad
                else:
                    raise DatabaseConflictError(
                        f"Conflict in sync for table {table.name} on column(s) {format_bad(bad)}."
                    )
            else:
                inserted_or_updated = inserted
    else:
        # Database is not writeable; just see if the row exists.
        n, bad, result = check()
        if n < 1:
            raise ReadOnlyDatabaseError("sync needs to insert, but database is read-only.")
        elif n > 1:
            raise RuntimeError("Keys passed to sync do not comprise a unique constraint.")
        elif bad:
            if update:
                raise ReadOnlyDatabaseError("sync needs to update, but database is read-only.")
            else:
                raise DatabaseConflictError(
                    f"Conflict in sync for table {table.name} on column(s) {format_bad(bad)}."
                )
        inserted_or_updated = False
    if returning is None:
        return None, inserted_or_updated
    else:
        # Every path that reaches this point saw exactly one matching row,
        # so check() produced a list for ``returning``.
        assert result is not None
        return {k: v for k, v in zip(returning, result)}, inserted_or_updated

1346 

def insert(
    self,
    table: sqlalchemy.schema.Table,
    *rows: dict,
    returnIds: bool = False,
    select: Optional[sqlalchemy.sql.Select] = None,
    names: Optional[Iterable[str]] = None,
) -> Optional[List[int]]:
    """Insert one or more rows into a table, optionally returning
    autoincrement primary key values.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    returnIds : `bool`
        If `True` (`False` is default), return the values of the table's
        autoincrement primary key field (which must exist).
    select : `sqlalchemy.sql.Select`, optional
        A SELECT query expression to insert rows from.  Cannot be provided
        with either ``rows`` or ``returnIds=True``.
    names : `Iterable` [ `str` ], optional
        Names of columns in ``table`` to be populated, ordered to match the
        columns returned by ``select``.  Ignored if ``select`` is `None`.
        If not provided, the columns returned by ``select`` must be named
        to match the desired columns of ``table``.
    *rows
        Positional arguments are the rows to be inserted, as dictionaries
        mapping column name to value.  The keys in all dictionaries must
        be the same.

    Returns
    -------
    ids : `None`, or `list` of `int`
        If ``returnIds`` is `True`, a `list` containing the inserted
        values for the table's autoincrement primary key; `None`
        otherwise.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.
    TypeError
        Raised if ``select`` is combined with ``rows`` or with
        ``returnIds=True``.

    Notes
    -----
    The default implementation uses bulk insert syntax when ``returnIds``
    is `False`, and a loop over single-row insert operations when it is
    `True`.

    Derived classes should reimplement when they can provide a more
    efficient implementation (especially for the latter case).

    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.
    """
    self.assertTableWriteable(table, f"Cannot insert into read-only table {table}.")
    if select is not None and (rows or returnIds):
        raise TypeError("'select' is incompatible with passing value rows or returnIds=True.")
    if select is None and not rows:
        # Nothing to insert at all; do not touch the database.
        return [] if returnIds else None
    with self._connection() as connection:
        if returnIds:
            # One statement per row, so each generated key can be read back.
            statement = table.insert()
            ids = []
            for row in rows:
                ids.append(connection.execute(statement, row).inserted_primary_key[0])
            return ids
        if select is None:
            connection.execute(table.insert(), rows)
            return None
        if names is None:
            # columns() is deprecated since 1.4, but
            # selected_columns() method did not exist in 1.3.
            if hasattr(select, "selected_columns"):
                names = select.selected_columns.keys()
            else:
                names = select.columns.keys()
        connection.execute(table.insert().from_select(names, select))
        return None

1426 

@abstractmethod
def replace(self, table: sqlalchemy.schema.Table, *rows: dict) -> None:
    """Insert rows into a table, overwriting any existing row whose
    primary key would collide with a new one.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    *rows
        Rows to insert, each a dictionary mapping column name to value.
        All dictionaries must have the same keys.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    Implementations should raise a `sqlalchemy.exc.IntegrityError`
    exception when a constraint other than the primary key would be
    violated.

    Implementations are not required to support `replace` on tables
    with autoincrement keys.

    This base-class method has no implementation of its own and always
    raises `NotImplementedError`.
    """
    raise NotImplementedError()

1460 

@abstractmethod
def ensure(self, table: sqlalchemy.schema.Table, *rows: dict) -> int:
    """Insert rows into a table, silently skipping every row whose
    insertion would violate any constraint.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    *rows
        Rows to insert, each a dictionary mapping column name to value.
        All dictionaries must have the same keys.

    Returns
    -------
    count : `int`
        The number of rows actually inserted.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.
        This is raised even if the operation would do nothing even on a
        writeable database.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    Implementations are not required to support `ensure` on tables
    with autoincrement keys.

    This base-class method has no implementation of its own and always
    raises `NotImplementedError`.
    """
    raise NotImplementedError()

1496 

def delete(self, table: sqlalchemy.schema.Table, columns: Iterable[str], *rows: dict) -> int:
    """Delete one or more rows from a table.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table that rows should be deleted from.
    columns : `~collections.abc.Iterable` of `str`
        The names of columns that will be used to constrain the rows to
        be deleted; these will be combined via ``AND`` to form the
        ``WHERE`` clause of the delete query.  May be any iterable,
        including a one-shot iterator.  If empty, no ``WHERE`` clause is
        generated and every row of the table is deleted.
    *rows
        Positional arguments are the keys of rows to be deleted, as
        dictionaries mapping column name to value.  The keys in all
        dictionaries must be exactly the names in ``columns``.

    Returns
    -------
    count : `int`
        Number of rows deleted.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    The default implementation should be sufficient for most derived
    classes.
    """
    self.assertTableWriteable(table, f"Cannot delete from read-only table {table}.")
    # Materialize before testing truthiness: an iterator or generator is
    # always truthy, even when it yields nothing, which would otherwise
    # turn a "delete everything" request into a silent no-op.
    columns = list(columns)
    if columns and not rows:
        # If there are no columns, this operation is supposed to delete
        # everything (so we proceed as usual).  But if there are columns,
        # but no rows, it was a constrained bulk operation where the
        # constraint is that no rows match, and we should short-circuit
        # while reporting that no rows were affected.
        return 0
    sql = table.delete()

    # More efficient to use IN operator if there is only one
    # variable changing across all rows.
    content: Dict[str, Set] = defaultdict(set)
    if len(columns) == 1:
        # Nothing to calculate since we can always use IN
        column = columns[0]
        changing_columns = [column]
        content[column] = {row[column] for row in rows}
    else:
        for row in rows:
            for k, v in row.items():
                content[k].add(v)
        changing_columns = [col for col, values in content.items() if len(values) > 1]

    if len(changing_columns) != 1:
        # More than one column changes each time so do explicit bind
        # parameters and have each row processed separately.
        whereTerms = [table.columns[name] == sqlalchemy.sql.bindparam(name) for name in columns]
        if whereTerms:
            sql = sql.where(sqlalchemy.sql.and_(*whereTerms))
        with self._connection() as connection:
            return connection.execute(sql, rows).rowcount

    # One of the columns has changing values but any others are
    # fixed.  In this case we can use an IN operator and be more
    # efficient.
    name = changing_columns[0]

    # Simple where clause for the unchanging columns; each of their value
    # sets holds exactly one element.
    clauses = []
    for fixedName, fixedValues in content.items():
        if fixedName == name:
            continue
        clauses.append(table.columns[fixedName] == next(iter(fixedValues)))

    # The IN operator will not work for "infinite" numbers of
    # rows so must batch it up into distinct calls.
    in_content = list(content[name])
    n_per_loop = 1_000  # Controls how many items to put in IN clause
    rowcount = 0
    with self._connection() as connection:
        for start in range(0, len(in_content), n_per_loop):
            in_clause = table.columns[name].in_(in_content[start : start + n_per_loop])
            newsql = sql.where(sqlalchemy.sql.and_(*clauses, in_clause))
            rowcount += connection.execute(newsql).rowcount
    return rowcount

1595 

def deleteWhere(self, table: sqlalchemy.schema.Table, where: sqlalchemy.sql.ClauseElement) -> int:
    """Delete rows from a table with pre-constructed WHERE clause.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table that rows should be deleted from.
    where : `sqlalchemy.sql.ClauseElement`
        Already-built SQLAlchemy expression that is used as the ``WHERE``
        clause of the delete query.

    Returns
    -------
    count : `int`
        Number of rows deleted.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    The default implementation should be sufficient for most derived
    classes.
    """
    self.assertTableWriteable(table, f"Cannot delete from read-only table {table}.")

    statement = table.delete()
    statement = statement.where(where)
    with self._connection() as connection:
        outcome = connection.execute(statement)
        return outcome.rowcount

1631 

def update(self, table: sqlalchemy.schema.Table, where: Dict[str, str], *rows: dict) -> int:
    """Update one or more rows in a table.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table containing the rows to be updated.
    where : `dict` [`str`, `str`]
        A mapping from the names of columns that will be used to search for
        existing rows to the keys that will hold these values in the
        ``rows`` dictionaries.  Note that these may not be the same due to
        SQLAlchemy limitations.
    *rows
        Positional arguments are the rows to be updated.  The keys in all
        dictionaries must be the same, and may correspond to either a
        value in the ``where`` dictionary or the name of a column to be
        updated.

    Returns
    -------
    count : `int`
        Number of rows matched (regardless of whether the update actually
        modified them).  Zero is returned without touching the database
        when no rows are given.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    The default implementation should be sufficient for most derived
    classes.
    """
    self.assertTableWriteable(table, f"Cannot update read-only table {table}.")
    if not rows:
        return 0
    # One equality term per search column, each bound to the row key that
    # carries its value.
    conditions = []
    for columnName, rowKey in where.items():
        conditions.append(table.columns[columnName] == sqlalchemy.sql.bindparam(rowKey))
    statement = table.update().where(sqlalchemy.sql.and_(*conditions))
    with self._connection() as connection:
        return connection.execute(statement, rows).rowcount

1677 

def query(
    self, sql: sqlalchemy.sql.FromClause, *args: Any, **kwargs: Any
) -> sqlalchemy.engine.ResultProxy:
    """Execute a SELECT query and return its result object.

    Parameters
    ----------
    sql : `sqlalchemy.sql.FromClause`
        SQLAlchemy representation of the ``SELECT`` query to run.
    *args
        Extra positional arguments, passed through to
        `sqlalchemy.engine.Connection.execute`.
    **kwargs
        Extra keyword arguments, passed through to
        `sqlalchemy.engine.Connection.execute`.

    Returns
    -------
    result : `sqlalchemy.engine.ResultProxy`
        Query results.

    Notes
    -----
    Most derived classes should be able to rely on this default
    implementation.
    """
    # The returned Result object outlives this call, so the connection
    # lifetime has to be managed here. Inside a transaction context the
    # existing session connection is reused; otherwise a dedicated
    # connection is opened that is closed when the result is closed.
    #
    # TODO: Returning a context manager from this method might be a better
    # approach, but that would mean big changes for its callers.
    connection = self._session_connection
    if connection is None:
        connection = self._engine.connect(close_with_result=True)
    # TODO: should we guard against non-SELECT queries here?
    return connection.execute(sql, *args, **kwargs)

1718 

# Annotation-only attribute declarations: no value is assigned here.
# NOTE(review): presumably populated by concrete implementations or during
# construction -- confirm against the rest of the class.
origin: int
"""An integer ID that should be used as the default for any datasets,
quanta, or other entities that use a (autoincrement, origin) compound
primary key (`int`).
"""

namespace: Optional[str]
"""The schema or namespace this database instance is associated with
(`str` or `None`).
"""