Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = [ 

24 "Database", 

25 "ReadOnlyDatabaseError", 

26 "DatabaseConflictError", 

27 "SchemaAlreadyDefinedError", 

28 "StaticTablesContext", 

29] 

30 

31from abc import ABC, abstractmethod 

32from contextlib import contextmanager 

33from typing import ( 

34 Any, 

35 Callable, 

36 Dict, 

37 Iterable, 

38 Iterator, 

39 List, 

40 Optional, 

41 Sequence, 

42 Set, 

43 Tuple, 

44) 

45import uuid 

46import warnings 

47 

48import astropy.time 

49import sqlalchemy 

50 

51from ...core import ddl, time_utils 

52from .._exceptions import ConflictingDefinitionError 

53 

54 

def _checkExistingTableDefinition(name: str, spec: ddl.TableSpec, inspection: List[Dict[str, Any]]) -> None:
    """Verify that a `ddl.TableSpec` agrees with what database introspection
    reports for the same table.

    Only the set of column names is compared.

    Parameters
    ----------
    name : `str`
        Name of the table (only used in error messages).
    spec : `ddl.TableSpec`
        Specification of the table.
    inspection : `list` of `dict`
        Column descriptions as returned by
        `sqlalchemy.engine.reflection.Inspector.get_columns`; only the
        ``"name"`` entry of each description is read.

    Raises
    ------
    DatabaseConflictError
        Raised if the definitions are inconsistent.
    """
    columnNames = [column["name"] for column in inspection]
    # Column order is irrelevant, so compare as sets.
    if spec.fields.names != set(columnNames):
        message = (
            f"Table '{name}' exists but is defined differently in the database; "
            f"specification has columns {list(spec.fields.names)}, while the "
            f"table in the database has {columnNames}."
        )
        raise DatabaseConflictError(message)

79 

80 

class ReadOnlyDatabaseError(RuntimeError):
    """Error signalling that a write operation was attempted on a `Database`
    that is read-only.
    """

85 

86 

class DatabaseConflictError(ConflictingDefinitionError):
    """Error signalling that database content (row values or schema
    entities) is inconsistent with what this client expects.
    """

91 

92 

class SchemaAlreadyDefinedError(RuntimeError):
    """Error signalling an attempt to initialize a database schema when
    some tables already exist.
    """

97 

98 

class StaticTablesContext:
    """Helper used to declare the static schema for a registry layer in a
    database.

    Instances are returned by `Database.declareStaticTables`, which is the
    only intended way to construct one.
    """

    def __init__(self, db: Database):
        self._db = db
        # (table, constraint) pairs collected by `addTable`; they are attached
        # by `Database.declareStaticTables` once the context body finishes.
        self._foreignKeys: List[Tuple[sqlalchemy.schema.Table, sqlalchemy.schema.ForeignKeyConstraint]] = []
        self._initializers: List[Callable[[Database], None]] = []
        self._inspector = sqlalchemy.engine.reflection.Inspector(db._connection)
        # Snapshot of the tables already present in the namespace.
        self._tableNames = frozenset(self._inspector.get_table_names(schema=db.namespace))

    def addTable(self, name: str, spec: ddl.TableSpec) -> sqlalchemy.schema.Table:
        """Declare a new table in the schema and return its SQLAlchemy
        representation.

        The new table may not actually be created until the end of the
        context created by `Database.declareStaticTables`, allowing tables
        to be declared in any order even in the presence of foreign key
        relationships.

        Parameters
        ----------
        name : `str`
            Logical name of the table; it is passed through the database's
            table-name mangling before use.
        spec : `ddl.TableSpec`
            Specification of the table.

        Returns
        -------
        table : `sqlalchemy.schema.Table`
            SQLAlchemy representation of the table.
        """
        db = self._db
        name = db._mangleTableName(name)
        if name in self._tableNames:
            # The table already exists; make sure its columns match the spec.
            existingColumns = self._inspector.get_columns(name, schema=db.namespace)
            _checkExistingTableDefinition(name, spec, existingColumns)
        table = db._convertTableSpec(name, spec, db._metadata)
        # Foreign keys are only recorded here, not attached to the table yet.
        self._foreignKeys.extend(
            (table, db._convertForeignKeySpec(name, foreignKeySpec, db._metadata))
            for foreignKeySpec in spec.foreignKeys
        )
        return table

    def addTableTuple(self, specs: Tuple[ddl.TableSpec, ...]) -> Tuple[sqlalchemy.schema.Table, ...]:
        """Declare a named tuple of tables, returning their SQLAlchemy
        representations in a named tuple of the same type.

        The new tables may not actually be created until the end of the
        context created by `Database.declareStaticTables`, allowing tables
        to be declared in any order even in the presence of foreign key
        relationships.

        Notes
        -----
        ``specs`` *must* be an instance of a type created by
        `collections.namedtuple`, not just a regular tuple, and the returned
        object is guaranteed to be of the same type. Because
        `~collections.namedtuple` is just a factory for `type` objects, not
        an actual type itself, we cannot represent this with type
        annotations.
        """
        # Each field name of the named tuple doubles as the table name.
        tables = (
            self.addTable(fieldName, fieldSpec)
            for fieldName, fieldSpec in zip(specs._fields, specs)  # type: ignore
        )
        return specs._make(tables)  # type: ignore

    def addInitializer(self, initializer: Callable[[Database], None]) -> None:
        """Register a callable that performs one-time initialization of a
        database.

        Initialization can mean anything that changes the state of a database
        and needs to be done exactly once after the database schema was
        created. An example could be population of schema attributes.

        Parameters
        ----------
        initializer : callable
            Callable taking a single argument, a `Database` instance.
        """
        self._initializers.append(initializer)

167 

168 

169class Database(ABC): 

170 """An abstract interface that represents a particular database engine's 

171 representation of a single schema/namespace/database. 

172 

173 Parameters 

174 ---------- 

175 origin : `int` 

176 An integer ID that should be used as the default for any datasets, 

177 quanta, or other entities that use a (autoincrement, origin) compound 

178 primary key. 

179 connection : `sqlalchemy.engine.Connection` 

180 The SQLAlchemy connection this `Database` wraps. 

181 namespace : `str`, optional 

182 Name of the schema or namespace this instance is associated with. 

183 This is passed as the ``schema`` argument when constructing a 

184 `sqlalchemy.schema.MetaData` instance. We use ``namespace`` instead to 

185 avoid confusion between "schema means namespace" and "schema means 

186 table definitions". 

187 

188 Notes 

189 ----- 

190 `Database` requires all write operations to go through its special named 

191 methods. Our write patterns are sufficiently simple that we don't really 

192 need the full flexibility of SQL insert/update/delete syntax, and we need 

193 non-standard (but common) functionality in these operations sufficiently 

194 often that it seems worthwhile to provide our own generic API. 

195 

196 In contrast, `Database.query` allows arbitrary ``SELECT`` queries (via 

197 their SQLAlchemy representation) to be run, as we expect these to require 

198 significantly more sophistication while still being limited to standard 

199 SQL. 

200 

201 `Database` itself has several underscore-prefixed attributes: 

202 

203 - ``_connection``: SQLAlchemy object representing the connection. 

204 - ``_metadata``: the `sqlalchemy.schema.MetaData` object representing 

205 the tables and other schema entities. 

206 

207 These are considered protected (derived classes may access them, but other 

208 code should not), and read-only, aside from executing SQL via 

209 ``_connection``. 

210 """ 

211 

212 def __init__(self, *, origin: int, connection: sqlalchemy.engine.Connection, 

213 namespace: Optional[str] = None): 

214 self.origin = origin 

215 self.namespace = namespace 

216 self._connection = connection 

217 self._metadata: Optional[sqlalchemy.schema.MetaData] = None 

218 self._tempTables: Set[str] = set() 

219 

220 def __repr__(self) -> str: 

221 # Rather than try to reproduce all the parameters used to create 

222 # the object, instead report the more useful information of the 

223 # connection URL. 

224 uri = str(self._connection.engine.url) 

225 if self.namespace: 

226 uri += f"#{self.namespace}" 

227 return f'{type(self).__name__}("{uri}")' 

228 

229 @classmethod 

230 def makeDefaultUri(cls, root: str) -> Optional[str]: 

231 """Create a default connection URI appropriate for the given root 

232 directory, or `None` if there can be no such default. 

233 """ 

234 return None 

235 

236 @classmethod 

237 def fromUri(cls, uri: str, *, origin: int, namespace: Optional[str] = None, 

238 writeable: bool = True) -> Database: 

239 """Construct a database from a SQLAlchemy URI. 

240 

241 Parameters 

242 ---------- 

243 uri : `str` 

244 A SQLAlchemy URI connection string. 

245 origin : `int` 

246 An integer ID that should be used as the default for any datasets, 

247 quanta, or other entities that use a (autoincrement, origin) 

248 compound primary key. 

249 namespace : `str`, optional 

250 A database namespace (i.e. schema) the new instance should be 

251 associated with. If `None` (default), the namespace (if any) is 

252 inferred from the URI. 

253 writeable : `bool`, optional 

254 If `True`, allow write operations on the database, including 

255 ``CREATE TABLE``. 

256 

257 Returns 

258 ------- 

259 db : `Database` 

260 A new `Database` instance. 

261 """ 

262 return cls.fromConnection(cls.connect(uri, writeable=writeable), 

263 origin=origin, 

264 namespace=namespace, 

265 writeable=writeable) 

266 

267 @classmethod 

268 @abstractmethod 

269 def connect(cls, uri: str, *, writeable: bool = True) -> sqlalchemy.engine.Connection: 

270 """Create a `sqlalchemy.engine.Connection` from a SQLAlchemy URI. 

271 

272 Parameters 

273 ---------- 

274 uri : `str` 

275 A SQLAlchemy URI connection string. 

276 origin : `int` 

277 An integer ID that should be used as the default for any datasets, 

278 quanta, or other entities that use a (autoincrement, origin) 

279 compound primary key. 

280 writeable : `bool`, optional 

281 If `True`, allow write operations on the database, including 

282 ``CREATE TABLE``. 

283 

284 Returns 

285 ------- 

286 connection : `sqlalchemy.engine.Connection` 

287 A database connection. 

288 

289 Notes 

290 ----- 

291 Subclasses that support other ways to connect to a database are 

292 encouraged to add optional arguments to their implementation of this 

293 method, as long as they maintain compatibility with the base class 

294 call signature. 

295 """ 

296 raise NotImplementedError() 

297 

298 @classmethod 

299 @abstractmethod 

300 def fromConnection(cls, connection: sqlalchemy.engine.Connection, *, origin: int, 

301 namespace: Optional[str] = None, writeable: bool = True) -> Database: 

302 """Create a new `Database` from an existing 

303 `sqlalchemy.engine.Connection`. 

304 

305 Parameters 

306 ---------- 

307 connection : `sqllachemy.engine.Connection` 

308 The connection for the the database. May be shared between 

309 `Database` instances. 

310 origin : `int` 

311 An integer ID that should be used as the default for any datasets, 

312 quanta, or other entities that use a (autoincrement, origin) 

313 compound primary key. 

314 namespace : `str`, optional 

315 A different database namespace (i.e. schema) the new instance 

316 should be associated with. If `None` (default), the namespace 

317 (if any) is inferred from the connection. 

318 writeable : `bool`, optional 

319 If `True`, allow write operations on the database, including 

320 ``CREATE TABLE``. 

321 

322 Returns 

323 ------- 

324 db : `Database` 

325 A new `Database` instance. 

326 

327 Notes 

328 ----- 

329 This method allows different `Database` instances to share the same 

330 connection, which is desirable when they represent different namespaces 

331 can be queried together. This also ties their transaction state, 

332 however; starting a transaction in any database automatically starts 

333 on in all other databases. 

334 """ 

335 raise NotImplementedError() 

336 

@contextmanager
def transaction(self, *, interrupting: bool = False) -> Iterator:
    """Return a context manager that represents a transaction.

    The transaction is committed when the block exits normally and rolled
    back (with the exception re-raised) otherwise.

    Parameters
    ----------
    interrupting : `bool`
        If `True`, this transaction block needs to be able to interrupt
        any existing one in order to yield correct behavior.
    """
    assert not (interrupting and self._connection.in_transaction()), (
        "Logic error in transaction nesting: an operation that would "
        "interrupt the active transaction context has been requested."
    )
    connection = self._connection
    # Only the outermost context gets a regular transaction; anything
    # nested inside an active one goes through ``begin_nested`` instead.
    opener = connection.begin_nested if connection.in_transaction() else connection.begin
    trans = opener()
    try:
        yield
        trans.commit()
    except BaseException:
        trans.rollback()
        raise

363 

@contextmanager
def declareStaticTables(self, *, create: bool) -> Iterator[StaticTablesContext]:
    """Return a context manager in which the database's static DDL schema
    can be declared.

    Parameters
    ----------
    create : `bool`
        If `True`, attempt to create all tables at the end of the context.
        If `False`, they will be assumed to already exist.

    Returns
    -------
    schema : `StaticTablesContext`
        A helper object that is used to add new tables.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if ``create`` is `True` and `Database.isWriteable` returns
        `False`.
    SchemaAlreadyDefinedError
        Raised if ``create`` is `True` and the namespace already contains
        tables.

    Examples
    --------
    Given a `Database` instance ``db``::

        with db.declareStaticTables(create=True) as schema:
            schema.addTable("table1", TableSpec(...))
            schema.addTable("table2", TableSpec(...))

    Notes
    -----
    A database's static DDL schema must be declared before any dynamic
    tables are managed via calls to `ensureTableExists` or
    `getExistingTable`. The order in which static schema tables are added
    inside the context block is unimportant: foreign key constraints are
    attached, and the tables created, only after the block has finished.
    """
    if create and not self.isWriteable():
        raise ReadOnlyDatabaseError(f"Cannot create tables in read-only database {self}.")
    self._metadata = sqlalchemy.MetaData(schema=self.namespace)
    try:
        context = StaticTablesContext(self)
        if create and context._tableNames:
            # The database looks initialized already; refuse to do anything
            # rather than risk modifying or destroying a valid schema.
            raise SchemaAlreadyDefinedError(f"Cannot create tables in non-empty database {self}.")
        yield context
        # All tables are declared now, so the deferred foreign keys can be
        # attached regardless of declaration order.
        for declaredTable, constraint in context._foreignKeys:
            declaredTable.append_constraint(constraint)
        if create:
            schemaMissing = (
                self.namespace is not None
                and self.namespace not in context._inspector.get_schema_names()
            )
            if schemaMissing:
                self._connection.execute(sqlalchemy.schema.CreateSchema(self.namespace))
            # Our tables have columns that use sqlalchemy Sequence objects,
            # and a sqlalchemy bug emits a deprecation warning from a
            # Sequence property whenever the sequence's repr is built.
            # Silence those warnings while the tables are created.
            with warnings.catch_warnings():
                warnings.simplefilter("ignore", category=sqlalchemy.exc.SADeprecationWarning)
                self._metadata.create_all(self._connection)
            # Run the registered initializers, in registration order.
            for initializer in context._initializers:
                initializer(self)
    except BaseException:
        # Any failure (including inside the ``with`` body) discards the
        # partially-declared schema.
        self._metadata = None
        raise

435 

@abstractmethod
def isWriteable(self) -> bool:
    """Report whether this client is able to modify the database.

    Returns
    -------
    writeable : `bool`
        `True` if this database can be modified by this client.
    """
    raise NotImplementedError()

441 

442 @abstractmethod 

443 def __str__(self) -> str: 

444 """Return a human-readable identifier for this `Database`, including 

445 any namespace or schema that identifies its names within a `Registry`. 

446 """ 

447 raise NotImplementedError() 

448 

def shrinkDatabaseEntityName(self, original: str) -> str:
    """Produce a version of the given name that fits within this database
    engine's length limits for table, constraint, index, and sequence
    names.

    Implementations should not assume that simple truncation is safe,
    because multiple long names often begin with the same prefix.

    This default implementation returns the name unchanged.

    Parameters
    ----------
    original : `str`
        The original name.

    Returns
    -------
    shrunk : `str`
        The new, possibly shortened name.
    """
    return original

470 

def expandDatabaseEntityName(self, shrunk: str) -> str:
    """Recover the original name of a database entity whose name was too
    long to fit within the database engine's limits.

    This default implementation returns the name unchanged.

    Parameters
    ----------
    shrunk : `str`
        The possibly shortened name.

    Returns
    -------
    original : `str`
        The original name.
    """
    return shrunk

486 

487 def _mangleTableName(self, name: str) -> str: 

488 """Map a logical, user-visible table name to the true table name used 

489 in the database. 

490 

491 The default implementation returns the given name unchanged. 

492 

493 Parameters 

494 ---------- 

495 name : `str` 

496 Input table name. Should not include a namespace (i.e. schema) 

497 prefix. 

498 

499 Returns 

500 ------- 

501 mangled : `str` 

502 Mangled version of the table name (still with no namespace prefix). 

503 

504 Notes 

505 ----- 

506 Reimplementations of this method must be idempotent - mangling an 

507 already-mangled name must have no effect. 

508 """ 

509 return name 

510 

511 def _makeColumnConstraints(self, table: str, spec: ddl.FieldSpec) -> List[sqlalchemy.CheckConstraint]: 

512 """Create constraints based on this spec. 

513 

514 Parameters 

515 ---------- 

516 table : `str` 

517 Name of the table this column is being added to. 

518 spec : `FieldSpec` 

519 Specification for the field to be added. 

520 

521 Returns 

522 ------- 

523 constraint : `list` of `sqlalchemy.CheckConstraint` 

524 Constraint added for this column. 

525 """ 

526 # By default we return no additional constraints 

527 return [] 

528 

def _convertFieldSpec(self, table: str, spec: ddl.FieldSpec, metadata: sqlalchemy.MetaData,
                      **kwds: Any) -> sqlalchemy.schema.Column:
    """Translate a `FieldSpec` into a `sqlalchemy.schema.Column`.

    Parameters
    ----------
    table : `str`
        Name of the table this column is being added to.
    spec : `FieldSpec`
        Specification for the field to be added.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema this field's table is
        being added to.
    **kwds
        Additional keyword arguments to forward to the
        `sqlalchemy.schema.Column` constructor. This is provided to make
        it easier for derived classes to delegate to ``super()`` while
        making only minor changes.

    Returns
    -------
    column : `sqlalchemy.schema.Column`
        SQLAlchemy representation of the field.
    """
    columnArgs = [spec.name, spec.getSizedColumnType()]
    if spec.autoincrement:
        # Databases without native autoincrement support need a sequence;
        # sqlalchemy ignores it for databases that do support it.
        sequenceName = self.shrinkDatabaseEntityName(f"{table}_seq_{spec.name}")
        columnArgs.append(sqlalchemy.Sequence(sequenceName, metadata=metadata))
    assert spec.doc is None or isinstance(spec.doc, str), f"Bad doc for {table}.{spec.name}."
    return sqlalchemy.schema.Column(
        *columnArgs,
        nullable=spec.nullable,
        primary_key=spec.primaryKey,
        comment=spec.doc,
        **kwds
    )

563 

def _convertForeignKeySpec(self, table: str, spec: ddl.ForeignKeySpec, metadata: sqlalchemy.MetaData,
                           **kwds: Any) -> sqlalchemy.schema.ForeignKeyConstraint:
    """Convert a `ForeignKeySpec` to a
    `sqlalchemy.schema.ForeignKeyConstraint`.

    Parameters
    ----------
    table : `str`
        Name of the table this foreign key is being added to.
    spec : `ForeignKeySpec`
        Specification for the foreign key to be added.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema this constraint is
        being added to. Not used by this base implementation.
    **kwds
        Additional keyword arguments to forward to the
        `sqlalchemy.schema.ForeignKeyConstraint` constructor. This is
        provided to make it easier for derived classes to delegate to
        ``super()`` while making only minor changes.

    Returns
    -------
    constraint : `sqlalchemy.schema.ForeignKeyConstraint`
        SQLAlchemy representation of the constraint.
    """
    # The mangled name of the referenced table is needed both for the
    # constraint name and for the target column references.
    targetTable = self._mangleTableName(spec.table)
    name = self.shrinkDatabaseEntityName(
        "_".join(["fkey", table, targetTable] + list(spec.target) + list(spec.source))
    )
    # ``kwds`` is forwarded as documented, consistent with
    # `_convertFieldSpec` and `_convertTableSpec`.
    return sqlalchemy.schema.ForeignKeyConstraint(
        spec.source,
        [f"{targetTable}.{col}" for col in spec.target],
        name=name,
        ondelete=spec.onDelete,
        **kwds
    )

599 

def _convertTableSpec(self, name: str, spec: ddl.TableSpec, metadata: sqlalchemy.MetaData,
                      **kwds: Any) -> sqlalchemy.schema.Table:
    """Translate a `TableSpec` into a `sqlalchemy.schema.Table`.

    Parameters
    ----------
    name : `str`
        Name of the table; it is passed through `_mangleTableName` before
        use.
    spec : `TableSpec`
        Specification for the table to be added.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema this table is being
        added to.
    **kwds
        Additional keyword arguments to forward to the
        `sqlalchemy.schema.Table` constructor. This is provided to make it
        easier for derived classes to delegate to ``super()`` while making
        only minor changes.

    Returns
    -------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy representation of the table.

    Notes
    -----
    This method does not handle ``spec.foreignKeys`` at all (beyond adding
    indexes for them), in order to avoid circular dependencies. The
    constraints are added by higher-level logic in `ensureTableExists`,
    `getExistingTable`, and `declareStaticTables`.
    """
    name = self._mangleTableName(name)
    shrink = self.shrinkDatabaseEntityName

    # Columns come first, followed by any per-column constraints.
    args = [self._convertFieldSpec(name, fieldSpec, metadata) for fieldSpec in spec.fields]
    for fieldSpec in spec.fields:
        args.extend(self._makeColumnConstraints(name, fieldSpec))

    # Remember every column combination that is already covered by the
    # primary key or a unique constraint, so that no duplicate explicit or
    # foreign key index is added for it.
    primaryKeyColumns = tuple(fieldSpec.name for fieldSpec in spec.fields if fieldSpec.primaryKey)
    allIndexes = {primaryKeyColumns}

    for columns in spec.unique:
        args.append(
            sqlalchemy.schema.UniqueConstraint(
                *columns,
                name=shrink("_".join([name, "unq"] + list(columns)))
            )
        )
    allIndexes.update(spec.unique)

    for columns in spec.indexes:
        if columns in allIndexes:
            continue
        args.append(
            sqlalchemy.schema.Index(
                shrink("_".join([name, "idx"] + list(columns))),
                *columns,
                unique=(columns in spec.unique)
            )
        )
    allIndexes.update(spec.indexes)

    for fk in spec.foreignKeys:
        if fk.addIndex and fk.source not in allIndexes:
            args.append(
                sqlalchemy.schema.Index(
                    shrink("_".join((name, "fkidx") + fk.source)),
                    *fk.source,
                )
            )

    assert spec.doc is None or isinstance(spec.doc, str), f"Bad doc for {name}."
    return sqlalchemy.schema.Table(name, metadata, *args, comment=spec.doc, info=spec, **kwds)

665 

def ensureTableExists(self, name: str, spec: ddl.TableSpec) -> sqlalchemy.schema.Table:
    """Make sure a table with the given name and specification exists,
    creating it if necessary.

    Parameters
    ----------
    name : `str`
        Name of the table (not including namespace qualifiers).
    spec : `TableSpec`
        Specification for the table. This will be used when creating the
        table, and *may* be used when obtaining an existing table to check
        for consistency, but no such check is guaranteed.

    Returns
    -------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy representation of the table.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False`, and the table does not
        already exist.
    DatabaseConflictError
        Raised if the table exists but ``spec`` is inconsistent with its
        definition.

    Notes
    -----
    This method may not be called within transactions. It may be called on
    read-only databases if and only if the table does in fact already
    exist.

    Subclasses may override this method, but usually should not need to.
    """
    assert not self._connection.in_transaction(), "Table creation interrupts transactions."
    assert self._metadata is not None, "Static tables must be declared before dynamic tables."
    existing = self.getExistingTable(name, spec)
    if existing is not None:
        return existing
    # The table is missing, so it has to be created - which needs write
    # access.
    if not self.isWriteable():
        raise ReadOnlyDatabaseError(
            f"Table {name} does not exist, and cannot be created "
            f"because database {self} is read-only."
        )
    created = self._convertTableSpec(name, spec, self._metadata)
    for foreignKeySpec in spec.foreignKeys:
        constraint = self._convertForeignKeySpec(name, foreignKeySpec, self._metadata)
        created.append_constraint(constraint)
    created.create(self._connection)
    return created

716 

def getExistingTable(self, name: str, spec: ddl.TableSpec) -> Optional[sqlalchemy.schema.Table]:
    """Look up an existing table with the given name and specification.

    Parameters
    ----------
    name : `str`
        Name of the table (not including namespace qualifiers).
    spec : `TableSpec`
        Specification for the table. This will be used when creating the
        SQLAlchemy representation of the table, and it is used to
        check that the actual table in the database is consistent.

    Returns
    -------
    table : `sqlalchemy.schema.Table` or `None`
        SQLAlchemy representation of the table, or `None` if it does not
        exist.

    Raises
    ------
    DatabaseConflictError
        Raised if the table exists but ``spec`` is inconsistent with its
        definition.

    Notes
    -----
    This method can be called within transactions and never modifies the
    database.

    Subclasses may override this method, but usually should not need to.
    """
    assert self._metadata is not None, "Static tables must be declared before dynamic tables."
    name = self._mangleTableName(name)
    metadataKey = name if self.namespace is None else f"{self.namespace}.{name}"
    table = self._metadata.tables.get(metadataKey)

    if table is not None:
        # Already known to this client: only compare the column names.
        if spec.fields.names != set(table.columns.keys()):
            raise DatabaseConflictError(f"Table '{name}' has already been defined differently; the new "
                                        f"specification has columns {list(spec.fields.names)}, while "
                                        f"the previous definition has {list(table.columns.keys())}.")
        return table

    # Not known to this client yet; ask the database itself.
    inspector = sqlalchemy.engine.reflection.Inspector(self._connection)
    if name not in inspector.get_table_names(schema=self.namespace):
        return None
    _checkExistingTableDefinition(name, spec, inspector.get_columns(name, schema=self.namespace))
    # Build the SQLAlchemy representation from the spec (no reflection).
    table = self._convertTableSpec(name, spec, self._metadata)
    for foreignKeySpec in spec.foreignKeys:
        table.append_constraint(self._convertForeignKeySpec(name, foreignKeySpec, self._metadata))
    return table

765 

def makeTemporaryTable(self, spec: ddl.TableSpec, name: Optional[str] = None) -> sqlalchemy.schema.Table:
    """Create a temporary table.

    Parameters
    ----------
    spec : `TableSpec`
        Specification for the table.
    name : `str`, optional
        A unique (within this session/connection) name for the table.
        Subclasses may override to modify the actual name used. If not
        provided, a unique name will be generated.

    Returns
    -------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy representation of the table.

    Raises
    ------
    ValueError
        Raised if the name, after being transformed by the `Database`,
        collides with an existing temporary table.

    Notes
    -----
    Temporary tables may be created, dropped, and written to even in
    read-only databases - at least according to the Python-level
    protections in the `Database` classes. Server permissions may say
    otherwise, but in that case they probably need to be modified to
    support the full range of expected read-only butler behavior.

    Temporary table rows are guaranteed to be dropped when a connection is
    closed. `Database` implementations are permitted to allow the table to
    remain as long as this is transparent to the user (i.e. "creating" the
    temporary table in a new session should not be an error, even if it
    does nothing).

    It may not be possible to use temporary tables within transactions with
    some database engines (or configurations thereof).
    """
    if name is None:
        # No name requested: generate a random one.
        name = f"tmp_{uuid.uuid4().hex}"
    # Temporary tables are declared with a blank schema rather than this
    # database's namespace.
    table = self._convertTableSpec(
        name,
        spec,
        self._metadata,
        prefixes=['TEMPORARY'],
        schema=sqlalchemy.schema.BLANK_SCHEMA,
    )
    if table.key in self._tempTables and table.key != name:
        raise ValueError(f"A temporary table with name {name} (transformed to {table.key} by "
                         f"Database) already exists.")
    for foreignKeySpec in spec.foreignKeys:
        constraint = self._convertForeignKeySpec(name, foreignKeySpec, self._metadata)
        table.append_constraint(constraint)
    table.create(self._connection)
    self._tempTables.add(table.key)
    return table

813 

def dropTemporaryTable(self, table: sqlalchemy.schema.Table) -> None:
    """Drop a temporary table.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        A SQLAlchemy object returned by a previous call to
        `makeTemporaryTable`.

    Raises
    ------
    TypeError
        Raised if ``table`` is not currently registered as a temporary
        table of this `Database`.
    """
    key = table.key
    if key not in self._tempTables:
        raise TypeError(f"Table {table.key} was not created by makeTemporaryTable.")
    table.drop(self._connection)
    self._tempTables.remove(key)

828 

def sync(self, table: sqlalchemy.schema.Table, *,
         keys: Dict[str, Any],
         compared: Optional[Dict[str, Any]] = None,
         extra: Optional[Dict[str, Any]] = None,
         returning: Optional[Sequence[str]] = None,
         ) -> Tuple[Optional[Dict[str, Any]], bool]:
    """Insert into a table as necessary to ensure database contains
    values equivalent to the given ones.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table to be queried and possibly inserted into.
    keys : `dict`
        Column name-value pairs used to search for an existing row; must
        be a combination that can be used to select a single row if one
        exists.  If such a row does not exist, these values are used in
        the insert.
    compared : `dict`, optional
        Column name-value pairs that are compared to those in any existing
        row.  If such a row does not exist, these values are used in the
        insert.
    extra : `dict`, optional
        Column name-value pairs that are ignored if a matching row exists,
        but used in an insert if one is necessary.
    returning : `~collections.abc.Sequence` of `str`, optional
        The names of columns whose values should be returned.

    Returns
    -------
    row : `dict`, optional
        The value of the fields indicated by ``returning``, or `None` if
        ``returning`` is `None`.
    inserted : `bool`
        If `True`, a new row was inserted.

    Raises
    ------
    DatabaseConflictError
        Raised if the values in ``compared`` do not match the values in the
        database.
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False`, and no matching record
        already exists.
    RuntimeError
        Raised if ``keys`` matches more than one row (i.e. it does not
        comprise a unique constraint), or if the table is not in the
        expected state immediately after a successful insert.
    sqlalchemy.exc.IntegrityError
        Re-raised if the insert failed and no row matching ``keys`` exists,
        meaning some other constraint was violated.

    Notes
    -----
    This method may not be called within transactions.  It may be called on
    read-only databases if and only if the matching row does in fact
    already exist.
    """

    def check() -> Tuple[int, Optional[List[str]], Optional[List]]:
        """Query for a row that matches the ``keys`` argument, and compare
        to what was given by the caller.

        Returns
        -------
        n : `int`
            Number of matching rows.  ``n != 1`` is always an error, but
            it's a different kind of error depending on where `check` is
            being called.
        bad : `list` of `str`, or `None`
            Descriptions of the entries of ``compared`` for which the
            existing value did not match the given one.  A non-empty
            ``bad`` is always an error, but a different kind depending on
            context.  `None` if ``n != 1``.
        result : `list` or `None`
            Results in the database that correspond to the columns given
            in ``returning``, or `None` if ``returning is None`` or
            ``n != 1``.
        """
        toSelect: Set[str] = set()
        if compared is not None:
            toSelect.update(compared.keys())
        if returning is not None:
            toSelect.update(returning)
        if not toSelect:
            # Need to select some column, even if we just want to see
            # how many rows we get back.
            toSelect.add(next(iter(keys.keys())))
        selectSql = sqlalchemy.sql.select(
            [table.columns[k].label(k) for k in toSelect]
        ).select_from(table).where(
            sqlalchemy.sql.and_(*[table.columns[k] == v for k, v in keys.items()])
        )
        fetched = list(self._connection.execute(selectSql).fetchall())
        if len(fetched) != 1:
            return len(fetched), None, None
        existing = fetched[0]
        if compared is not None:

            def safeNotEqual(a: Any, b: Any) -> bool:
                # astropy times are compared via the project helper rather
                # than with plain ``!=``.
                if isinstance(a, astropy.time.Time):
                    return not time_utils.times_equal(a, b)
                return a != b

            inconsistencies = [f"{k}: {existing[k]!r} != {v!r}"
                               for k, v in compared.items()
                               if safeNotEqual(existing[k], v)]
        else:
            inconsistencies = []
        if returning is not None:
            toReturn: Optional[list] = [existing[k] for k in returning]
        else:
            toReturn = None
        return 1, inconsistencies, toReturn

    if self.isWriteable() or table.key in self._tempTables:
        # Database is writeable.  Try an insert first, but allow it to fail
        # (in only specific ways).
        row = keys.copy()
        if compared is not None:
            row.update(compared)
        if extra is not None:
            row.update(extra)
        insertSql = table.insert().values(row)
        try:
            with self.transaction(interrupting=True):
                self._connection.execute(insertSql)
                # Need to perform check() for this branch inside the
                # transaction, so we roll back an insert that didn't do
                # what we expected.  That limits the extent to which we
                # can reduce duplication between this block and the other
                # ones that perform similar logic.
                n, bad, result = check()
                if n < 1:
                    raise RuntimeError("Insertion in sync did not seem to affect table.  This is a bug.")
                elif n > 1:
                    raise RuntimeError(f"Keys passed to sync {keys.keys()} do not comprise a "
                                       f"unique constraint for table {table.name}.")
                elif bad:
                    raise RuntimeError(
                        f"Conflict ({bad}) in sync after successful insert; this is "
                        f"possible if the same table is being updated by a concurrent "
                        f"process that isn't using sync, but it may also be a bug in "
                        f"daf_butler."
                    )
            # No exceptions, so it looks like we inserted the requested row
            # successfully.
            inserted = True
        except sqlalchemy.exc.IntegrityError as err:
            # Most likely cause is that an equivalent row already exists,
            # but it could also be some other constraint.  Query for the
            # row we think we matched to resolve that question.
            n, bad, result = check()
            if n < 1:
                # There was no matched row; insertion failed for some
                # completely different reason.  Just re-raise the original
                # IntegrityError.
                raise
            elif n > 1:
                # There were multiple matched rows, which means we
                # conflicted *and* the arguments were bad to begin with.
                # (Any count above one is an error; check() returns no
                # comparison or result data in that case.)
                raise RuntimeError(f"Keys passed to sync {keys.keys()} do not comprise a "
                                   f"unique constraint for table {table.name}.") from err
            elif bad:
                # No logic bug, but data conflicted on the keys given.
                raise DatabaseConflictError(f"Conflict in sync for table "
                                            f"{table.name} on column(s) {bad}.") from err
            # The desired row is already present and consistent with what
            # we tried to insert.
            inserted = False
    else:
        assert not self._connection.in_transaction(), (
            "Calling sync within a transaction block is an error even "
            "on a read-only database."
        )
        # Database is not writeable; just see if the row exists.
        n, bad, result = check()
        if n < 1:
            raise ReadOnlyDatabaseError("sync needs to insert, but database is read-only.")
        elif n > 1:
            raise RuntimeError("Keys passed to sync do not comprise a unique constraint.")
        elif bad:
            raise DatabaseConflictError(f"Conflict in sync on column(s) {bad}.")
        inserted = False
    if returning is None:
        return None, inserted
    else:
        assert result is not None
        return dict(zip(returning, result)), inserted

1010 

def insert(self, table: sqlalchemy.schema.Table, *rows: dict, returnIds: bool = False,
           select: Optional[sqlalchemy.sql.Select] = None,
           names: Optional[Iterable[str]] = None,
           ) -> Optional[List[int]]:
    """Insert rows into a table, either from explicit values or from a
    SELECT query, optionally returning autoincrement primary key values.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    returnIds : `bool`
        If `True` (`False` is default), return the values of the table's
        autoincrement primary key field (which must exist).
    select : `sqlalchemy.sql.Select`, optional
        A SELECT query expression to insert rows from.  Cannot be provided
        with either ``rows`` or ``returnIds=True``.
    names : `Iterable` [ `str` ], optional
        Names of columns in ``table`` to be populated, ordered to match the
        columns returned by ``select``.  Ignored if ``select`` is `None`.
        If not provided, the columns returned by ``select`` must be named
        to match the desired columns of ``table``.
    *rows
        Positional arguments are the rows to be inserted, as dictionaries
        mapping column name to value.  The keys in all dictionaries must
        be the same.

    Returns
    -------
    ids : `None`, or `list` of `int`
        If ``returnIds`` is `True`, a `list` containing the inserted
        values for the table's autoincrement primary key.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.
    TypeError
        Raised if ``select`` is combined with ``rows`` or
        ``returnIds=True``.

    Notes
    -----
    The default implementation uses bulk insert syntax when ``returnIds``
    is `False`, and a loop over single-row insert operations when it is
    `True`.

    Derived classes should reimplement when they can provide a more
    efficient implementation (especially for the latter case).

    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.
    """
    if not self.isWriteable() and table.key not in self._tempTables:
        raise ReadOnlyDatabaseError(f"Attempt to insert into read-only database '{self}'.")
    if select is not None:
        # INSERT ... SELECT: no explicit rows and no ID reporting allowed.
        if rows or returnIds:
            raise TypeError("'select' is incompatible with passing value rows or returnIds=True.")
        columnNames = select.columns.keys() if names is None else names
        self._connection.execute(table.insert().from_select(columnNames, select))
        return None
    if not rows:
        # Nothing to do; keep the return type consistent with returnIds.
        return [] if returnIds else None
    if returnIds:
        # One statement per row so each generated key can be read back.
        statement = table.insert()
        ids = []
        for row in rows:
            ids.append(self._connection.execute(statement, row).inserted_primary_key[0])
        return ids
    # Bulk insert of all rows in a single execute call.
    self._connection.execute(table.insert(), *rows)
    return None

1081 

@abstractmethod
def replace(self, table: sqlalchemy.schema.Table, *rows: dict) -> None:
    """Insert rows into a table, overwriting any existing rows whose
    primary key would otherwise conflict with a new row.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    *rows
        Positional arguments are the rows to be inserted, as dictionaries
        mapping column name to value.  The keys in all dictionaries must
        be the same.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    Implementations should raise a `sqlalchemy.exc.IntegrityError`
    exception when a constraint other than the primary key would be
    violated.

    Implementations are not required to support `replace` on tables
    with autoincrement keys.
    """
    # Abstract: concrete Database subclasses must supply the implementation.
    raise NotImplementedError()

1115 

def delete(self, table: sqlalchemy.schema.Table, columns: Iterable[str], *rows: dict) -> int:
    """Delete rows from a table, matching on the given key columns.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table that rows should be deleted from.
    columns : `~collections.abc.Iterable` of `str`
        The names of columns that will be used to constrain the rows to
        be deleted; these will be combined via ``AND`` to form the
        ``WHERE`` clause of the delete query.
    *rows
        Positional arguments are the keys of rows to be deleted, as
        dictionaries mapping column name to value.  The keys in all
        dictionaries must be exactly the names in ``columns``.

    Returns
    -------
    count : `int`
        Number of rows deleted.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    The default implementation should be sufficient for most derived
    classes.
    """
    if not self.isWriteable() and table.key not in self._tempTables:
        raise ReadOnlyDatabaseError(f"Attempt to delete from read-only database '{self}'.")
    if columns and not rows:
        # With no columns the request means "delete everything", which is
        # handled below.  With columns but no rows it is a constrained
        # bulk delete that matches nothing, so report zero rows affected
        # without touching the database.
        return 0
    statement = table.delete()
    # One bound-parameter equality per key column; values come from rows.
    conditions = []
    for name in columns:
        conditions.append(table.columns[name] == sqlalchemy.sql.bindparam(name))
    if conditions:
        statement = statement.where(sqlalchemy.sql.and_(*conditions))
    outcome = self._connection.execute(statement, *rows)
    return outcome.rowcount

1164 

def update(self, table: sqlalchemy.schema.Table, where: Dict[str, str], *rows: dict) -> int:
    """Update rows in a table, locating them via the ``where`` columns.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table containing the rows to be updated.
    where : `dict` [`str`, `str`]
        A mapping from the names of columns that will be used to search for
        existing rows to the keys that will hold these values in the
        ``rows`` dictionaries.  Note that these may not be the same due to
        SQLAlchemy limitations.
    *rows
        Positional arguments are the rows to be updated.  The keys in all
        dictionaries must be the same, and may correspond to either a
        value in the ``where`` dictionary or the name of a column to be
        updated.

    Returns
    -------
    count : `int`
        Number of rows matched (regardless of whether the update actually
        modified them).

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    The default implementation should be sufficient for most derived
    classes.
    """
    if not self.isWriteable() and table.key not in self._tempTables:
        raise ReadOnlyDatabaseError(f"Attempt to update read-only database '{self}'.")
    if not rows:
        # Nothing to update, so no rows can have been matched.
        return 0
    # Each search column is compared against a bound parameter whose name
    # is the corresponding key in the row dictionaries.
    conditions = []
    for columnName, paramName in where.items():
        conditions.append(table.columns[columnName] == sqlalchemy.sql.bindparam(paramName))
    statement = table.update().where(sqlalchemy.sql.and_(*conditions))
    outcome = self._connection.execute(statement, *rows)
    return outcome.rowcount

1210 

def query(self, sql: sqlalchemy.sql.FromClause,
          *args: Any, **kwds: Any) -> sqlalchemy.engine.ResultProxy:
    """Execute a SELECT query on this database's connection.

    Parameters
    ----------
    sql : `sqlalchemy.sql.FromClause`
        A SQLAlchemy representation of a ``SELECT`` query.
    *args
        Additional positional arguments are forwarded to
        `sqlalchemy.engine.Connection.execute`.
    **kwds
        Additional keyword arguments are forwarded to
        `sqlalchemy.engine.Connection.execute`.

    Returns
    -------
    result : `sqlalchemy.engine.ResultProxy`
        Query results.

    Notes
    -----
    The default implementation should be sufficient for most derived
    classes.
    """
    # TODO: should we guard against non-SELECT queries here?
    result = self._connection.execute(sql, *args, **kwds)
    return result

1238 

origin: int
"""Integer ID to use as the default for datasets, quanta, and any other
entities that have an (autoincrement, origin) compound primary key
(`int`).
"""

namespace: Optional[str]
"""Schema or namespace that this database instance is associated with
(`str` or `None`).
"""

1248 """