Coverage for python/lsst/daf/butler/registry/interfaces/_database.py: 14%
418 statements
« prev ^ index » next coverage.py v6.5.0, created at 2022-10-12 09:01 +0000
« prev ^ index » next coverage.py v6.5.0, created at 2022-10-12 09:01 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = [
24 "Database",
25 "ReadOnlyDatabaseError",
26 "DatabaseConflictError",
27 "SchemaAlreadyDefinedError",
28 "Session",
29 "StaticTablesContext",
30]
32import uuid
33import warnings
34from abc import ABC, abstractmethod
35from collections import defaultdict
36from contextlib import contextmanager
37from typing import Any, Callable, Dict, Iterable, Iterator, List, Optional, Sequence, Set, Tuple, Type, Union
39import astropy.time
40import sqlalchemy
42from ...core import TimespanDatabaseRepresentation, ddl, time_utils
43from ...core.named import NamedValueAbstractSet
44from .._exceptions import ConflictingDefinitionError
46_IN_SAVEPOINT_TRANSACTION = "IN_SAVEPOINT_TRANSACTION"
49def _checkExistingTableDefinition(name: str, spec: ddl.TableSpec, inspection: List[Dict[str, Any]]) -> None:
50 """Test that the definition of a table in a `ddl.TableSpec` and from
51 database introspection are consistent.
53 Parameters
54 ----------
55 name : `str`
56 Name of the table (only used in error messages).
57 spec : `ddl.TableSpec`
58 Specification of the table.
59 inspection : `dict`
60 Dictionary returned by
61 `sqlalchemy.engine.reflection.Inspector.get_columns`.
63 Raises
64 ------
65 DatabaseConflictError
66 Raised if the definitions are inconsistent.
67 """
68 columnNames = [c["name"] for c in inspection]
69 if spec.fields.names != set(columnNames):
70 raise DatabaseConflictError(
71 f"Table '{name}' exists but is defined differently in the database; "
72 f"specification has columns {list(spec.fields.names)}, while the "
73 f"table in the database has {columnNames}."
74 )
class ReadOnlyDatabaseError(RuntimeError):
    """Error signalling that a write operation was requested on a `Database`
    that only permits reads.
    """
class DatabaseConflictError(ConflictingDefinitionError):
    """Error signalling that what is stored in the database (row values or
    schema entities) disagrees with what this client expects to find.
    """
class SchemaAlreadyDefinedError(RuntimeError):
    """Error signalling an attempt to initialize a database schema in a
    database that already contains tables.
    """
class StaticTablesContext:
    """Collector for the static table declarations of one registry layer.

    `Database.declareStaticTables` creates an instance of this class and
    hands it to the caller; client code is not expected to construct one
    directly.
    """

    def __init__(self, db: Database):
        self._db = db
        # Callbacks registered through `addInitializer`.
        self._initializers: List[Callable[[Database], None]] = []
        # Foreign keys are only recorded here; they are attached to their
        # tables later, which is what lets tables be declared in any order.
        self._foreignKeys: List[Tuple[sqlalchemy.schema.Table, sqlalchemy.schema.ForeignKeyConstraint]] = []
        # Snapshot of the tables that already exist in this namespace.
        inspector = sqlalchemy.inspect(db._engine)
        self._inspector = inspector
        self._tableNames = frozenset(inspector.get_table_names(schema=db.namespace))

    def addTable(self, name: str, spec: ddl.TableSpec) -> sqlalchemy.schema.Table:
        """Declare a table and return its SQLAlchemy representation.

        The table may not actually be created until the end of the context
        created by `Database.declareStaticTables`, allowing tables to be
        declared in any order even in the presence of foreign key
        relationships.

        Parameters
        ----------
        name : `str`
            Logical name of the table; it is passed through the database's
            name mangling before use.
        spec : `ddl.TableSpec`
            Specification of the table.

        Returns
        -------
        table : `sqlalchemy.schema.Table`
            SQLAlchemy representation of the table, without its foreign key
            constraints attached yet.
        """
        db = self._db
        mangled = db._mangleTableName(name)
        if mangled in self._tableNames:
            # The table is already present in the database: make sure its
            # columns agree with the specification.
            existing = self._inspector.get_columns(mangled, schema=db.namespace)
            _checkExistingTableDefinition(mangled, spec, existing)
        metadata = db._metadata
        assert metadata is not None, "Guaranteed by context manager that returns this object."
        table = db._convertTableSpec(mangled, spec, metadata)
        self._foreignKeys.extend(
            (table, db._convertForeignKeySpec(mangled, fkSpec, metadata)) for fkSpec in spec.foreignKeys
        )
        return table

    def addTableTuple(self, specs: Tuple[ddl.TableSpec, ...]) -> Tuple[sqlalchemy.schema.Table, ...]:
        """Declare every table in a named tuple of specifications, returning
        the SQLAlchemy representations in a named tuple of the same type.

        The tables may not actually be created until the end of the context
        created by `Database.declareStaticTables`, allowing tables to be
        declared in any order even in the presence of foreign key
        relationships.

        Notes
        -----
        ``specs`` *must* be an instance of a type created by
        `collections.namedtuple`, not just a regular tuple: its field names
        are used as the table names, and the returned object is guaranteed
        to be of the same type. Because `~collections.namedtuple` is just a
        factory for `type` objects, not an actual type itself, this cannot be
        expressed with type annotations.
        """
        tables = (self.addTable(field, spec) for field, spec in zip(specs._fields, specs))  # type: ignore
        return specs._make(tables)  # type: ignore

    def addInitializer(self, initializer: Callable[[Database], None]) -> None:
        """Register a callable that performs one-time initialization of the
        database.

        Initialization can mean anything that changes the state of a database
        and needs to be done exactly once after the database schema was
        created, for example populating schema attributes.

        Parameters
        ----------
        initializer : callable
            Callable taking a single argument, a `Database` instance.
        """
        self._initializers.append(initializer)
class Session:
    """A persistent connection to a database.

    Parameters
    ----------
    db : `Database`
        Database instance.

    Notes
    -----
    Client code should not instantiate this class directly;
    `Database.session` should be used to create a context for a session::

        with db.session() as session:
            session.method()
            db.method()

    In the current implementation sessions can be nested, and transactions
    can be nested within a session. All nested sessions and transactions
    share the same database connection.

    This class exposes the limited subset of the database API that requires
    a persistent connection (e.g. temporary tables, whose lifetime is that
    of a session). Potentially most of the database API could be associated
    with this class.
    """

    def __init__(self, db: Database):
        self._db = db

    def makeTemporaryTable(self, spec: ddl.TableSpec, name: Optional[str] = None) -> sqlalchemy.schema.Table:
        """Create a temporary table.

        Parameters
        ----------
        spec : `TableSpec`
            Specification for the table.
        name : `str`, optional
            A unique (within this session/connection) name for the table.
            Subclasses may override to modify the actual name used. If not
            provided, a unique name will be generated.

        Returns
        -------
        table : `sqlalchemy.schema.Table`
            SQLAlchemy representation of the table.

        Raises
        ------
        RuntimeError
            Raised if the static schema has not been declared yet.
        ValueError
            Raised if the name, after transformation by the database, matches
            an already-registered temporary table with a different name.

        Notes
        -----
        Temporary tables may be created, dropped, and written to even in
        read-only databases - at least according to the Python-level
        protections in the `Database` classes. Server permissions may say
        otherwise, but in that case they probably need to be modified to
        support the full range of expected read-only butler behavior.

        Temporary table rows are guaranteed to be dropped when a connection
        is closed. `Database` implementations are permitted to allow the
        table to remain as long as this is transparent to the user (i.e.
        "creating" the temporary table in a new session should not be an
        error, even if it does nothing).

        It may not be possible to use temporary tables within transactions
        with some database engines (or configurations thereof).
        """
        tableName = f"tmp_{uuid.uuid4().hex}" if name is None else name
        db = self._db
        metadata = db._metadata
        if metadata is None:
            raise RuntimeError("Cannot create temporary table before static schema is defined.")
        table = db._convertTableSpec(
            tableName, spec, metadata, prefixes=["TEMPORARY"], schema=sqlalchemy.schema.BLANK_SCHEMA
        )
        # A clash is only reported when the key differs from the requested
        # name, i.e. when the collision was introduced by name transformation.
        if table.key in db._tempTables and table.key != tableName:
            raise ValueError(
                f"A temporary table with name {tableName} (transformed to {table.key} by "
                f"Database) already exists."
            )
        for fkSpec in spec.foreignKeys:
            table.append_constraint(db._convertForeignKeySpec(tableName, fkSpec, metadata))
        with db._connection() as connection:
            table.create(connection)
        db._tempTables.add(table.key)
        return table

    def dropTemporaryTable(self, table: sqlalchemy.schema.Table) -> None:
        """Drop a temporary table.

        Parameters
        ----------
        table : `sqlalchemy.schema.Table`
            A SQLAlchemy object returned by a previous call to
            `makeTemporaryTable`.

        Raises
        ------
        TypeError
            Raised if the table is not registered as a temporary table of
            this database.
        """
        db = self._db
        if table.key not in db._tempTables:
            raise TypeError(f"Table {table.key} was not created by makeTemporaryTable.")
        with db._connection() as connection:
            table.drop(connection)
        db._tempTables.remove(table.key)

    @contextmanager
    def temporary_table(
        self, spec: ddl.TableSpec, name: Optional[str] = None
    ) -> Iterator[sqlalchemy.schema.Table]:
        """Return a context manager that creates a temporary table on entry
        and drops it on exit.

        Parameters
        ----------
        spec : `ddl.TableSpec`
            Specification for the columns. Unique and foreign key constraints
            may be ignored.
        name : `str`, optional
            If provided, the name of the SQL construct. If not provided, an
            opaque but unique identifier is generated.

        Returns
        -------
        table : `sqlalchemy.schema.Table`
            SQLAlchemy representation of the table.
        """
        created = self.makeTemporaryTable(spec=spec, name=name)
        try:
            yield created
        finally:
            # Drop the table even if the body of the ``with`` block raised.
            self.dropTemporaryTable(created)
296class Database(ABC):
297 """An abstract interface that represents a particular database engine's
298 representation of a single schema/namespace/database.
300 Parameters
301 ----------
302 origin : `int`
303 An integer ID that should be used as the default for any datasets,
304 quanta, or other entities that use a (autoincrement, origin) compound
305 primary key.
306 engine : `sqlalchemy.engine.Engine`
307 The SQLAlchemy engine for this `Database`.
308 namespace : `str`, optional
309 Name of the schema or namespace this instance is associated with.
310 This is passed as the ``schema`` argument when constructing a
311 `sqlalchemy.schema.MetaData` instance. We use ``namespace`` instead to
312 avoid confusion between "schema means namespace" and "schema means
313 table definitions".
315 Notes
316 -----
317 `Database` requires all write operations to go through its special named
318 methods. Our write patterns are sufficiently simple that we don't really
319 need the full flexibility of SQL insert/update/delete syntax, and we need
320 non-standard (but common) functionality in these operations sufficiently
321 often that it seems worthwhile to provide our own generic API.
323 In contrast, `Database.query` allows arbitrary ``SELECT`` queries (via
324 their SQLAlchemy representation) to be run, as we expect these to require
325 significantly more sophistication while still being limited to standard
326 SQL.
328 `Database` itself has several underscore-prefixed attributes:
330 - ``_engine``: SQLAlchemy object representing its engine.
331 - ``_connection``: method returning a context manager for
332 `sqlalchemy.engine.Connection` object.
333 - ``_metadata``: the `sqlalchemy.schema.MetaData` object representing
334 the tables and other schema entities.
336 These are considered protected (derived classes may access them, but other
337 code should not), and read-only, aside from executing SQL via
338 ``_connection``.
339 """
341 def __init__(self, *, origin: int, engine: sqlalchemy.engine.Engine, namespace: Optional[str] = None):
342 self.origin = origin
343 self.namespace = namespace
344 self._engine = engine
345 self._session_connection: Optional[sqlalchemy.engine.Connection] = None
346 self._metadata: Optional[sqlalchemy.schema.MetaData] = None
347 self._tempTables: Set[str] = set()
349 def __repr__(self) -> str:
350 # Rather than try to reproduce all the parameters used to create
351 # the object, instead report the more useful information of the
352 # connection URL.
353 if self._engine.url.password is not None:
354 uri = str(self._engine.url.set(password="***"))
355 else:
356 uri = str(self._engine.url)
357 if self.namespace:
358 uri += f"#{self.namespace}"
359 return f'{type(self).__name__}("{uri}")'
361 @classmethod
362 def makeDefaultUri(cls, root: str) -> Optional[str]:
363 """Create a default connection URI appropriate for the given root
364 directory, or `None` if there can be no such default.
365 """
366 return None
368 @classmethod
369 def fromUri(
370 cls, uri: str, *, origin: int, namespace: Optional[str] = None, writeable: bool = True
371 ) -> Database:
372 """Construct a database from a SQLAlchemy URI.
374 Parameters
375 ----------
376 uri : `str`
377 A SQLAlchemy URI connection string.
378 origin : `int`
379 An integer ID that should be used as the default for any datasets,
380 quanta, or other entities that use a (autoincrement, origin)
381 compound primary key.
382 namespace : `str`, optional
383 A database namespace (i.e. schema) the new instance should be
384 associated with. If `None` (default), the namespace (if any) is
385 inferred from the URI.
386 writeable : `bool`, optional
387 If `True`, allow write operations on the database, including
388 ``CREATE TABLE``.
390 Returns
391 -------
392 db : `Database`
393 A new `Database` instance.
394 """
395 return cls.fromEngine(
396 cls.makeEngine(uri, writeable=writeable), origin=origin, namespace=namespace, writeable=writeable
397 )
399 @classmethod
400 @abstractmethod
401 def makeEngine(cls, uri: str, *, writeable: bool = True) -> sqlalchemy.engine.Engine:
402 """Create a `sqlalchemy.engine.Engine` from a SQLAlchemy URI.
404 Parameters
405 ----------
406 uri : `str`
407 A SQLAlchemy URI connection string.
408 writeable : `bool`, optional
409 If `True`, allow write operations on the database, including
410 ``CREATE TABLE``.
412 Returns
413 -------
414 engine : `sqlalchemy.engine.Engine`
415 A database engine.
417 Notes
418 -----
419 Subclasses that support other ways to connect to a database are
420 encouraged to add optional arguments to their implementation of this
421 method, as long as they maintain compatibility with the base class
422 call signature.
423 """
424 raise NotImplementedError()
426 @classmethod
427 @abstractmethod
428 def fromEngine(
429 cls,
430 engine: sqlalchemy.engine.Engine,
431 *,
432 origin: int,
433 namespace: Optional[str] = None,
434 writeable: bool = True,
435 ) -> Database:
436 """Create a new `Database` from an existing `sqlalchemy.engine.Engine`.
438 Parameters
439 ----------
440 engine : `sqlalchemy.engine.Engine`
441 The engine for the database. May be shared between `Database`
442 instances.
443 origin : `int`
444 An integer ID that should be used as the default for any datasets,
445 quanta, or other entities that use a (autoincrement, origin)
446 compound primary key.
447 namespace : `str`, optional
448 A different database namespace (i.e. schema) the new instance
449 should be associated with. If `None` (default), the namespace
450 (if any) is inferred from the connection.
451 writeable : `bool`, optional
452 If `True`, allow write operations on the database, including
453 ``CREATE TABLE``.
455 Returns
456 -------
457 db : `Database`
458 A new `Database` instance.
460 Notes
461 -----
462 This method allows different `Database` instances to share the same
463 engine, which is desirable when they represent different namespaces
464 can be queried together.
465 """
466 raise NotImplementedError()
    @contextmanager
    def session(self) -> Iterator:
        """Return a context manager that represents a session (persistent
        connection to a database).

        Returns
        -------
        session : `Session`
            Object yielded by the context manager. If a session is already
            active its connection is reused and left open on exit; otherwise
            a new connection is opened on entry and closed on exit.
        """
        if self._session_connection is not None:
            # session already started, just reuse that
            yield Session(self)
        else:
            try:
                # open new connection and close it when done
                self._session_connection = self._engine.connect()
                yield Session(self)
            finally:
                # The connection is still `None` here if `connect` itself
                # failed, in which case there is nothing to clean up.
                if self._session_connection is not None:
                    self._session_connection.close()
                    self._session_connection = None
                    # Temporary tables only live within session
                    self._tempTables = set()
488 @contextmanager
489 def transaction(
490 self,
491 *,
492 interrupting: bool = False,
493 savepoint: bool = False,
494 lock: Iterable[sqlalchemy.schema.Table] = (),
495 ) -> Iterator:
496 """Return a context manager that represents a transaction.
498 Parameters
499 ----------
500 interrupting : `bool`, optional
501 If `True` (`False` is default), this transaction block may not be
502 nested without an outer one, and attempting to do so is a logic
503 (i.e. assertion) error.
504 savepoint : `bool`, optional
505 If `True` (`False` is default), create a `SAVEPOINT`, allowing
506 exceptions raised by the database (e.g. due to constraint
507 violations) during this transaction's context to be caught outside
508 it without also rolling back all operations in an outer transaction
509 block. If `False`, transactions may still be nested, but a
510 rollback may be generated at any level and affects all levels, and
511 commits are deferred until the outermost block completes. If any
512 outer transaction block was created with ``savepoint=True``, all
513 inner blocks will be as well (regardless of the actual value
514 passed). This has no effect if this is the outermost transaction.
515 lock : `Iterable` [ `sqlalchemy.schema.Table` ], optional
516 A list of tables to lock for the duration of this transaction.
517 These locks are guaranteed to prevent concurrent writes and allow
518 this transaction (only) to acquire the same locks (others should
519 block), but only prevent concurrent reads if the database engine
520 requires that in order to block concurrent writes.
522 Notes
523 -----
524 All transactions on a connection managed by one or more `Database`
525 instances _must_ go through this method, or transaction state will not
526 be correctly managed.
527 """
528 # need a connection, use session to manage it
529 with self.session():
530 assert self._session_connection is not None
531 connection = self._session_connection
532 assert not (interrupting and connection.in_transaction()), (
533 "Logic error in transaction nesting: an operation that would "
534 "interrupt the active transaction context has been requested."
535 )
536 # We remember whether we are already in a SAVEPOINT transaction via
537 # the connection object's 'info' dict, which is explicitly for user
538 # information like this. This is safer than a regular `Database`
539 # instance attribute, because it guards against multiple `Database`
540 # instances sharing the same connection. The need to use our own
541 # flag here to track whether we're in a nested transaction should
542 # go away in SQLAlchemy 1.4, which seems to have a
543 # `Connection.in_nested_transaction()` method.
544 savepoint = savepoint or connection.info.get(_IN_SAVEPOINT_TRANSACTION, False)
545 connection.info[_IN_SAVEPOINT_TRANSACTION] = savepoint
546 trans: sqlalchemy.engine.Transaction
547 if connection.in_transaction() and savepoint:
548 trans = connection.begin_nested()
549 elif not connection.in_transaction():
550 # Use a regular (non-savepoint) transaction always for the
551 # outermost context.
552 trans = connection.begin()
553 else:
554 # Nested non-savepoint transactions, don't do anything.
555 trans = None
556 self._lockTables(connection, lock)
557 try:
558 yield
559 if trans is not None:
560 trans.commit()
561 except BaseException:
562 if trans is not None:
563 trans.rollback()
564 raise
565 finally:
566 if not connection.in_transaction():
567 connection.info.pop(_IN_SAVEPOINT_TRANSACTION, None)
569 @contextmanager
570 def _connection(self) -> Iterator[sqlalchemy.engine.Connection]:
571 """Return context manager for Connection."""
572 if self._session_connection is not None:
573 # It means that we are in Session context, but we may not be in
574 # transaction context. Start a short transaction in that case.
575 if self._session_connection.in_transaction():
576 yield self._session_connection
577 else:
578 with self._session_connection.begin():
579 yield self._session_connection
580 else:
581 # Make new connection and transaction, transaction will be
582 # committed on context exit.
583 with self._engine.begin() as connection:
584 yield connection
586 @abstractmethod
587 def _lockTables(
588 self, connection: sqlalchemy.engine.Connection, tables: Iterable[sqlalchemy.schema.Table] = ()
589 ) -> None:
590 """Acquire locks on the given tables.
592 This is an implementation hook for subclasses, called by `transaction`.
593 It should not be called directly by other code.
595 Parameters
596 ----------
597 connection : `sqlalchemy.engine.Connection`
598 Database connection object. It is guaranteed that transaction is
599 already in a progress for this connection.
600 tables : `Iterable` [ `sqlalchemy.schema.Table` ], optional
601 A list of tables to lock for the duration of this transaction.
602 These locks are guaranteed to prevent concurrent writes and allow
603 this transaction (only) to acquire the same locks (others should
604 block), but only prevent concurrent reads if the database engine
605 requires that in order to block concurrent writes.
606 """
607 raise NotImplementedError()
609 def isTableWriteable(self, table: sqlalchemy.schema.Table) -> bool:
610 """Check whether a table is writeable, either because the database
611 connection is read-write or the table is a temporary table.
613 Parameters
614 ----------
615 table : `sqlalchemy.schema.Table`
616 SQLAlchemy table object to check.
618 Returns
619 -------
620 writeable : `bool`
621 Whether this table is writeable.
622 """
623 return self.isWriteable() or table.key in self._tempTables
625 def assertTableWriteable(self, table: sqlalchemy.schema.Table, msg: str) -> None:
626 """Raise if the given table is not writeable, either because the
627 database connection is read-write or the table is a temporary table.
629 Parameters
630 ----------
631 table : `sqlalchemy.schema.Table`
632 SQLAlchemy table object to check.
633 msg : `str`, optional
634 If provided, raise `ReadOnlyDatabaseError` instead of returning
635 `False`, with this message.
636 """
637 if not self.isTableWriteable(table):
638 raise ReadOnlyDatabaseError(msg)
    @contextmanager
    def declareStaticTables(self, *, create: bool) -> Iterator[StaticTablesContext]:
        """Return a context manager in which the database's static DDL schema
        can be declared.

        Parameters
        ----------
        create : `bool`
            If `True`, attempt to create all tables at the end of the context.
            If `False`, they will be assumed to already exist.

        Returns
        -------
        schema : `StaticTablesContext`
            A helper object that is used to add new tables.

        Raises
        ------
        ReadOnlyDatabaseError
            Raised if ``create`` is `True` and `Database.isWriteable` is
            `False`.
        SchemaAlreadyDefinedError
            Raised if ``create`` is `True` and the namespace already contains
            tables.

        Examples
        --------
        Given a `Database` instance ``db``::

            with db.declareStaticTables(create=True) as schema:
                schema.addTable("table1", TableSpec(...))
                schema.addTable("table2", TableSpec(...))

        Notes
        -----
        A database's static DDL schema must be declared before any dynamic
        tables are managed via calls to `ensureTableExists` or
        `getExistingTable`. The order in which static schema tables are added
        inside the context block is unimportant; they will automatically be
        sorted and added in an order consistent with their foreign key
        relationships.
        """
        if create and not self.isWriteable():
            raise ReadOnlyDatabaseError(f"Cannot create tables in read-only database {self}.")
        # A fresh MetaData object replaces any previous one; it is reset to
        # `None` below if anything in this method or in the caller's block
        # raises.
        self._metadata = sqlalchemy.MetaData(schema=self.namespace)
        try:
            context = StaticTablesContext(self)
            if create and context._tableNames:
                # Looks like database is already initialized, to avoid danger
                # of modifying/destroying valid schema we refuse to do
                # anything in this case
                raise SchemaAlreadyDefinedError(f"Cannot create tables in non-empty database {self}.")
            yield context
            # All tables are declared now, so the foreign keys recorded by
            # the context can be attached regardless of declaration order.
            for table, foreignKey in context._foreignKeys:
                table.append_constraint(foreignKey)
            if create:
                # Create the namespace (schema) itself first if it is missing.
                if self.namespace is not None:
                    if self.namespace not in context._inspector.get_schema_names():
                        with self._connection() as connection:
                            connection.execute(sqlalchemy.schema.CreateSchema(self.namespace))
                # In our tables we have columns that make use of sqlalchemy
                # Sequence objects. There is currently a bug in sqlalchemy that
                # causes a deprecation warning to be thrown on a property of
                # the Sequence object when the repr for the sequence is
                # created. Here a filter is used to catch these deprecation
                # warnings when tables are created.
                with warnings.catch_warnings():
                    warnings.simplefilter("ignore", category=sqlalchemy.exc.SADeprecationWarning)
                    self._metadata.create_all(self._engine)
                # call all initializer methods sequentially
                for init in context._initializers:
                    init(self)
        except BaseException:
            # Leave no half-declared schema behind on any failure.
            self._metadata = None
            raise
713 @abstractmethod
714 def isWriteable(self) -> bool:
715 """Return `True` if this database can be modified by this client."""
716 raise NotImplementedError()
718 @abstractmethod
719 def __str__(self) -> str:
720 """Return a human-readable identifier for this `Database`, including
721 any namespace or schema that identifies its names within a `Registry`.
722 """
723 raise NotImplementedError()
725 @property
726 def dialect(self) -> sqlalchemy.engine.Dialect:
727 """The SQLAlchemy dialect for this database engine
728 (`sqlalchemy.engine.Dialect`).
729 """
730 return self._engine.dialect
732 def shrinkDatabaseEntityName(self, original: str) -> str:
733 """Return a version of the given name that fits within this database
734 engine's length limits for table, constraint, indexes, and sequence
735 names.
737 Implementations should not assume that simple truncation is safe,
738 because multiple long names often begin with the same prefix.
740 The default implementation simply returns the given name.
742 Parameters
743 ----------
744 original : `str`
745 The original name.
747 Returns
748 -------
749 shrunk : `str`
750 The new, possibly shortened name.
751 """
752 return original
754 def expandDatabaseEntityName(self, shrunk: str) -> str:
755 """Retrieve the original name for a database entity that was too long
756 to fit within the database engine's limits.
758 Parameters
759 ----------
760 original : `str`
761 The original name.
763 Returns
764 -------
765 shrunk : `str`
766 The new, possibly shortened name.
767 """
768 return shrunk
770 def _mangleTableName(self, name: str) -> str:
771 """Map a logical, user-visible table name to the true table name used
772 in the database.
774 The default implementation returns the given name unchanged.
776 Parameters
777 ----------
778 name : `str`
779 Input table name. Should not include a namespace (i.e. schema)
780 prefix.
782 Returns
783 -------
784 mangled : `str`
785 Mangled version of the table name (still with no namespace prefix).
787 Notes
788 -----
789 Reimplementations of this method must be idempotent - mangling an
790 already-mangled name must have no effect.
791 """
792 return name
794 def _makeColumnConstraints(self, table: str, spec: ddl.FieldSpec) -> List[sqlalchemy.CheckConstraint]:
795 """Create constraints based on this spec.
797 Parameters
798 ----------
799 table : `str`
800 Name of the table this column is being added to.
801 spec : `FieldSpec`
802 Specification for the field to be added.
804 Returns
805 -------
806 constraint : `list` of `sqlalchemy.CheckConstraint`
807 Constraint added for this column.
808 """
809 # By default we return no additional constraints
810 return []
812 def _convertFieldSpec(
813 self, table: str, spec: ddl.FieldSpec, metadata: sqlalchemy.MetaData, **kwargs: Any
814 ) -> sqlalchemy.schema.Column:
815 """Convert a `FieldSpec` to a `sqlalchemy.schema.Column`.
817 Parameters
818 ----------
819 table : `str`
820 Name of the table this column is being added to.
821 spec : `FieldSpec`
822 Specification for the field to be added.
823 metadata : `sqlalchemy.MetaData`
824 SQLAlchemy representation of the DDL schema this field's table is
825 being added to.
826 **kwargs
827 Additional keyword arguments to forward to the
828 `sqlalchemy.schema.Column` constructor. This is provided to make
829 it easier for derived classes to delegate to ``super()`` while
830 making only minor changes.
832 Returns
833 -------
834 column : `sqlalchemy.schema.Column`
835 SQLAlchemy representation of the field.
836 """
837 args = [spec.name, spec.getSizedColumnType()]
838 if spec.autoincrement:
839 # Generate a sequence to use for auto incrementing for databases
840 # that do not support it natively. This will be ignored by
841 # sqlalchemy for databases that do support it.
842 args.append(
843 sqlalchemy.Sequence(
844 self.shrinkDatabaseEntityName(f"{table}_seq_{spec.name}"), metadata=metadata
845 )
846 )
847 assert spec.doc is None or isinstance(spec.doc, str), f"Bad doc for {table}.{spec.name}."
848 return sqlalchemy.schema.Column(
849 *args,
850 nullable=spec.nullable,
851 primary_key=spec.primaryKey,
852 comment=spec.doc,
853 server_default=spec.default,
854 **kwargs,
855 )
857 def _convertForeignKeySpec(
858 self, table: str, spec: ddl.ForeignKeySpec, metadata: sqlalchemy.MetaData, **kwargs: Any
859 ) -> sqlalchemy.schema.ForeignKeyConstraint:
860 """Convert a `ForeignKeySpec` to a
861 `sqlalchemy.schema.ForeignKeyConstraint`.
863 Parameters
864 ----------
865 table : `str`
866 Name of the table this foreign key is being added to.
867 spec : `ForeignKeySpec`
868 Specification for the foreign key to be added.
869 metadata : `sqlalchemy.MetaData`
870 SQLAlchemy representation of the DDL schema this constraint is
871 being added to.
872 **kwargs
873 Additional keyword arguments to forward to the
874 `sqlalchemy.schema.ForeignKeyConstraint` constructor. This is
875 provided to make it easier for derived classes to delegate to
876 ``super()`` while making only minor changes.
878 Returns
879 -------
880 constraint : `sqlalchemy.schema.ForeignKeyConstraint`
881 SQLAlchemy representation of the constraint.
882 """
883 name = self.shrinkDatabaseEntityName(
884 "_".join(
885 ["fkey", table, self._mangleTableName(spec.table)] + list(spec.target) + list(spec.source)
886 )
887 )
888 return sqlalchemy.schema.ForeignKeyConstraint(
889 spec.source,
890 [f"{self._mangleTableName(spec.table)}.{col}" for col in spec.target],
891 name=name,
892 ondelete=spec.onDelete,
893 )
895 def _convertExclusionConstraintSpec(
896 self,
897 table: str,
898 spec: Tuple[Union[str, Type[TimespanDatabaseRepresentation]], ...],
899 metadata: sqlalchemy.MetaData,
900 ) -> sqlalchemy.schema.Constraint:
901 """Convert a `tuple` from `ddl.TableSpec.exclusion` into a SQLAlchemy
902 constraint representation.
904 Parameters
905 ----------
906 table : `str`
907 Name of the table this constraint is being added to.
908 spec : `tuple` [ `str` or `type` ]
909 A tuple of `str` column names and the `type` object returned by
910 `getTimespanRepresentation` (which must appear exactly once),
911 indicating the order of the columns in the index used to back the
912 constraint.
913 metadata : `sqlalchemy.MetaData`
914 SQLAlchemy representation of the DDL schema this constraint is
915 being added to.
917 Returns
918 -------
919 constraint : `sqlalchemy.schema.Constraint`
920 SQLAlchemy representation of the constraint.
922 Raises
923 ------
924 NotImplementedError
925 Raised if this database does not support exclusion constraints.
926 """
927 raise NotImplementedError(f"Database {self} does not support exclusion constraints.")
def _convertTableSpec(
    self, name: str, spec: ddl.TableSpec, metadata: sqlalchemy.MetaData, **kwargs: Any
) -> sqlalchemy.schema.Table:
    """Convert a `TableSpec` to a `sqlalchemy.schema.Table`.

    Parameters
    ----------
    name : `str`
        Logical name of the table; it is passed through
        `_mangleTableName` before being used.
    spec : `TableSpec`
        Specification for the table to be added.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema this table is being
        added to.
    **kwargs
        Additional keyword arguments to forward to the
        `sqlalchemy.schema.Table` constructor.  This is provided to make it
        easier for derived classes to delegate to ``super()`` while making
        only minor changes.

    Returns
    -------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy representation of the table.

    Notes
    -----
    This method does not handle ``spec.foreignKeys`` at all, in order to
    avoid circular dependencies.  These are added by higher-level logic in
    `ensureTableExists`, `getExistingTable`, and `declareStaticTables`.
    Indexes backing foreign keys that request one *are* created here.
    """
    name = self._mangleTableName(name)
    args = [self._convertFieldSpec(name, fieldSpec, metadata) for fieldSpec in spec.fields]

    # Add any column constraints.
    for fieldSpec in spec.fields:
        args.extend(self._makeColumnConstraints(name, fieldSpec))

    # Track indexes added for primary key and unique constraints, to make
    # sure we don't add duplicate explicit or foreign key indexes for
    # those.
    allIndexes = {tuple(fieldSpec.name for fieldSpec in spec.fields if fieldSpec.primaryKey)}
    args.extend(
        sqlalchemy.schema.UniqueConstraint(
            *columns, name=self.shrinkDatabaseEntityName("_".join([name, "unq"] + list(columns)))
        )
        for columns in spec.unique
    )
    allIndexes.update(spec.unique)
    # Column sets that are also unique constraints are already in
    # ``allIndexes`` and are filtered out here, so every explicit index
    # created below is a plain (non-unique) index.
    args.extend(
        sqlalchemy.schema.Index(
            self.shrinkDatabaseEntityName("_".join([name, "idx"] + list(columns))),
            *columns,
        )
        for columns in spec.indexes
        if columns not in allIndexes
    )
    allIndexes.update(spec.indexes)
    args.extend(
        sqlalchemy.schema.Index(
            self.shrinkDatabaseEntityName("_".join((name, "fkidx") + fk.source)),
            *fk.source,
        )
        for fk in spec.foreignKeys
        if fk.addIndex and fk.source not in allIndexes
    )

    args.extend(self._convertExclusionConstraintSpec(name, excl, metadata) for excl in spec.exclusion)

    assert spec.doc is None or isinstance(spec.doc, str), f"Bad doc for {name}."
    return sqlalchemy.schema.Table(name, metadata, *args, comment=spec.doc, info=spec, **kwargs)
def ensureTableExists(self, name: str, spec: ddl.TableSpec) -> sqlalchemy.schema.Table:
    """Ensure that a table with the given name and specification exists,
    creating it if necessary.

    Parameters
    ----------
    name : `str`
        Name of the table (not including namespace qualifiers).
    spec : `TableSpec`
        Specification for the table.  This will be used when creating the
        table, and *may* be used when obtaining an existing table to check
        for consistency, but no such check is guaranteed.

    Returns
    -------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy representation of the table.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False`, and the table does not
        already exist.
    DatabaseConflictError
        Raised if the table exists but ``spec`` is inconsistent with its
        definition.
    sqlalchemy.exc.DatabaseError
        Raised if creating the table fails and the table still does not
        exist afterwards.

    Notes
    -----
    This method may not be called within transactions.  It may be called on
    read-only databases if and only if the table does in fact already
    exist.

    Subclasses may override this method, but usually should not need to.
    """
    # TODO: if _engine is used to make a table then it uses separate
    # connection and should not interfere with current transaction
    assert (
        self._session_connection is None or not self._session_connection.in_transaction()
    ), "Table creation interrupts transactions."
    assert self._metadata is not None, "Static tables must be declared before dynamic tables."
    table = self.getExistingTable(name, spec)
    if table is not None:
        return table
    if not self.isWriteable():
        raise ReadOnlyDatabaseError(
            f"Table {name} does not exist, and cannot be created "
            f"because database {self} is read-only."
        )
    table = self._convertTableSpec(name, spec, self._metadata)
    for foreignKeySpec in spec.foreignKeys:
        table.append_constraint(self._convertForeignKeySpec(name, foreignKeySpec, self._metadata))
    try:
        with self._connection() as connection:
            table.create(connection)
    except sqlalchemy.exc.DatabaseError:
        # Some other process could have created the table meanwhile, which
        # usually causes OperationalError or ProgrammingError.  We cannot
        # use IF NOT EXISTS clause in this case due to PostgreSQL race
        # condition on server side which causes IntegrityError.  Instead we
        # catch these exceptions (they all inherit DatabaseError) and
        # re-check whether table is now there.
        #
        # The Table object built above is already registered in the
        # metadata, so drop it first; otherwise the re-check would find our
        # own in-memory definition instead of inspecting the database, and
        # a genuine creation failure would be silently swallowed.
        self._metadata.remove(table)
        table = self.getExistingTable(name, spec)
        if table is None:
            raise
    return table
def getExistingTable(self, name: str, spec: ddl.TableSpec) -> Optional[sqlalchemy.schema.Table]:
    """Look up a table that already exists, given its name and
    specification.

    Parameters
    ----------
    name : `str`
        Name of the table (not including namespace qualifiers).
    spec : `TableSpec`
        Specification for the table.  It is used to build the SQLAlchemy
        representation of the table and to verify that the table found is
        consistent with what the caller expects.

    Returns
    -------
    table : `sqlalchemy.schema.Table` or `None`
        SQLAlchemy representation of the table, or `None` if it does not
        exist.

    Raises
    ------
    DatabaseConflictError
        Raised if the table exists but ``spec`` is inconsistent with its
        definition.

    Notes
    -----
    This method can be called within transactions and never modifies the
    database.

    Subclasses may override this method, but usually should not need to.
    """
    assert self._metadata is not None, "Static tables must be declared before dynamic tables."
    name = self._mangleTableName(name)
    if self.namespace is None:
        key = name
    else:
        key = f"{self.namespace}.{name}"
    table = self._metadata.tables.get(key)

    # Case 1: the table is already known to our in-memory metadata; only
    # the set of column names is compared.
    if table is not None:
        if spec.fields.names != set(table.columns.keys()):
            raise DatabaseConflictError(
                f"Table '{name}' has already been defined differently; the new "
                f"specification has columns {list(spec.fields.names)}, while "
                f"the previous definition has {list(table.columns.keys())}."
            )
        return table

    # Case 2: not in the metadata; ask the database itself.
    inspector = sqlalchemy.inspect(self._engine)
    if name not in inspector.get_table_names(schema=self.namespace):
        return None
    existing_columns = inspector.get_columns(name, schema=self.namespace)
    _checkExistingTableDefinition(name, spec, existing_columns)
    table = self._convertTableSpec(name, spec, self._metadata)
    for fk_spec in spec.foreignKeys:
        constraint = self._convertForeignKeySpec(name, fk_spec, self._metadata)
        table.append_constraint(constraint)
    return table
@classmethod
def getTimespanRepresentation(cls) -> Type[TimespanDatabaseRepresentation]:
    """Return the `type` describing how `Timespan` objects are stored in
    this database.

    `Database` itself does not use the returned type anywhere else; it is
    up to calling code to keep DDL and queries consistent with it.

    Returns
    -------
    TimespanReprClass : `type` (`TimespanDatabaseRepresentation` subclass)
        A type that encapsulates the way `Timespan` objects should be
        stored in this database.

    Notes
    -----
    Timespan-mangling logic is deliberately kept out of the `Database`
    implementations, even though the choice of representation ultimately
    belongs to them, for two reasons:

    - Timespans show up in relatively few tables and queries in typical
      usage, and the code handling them already knows it is dealing with
      timespans.  A timespan-representation-aware implementation of, say,
      `insert` would need extra logic to detect when mangling is
      required, and that logic would usually be useless overhead.

    - SQLAlchemy's SELECT expression system cannot wrap several columns
      in one expression object (the ORM can, but it is not used here), so
      much more of that code would have to be wrapped in our own
      interfaces to hide timespan representations.
    """
    representation = TimespanDatabaseRepresentation.Compound
    return representation
def sync(
    self,
    table: sqlalchemy.schema.Table,
    *,
    keys: Dict[str, Any],
    compared: Optional[Dict[str, Any]] = None,
    extra: Optional[Dict[str, Any]] = None,
    returning: Optional[Sequence[str]] = None,
    update: bool = False,
) -> Tuple[Optional[Dict[str, Any]], Union[bool, Dict[str, Any]]]:
    """Make sure the database holds a row equivalent to the given values,
    inserting one if needed.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table to be queried and possibly inserted into.
    keys : `dict`
        Column name-value pairs used to look up an existing row; together
        they must identify at most one row.  If no such row exists, these
        values are part of the inserted row.
    compared : `dict`, optional
        Column name-value pairs checked against an existing row.  If no
        such row exists, these values are part of the inserted row.
    extra : `dict`, optional
        Column name-value pairs ignored when a matching row exists, but
        included when a row has to be inserted.
    returning : `~collections.abc.Sequence` of `str`, optional
        The names of columns whose values should be returned.
    update : `bool`, optional
        If `True` (`False` is default), overwrite the existing row with
        the values in ``compared`` instead of raising
        `DatabaseConflictError`.

    Returns
    -------
    row : `dict`, optional
        The values of the columns named by ``returning``, or `None` if
        ``returning`` is `None`.
    inserted_or_updated : `bool` or `dict`
        `True` if a new row was inserted; `False` if a matching row
        already existed.  A `dict` (only possible with ``update=True``)
        means an existing row was updated; it maps the updated column
        names to their *old* values (the new values are in ``compared``).

    Raises
    ------
    DatabaseConflictError
        Raised if the values in ``compared`` do not match the values in the
        database.
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False`, and no matching record
        already exists.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    It may be called on read-only databases if and only if the matching row
    does in fact already exist.
    """

    def where_keys() -> Any:
        """Build the WHERE clause selecting the row identified by
        ``keys``.
        """
        return sqlalchemy.sql.and_(*[table.columns[k] == v for k, v in keys.items()])

    def differs(found: Any, expected: Any) -> bool:
        """Report whether a stored value disagrees with the expected one,
        delegating to the time converter for `astropy.time.Time` values.
        """
        if isinstance(found, astropy.time.Time):
            return not time_utils.TimeConverter().times_equal(found, expected)
        return found != expected

    def check() -> Tuple[int, Optional[Dict[str, Any]], Optional[List]]:
        """Query for the row matching ``keys`` and compare it with what
        the caller supplied.

        Returns
        -------
        n : `int`
            Number of matching rows.  ``n != 1`` is always an error, but
            which error depends on where `check` is called from.
        bad : `dict` or `None`
            The entries of ``compared`` whose stored values differ from
            the given ones, mapped to the stored values.  A non-empty
            ``bad`` is always an error, but which one depends on the
            context.  `None` if ``n != 1``.
        result : `list` or `None`
            Stored values for the columns named in ``returning``, or
            `None` if ``returning is None`` or ``n != 1``.
        """
        wanted: Set[str] = set()
        if compared is not None:
            wanted.update(compared.keys())
        if returning is not None:
            wanted.update(returning)
        if not wanted:
            # Need to select some column, even if we just want to see
            # how many rows we get back.
            wanted.add(next(iter(keys.keys())))
        lookup = (
            sqlalchemy.sql.select(*[table.columns[k].label(k) for k in wanted])
            .select_from(table)
            .where(where_keys())
        )
        with self._connection() as connection:
            matches = list(connection.execute(lookup).mappings())
        if len(matches) != 1:
            return len(matches), None, None
        existing = matches[0]
        if compared is None:
            mismatched: Dict[str, Any] = {}
        else:
            mismatched = {k: existing[k] for k, v in compared.items() if differs(existing[k], v)}
        values: Optional[list] = None if returning is None else [existing[k] for k in returning]
        return 1, mismatched, values

    def format_bad(inconsistencies: Dict[str, Any]) -> str:
        """Render the mismatch dictionary returned by ``check`` as text
        suitable for an error message.
        """
        assert compared is not None, "Should not be able to get inconsistencies without comparing."
        return ", ".join(f"{k}: {v!r} != {compared[k]!r}" for k, v in inconsistencies.items())

    inserted_or_updated: Union[bool, Dict[str, Any]]
    if not self.isTableWriteable(table):
        # Database is not writeable; just see if the row exists.
        n, bad, result = check()
        if n < 1:
            raise ReadOnlyDatabaseError("sync needs to insert, but database is read-only.")
        if n > 1:
            raise RuntimeError("Keys passed to sync do not comprise a unique constraint.")
        if bad:
            if update:
                raise ReadOnlyDatabaseError("sync needs to update, but database is read-only.")
            raise DatabaseConflictError(
                f"Conflict in sync for table {table.name} on column(s) {format_bad(bad)}."
            )
        inserted_or_updated = False
    else:
        # Try an insert first, but allow it to fail (in only specific
        # ways).
        row = keys.copy()
        if compared is not None:
            row.update(compared)
        if extra is not None:
            row.update(extra)
        with self.transaction():
            inserted = bool(self.ensure(table, row))
            # check() has to run inside the transaction for this branch,
            # so that an insert that didn't do what we expected is rolled
            # back.  That limits how much logic can be shared with the
            # read-only branch above.
            n, bad, result = check()
            if n < 1:
                raise ConflictingDefinitionError(
                    f"Attempted to ensure {row} exists by inserting it with ON CONFLICT IGNORE, "
                    f"but a post-insert query on {keys} returned no results. "
                    f"Insert was {'' if inserted else 'not '}reported as successful. "
                    "This can occur if the insert violated a database constraint other than the "
                    "unique constraint or primary key used to identify the row in this call."
                )
            if n > 1:
                raise RuntimeError(
                    f"Keys passed to sync {keys.keys()} do not comprise a "
                    f"unique constraint for table {table.name}."
                )
            if not bad:
                inserted_or_updated = inserted
            else:
                assert (
                    compared is not None
                ), "Should not be able to get inconsistencies without comparing."
                if inserted:
                    raise RuntimeError(
                        f"Conflict ({bad}) in sync after successful insert; this is "
                        "possible if the same table is being updated by a concurrent "
                        "process that isn't using sync, but it may also be a bug in "
                        "daf_butler."
                    )
                if not update:
                    raise DatabaseConflictError(
                        f"Conflict in sync for table {table.name} on column(s) {format_bad(bad)}."
                    )
                # Overwrite only the columns that disagreed, and report
                # their old values to the caller.
                new_values = {k: compared[k] for k in bad.keys()}
                with self._connection() as connection:
                    connection.execute(table.update().where(where_keys()).values(**new_values))
                inserted_or_updated = bad
    if returning is None:
        return None, inserted_or_updated
    assert result is not None
    return dict(zip(returning, result)), inserted_or_updated
def insert(
    self,
    table: sqlalchemy.schema.Table,
    *rows: dict,
    returnIds: bool = False,
    select: Optional[sqlalchemy.sql.expression.SelectBase] = None,
    names: Optional[Iterable[str]] = None,
) -> Optional[List[int]]:
    """Insert rows into a table, optionally reporting the autoincrement
    primary key values that were assigned.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    returnIds: `bool`
        If `True` (`False` is default), return the values of the table's
        autoincrement primary key field (which must exist).
    select : `sqlalchemy.sql.SelectBase`, optional
        A SELECT query expression to insert rows from.  Cannot be provided
        with either ``rows`` or ``returnIds=True``.
    names : `Iterable` [ `str` ], optional
        Names of columns in ``table`` to be populated, ordered to match the
        columns returned by ``select``.  Ignored if ``select`` is `None`.
        If not provided, the columns returned by ``select`` must be named
        to match the desired columns of ``table``.
    *rows
        Positional arguments are the rows to be inserted, as dictionaries
        mapping column name to value.  The keys in all dictionaries must
        be the same.

    Returns
    -------
    ids : `None`, or `list` of `int`
        If ``returnIds`` is `True`, a `list` containing the inserted
        values for the table's autoincrement primary key.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.
    TypeError
        Raised if ``select`` is combined with ``rows`` or
        ``returnIds=True``.

    Notes
    -----
    The default implementation uses bulk insert syntax when ``returnIds``
    is `False`, and a loop over single-row insert operations when it is
    `True`.

    Derived classes should reimplement when they can provide a more
    efficient implementation (especially for the latter case).

    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.
    """
    self.assertTableWriteable(table, f"Cannot insert into read-only table {table}.")
    if select is not None and (rows or returnIds):
        raise TypeError("'select' is incompatible with passing value rows or returnIds=True.")
    if select is None and not rows:
        # Nothing to insert.
        return [] if returnIds else None
    with self._connection() as connection:
        if returnIds:
            # One statement per row so each generated key can be read back.
            statement = table.insert()
            return [connection.execute(statement, row).inserted_primary_key[0] for row in rows]
        if select is None:
            connection.execute(table.insert(), rows)
            return None
        if names is None:
            # columns() is deprecated since 1.4, but
            # selected_columns() method did not exist in 1.3.
            if hasattr(select, "selected_columns"):
                names = select.selected_columns.keys()
            else:
                names = select.columns.keys()
        connection.execute(table.insert().from_select(names, select))
        return None
@abstractmethod
def replace(self, table: sqlalchemy.schema.Table, *rows: dict) -> None:
    """Insert rows into a table, overwriting any existing row whose
    primary key would collide with a new one.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    *rows
        Positional arguments are the rows to be inserted, as dictionaries
        mapping column name to value.  The keys in all dictionaries must
        be the same.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    Implementations should raise a `sqlalchemy.exc.IntegrityError`
    exception when a constraint other than the primary key would be
    violated.

    Implementations are not required to support `replace` on tables
    with autoincrement keys.
    """
    # Abstract: concrete databases must supply the implementation.
    raise NotImplementedError()
@abstractmethod
def ensure(self, table: sqlalchemy.schema.Table, *rows: dict, primary_key_only: bool = False) -> int:
    """Insert rows into a table, silently skipping those whose insertion
    would violate a unique constraint.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    *rows
        Positional arguments are the rows to be inserted, as dictionaries
        mapping column name to value.  The keys in all dictionaries must
        be the same.
    primary_key_only : `bool`, optional
        If `True` (`False` is default), only skip rows that violate the
        primary key constraint, and raise an exception (and rollback
        transactions) for other constraint violations.

    Returns
    -------
    count : `int`
        The number of rows actually inserted.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.
        This is raised even if the operation would do nothing even on a
        writeable database.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    Implementations are not required to support `ensure` on tables
    with autoincrement keys.
    """
    # Abstract: concrete databases must supply the implementation.
    raise NotImplementedError()
def delete(self, table: sqlalchemy.schema.Table, columns: Iterable[str], *rows: dict) -> int:
    """Delete one or more rows from a table.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table that rows should be deleted from.
    columns: `~collections.abc.Iterable` of `str`
        The names of columns that will be used to constrain the rows to
        be deleted; these will be combined via ``AND`` to form the
        ``WHERE`` clause of the delete query.  Any iterable is accepted,
        including one-shot iterators.  If empty, every row in the table is
        deleted.
    *rows
        Positional arguments are the keys of rows to be deleted, as
        dictionaries mapping column name to value.  The keys in all
        dictionaries must be exactly the names in ``columns``.

    Returns
    -------
    count : `int`
        Number of rows deleted.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    The default implementation should be sufficient for most derived
    classes.
    """
    self.assertTableWriteable(table, f"Cannot delete from read-only table {table}.")
    # Materialize before testing truthiness: a one-shot iterator is always
    # truthy, even when it yields nothing, which would make an
    # unconstrained delete look like a constrained one with no rows.
    columns = list(columns)
    if columns and not rows:
        # If there are no columns, this operation is supposed to delete
        # everything (so we proceed as usual).  But if there are columns,
        # but no rows, it was a constrained bulk operation where the
        # constraint is that no rows match, and we should short-circuit
        # while reporting that no rows were affected.
        return 0
    sql = table.delete()

    # More efficient to use IN operator if there is only one
    # variable changing across all rows.
    content: Dict[str, Set] = defaultdict(set)
    if len(columns) == 1:
        # Nothing to calculate since we can always use IN
        column = columns[0]
        changing_columns = [column]
        content[column] = {row[column] for row in rows}
    else:
        for row in rows:
            for k, v in row.items():
                content[k].add(v)
        changing_columns = [col for col, values in content.items() if len(values) > 1]

    if len(changing_columns) != 1:
        # More than one column changes each time so do explicit bind
        # parameters and have each row processed separately.
        whereTerms = [table.columns[name] == sqlalchemy.sql.bindparam(name) for name in columns]
        if whereTerms:
            sql = sql.where(sqlalchemy.sql.and_(*whereTerms))
        with self._connection() as connection:
            return connection.execute(sql, rows).rowcount
    else:
        # One of the columns has changing values but any others are
        # fixed.  In this case we can use an IN operator and be more
        # efficient.
        name = changing_columns.pop()

        # Simple where clause for the unchanging columns
        clauses = []
        for k, v in content.items():
            if k == name:
                continue
            column = table.columns[k]
            # The set only has one element
            clauses.append(column == v.pop())

        # The IN operator will not work for "infinite" numbers of
        # rows so must batch it up into distinct calls.
        in_content = list(content[name])
        n_elements = len(in_content)

        rowcount = 0
        n_per_loop = 1_000  # Controls how many items to put in IN clause
        with self._connection() as connection:
            for iposn in range(0, n_elements, n_per_loop):
                endpos = iposn + n_per_loop
                in_clause = table.columns[name].in_(in_content[iposn:endpos])

                newsql = sql.where(sqlalchemy.sql.and_(*clauses, in_clause))
                rowcount += connection.execute(newsql).rowcount
        return rowcount
def deleteWhere(self, table: sqlalchemy.schema.Table, where: sqlalchemy.sql.ClauseElement) -> int:
    """Delete the rows of a table selected by a ready-made WHERE clause.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table that rows should be deleted from.
    where: `sqlalchemy.sql.ClauseElement`
        Expression used as the ``WHERE`` clause of the delete query; it is
        applied as given.

    Returns
    -------
    count : `int`
        Number of rows deleted.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    The default implementation should be sufficient for most derived
    classes.
    """
    self.assertTableWriteable(table, f"Cannot delete from read-only table {table}.")

    statement = table.delete().where(where)
    with self._connection() as connection:
        outcome = connection.execute(statement)
        return outcome.rowcount
def update(self, table: sqlalchemy.schema.Table, where: Dict[str, str], *rows: dict) -> int:
    """Update rows in a table.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table containing the rows to be updated.
    where : `dict` [`str`, `str`]
        Maps each column name used to locate existing rows to the key
        under which the matching value is stored in the ``rows``
        dictionaries.  Note that these may not be the same due to
        SQLAlchemy limitations.
    *rows
        Positional arguments are the rows to be updated.  The keys in all
        dictionaries must be the same, and may correspond to either a
        value in the ``where`` dictionary or the name of a column to be
        updated.

    Returns
    -------
    count : `int`
        Number of rows matched (regardless of whether the update actually
        modified them).

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    The default implementation should be sufficient for most derived
    classes.
    """
    self.assertTableWriteable(table, f"Cannot update read-only table {table}.")
    if not rows:
        # Nothing to do; avoid executing a statement with no parameters.
        return 0
    # Each search column is compared against a bind parameter named after
    # the corresponding key in the row dictionaries.
    conditions = [
        table.columns[column] == sqlalchemy.sql.bindparam(param) for column, param in where.items()
    ]
    statement = table.update().where(sqlalchemy.sql.and_(*conditions))
    with self._connection() as connection:
        outcome = connection.execute(statement, rows)
        return outcome.rowcount
def query(
    self, sql: sqlalchemy.sql.Selectable, *args: Any, **kwargs: Any
) -> sqlalchemy.engine.ResultProxy:
    """Execute a SELECT query against the database.

    Parameters
    ----------
    sql : `sqlalchemy.sql.Selectable`
        A SQLAlchemy representation of a ``SELECT`` query.
    *args
        Additional positional arguments are forwarded to
        `sqlalchemy.engine.Connection.execute`.
    **kwargs
        Additional keyword arguments are forwarded to
        `sqlalchemy.engine.Connection.execute`.

    Returns
    -------
    result : `sqlalchemy.engine.ResultProxy`
        Query results.

    Notes
    -----
    The default implementation should be sufficient for most derived
    classes.
    """
    # A Result object is handed back to the caller, so the connection's
    # lifetime has to be managed here.  Inside a transaction context the
    # existing session connection is reused; otherwise a dedicated
    # connection is opened that closes together with the result.
    #
    # TODO: May be better approach would be to make this method return a
    # context manager, but this means big changes for callers of this
    # method.
    connection = self._session_connection
    if connection is None:
        connection = self._engine.connect(close_with_result=True)
    # TODO: should we guard against non-SELECT queries here?
    return connection.execute(sql, *args, **kwargs)
@abstractmethod
def constant_rows(
    self,
    fields: NamedValueAbstractSet[ddl.FieldSpec],
    *rows: dict,
    name: Optional[str] = None,
) -> sqlalchemy.sql.FromClause:
    """Build a SQLAlchemy object representing a small set of rows with
    constant values.

    Parameters
    ----------
    fields : `NamedValueAbstractSet` [ `ddl.FieldSpec` ]
        The columns of the rows.  Unique and foreign key constraints are
        ignored.
    *rows : `dict`
        Values for the rows.
    name : `str`, optional
        If provided, the name of the SQL construct.  If not provided, an
        opaque but unique identifier is generated.

    Returns
    -------
    from_clause : `sqlalchemy.sql.FromClause`
        SQLAlchemy object representing the given rows.  This is guaranteed
        to be something that can be directly joined into a ``SELECT``
        query's ``FROM`` clause, and will not involve a temporary table
        that needs to be cleaned up later.

    Notes
    -----
    The default implementation uses the SQL-standard ``VALUES`` construct,
    but support for that construct is varied enough across popular RDBMSs
    that the method is still marked abstract to force explicit opt-in via
    delegation to `super`.
    """
    if name is None:
        name = f"tmp_{uuid.uuid4().hex}"
    column_objects = [sqlalchemy.Column(field.name, field.getSizedColumnType()) for field in fields]
    values_clause = sqlalchemy.sql.values(*column_objects, name=name)
    # Order each row's values to match the column order given by
    # ``fields.names``.
    data = [tuple(row[column] for column in fields.names) for row in rows]
    return values_clause.data(data)
def get_constant_rows_max(self) -> int:
    """Report how many rows, at most, should be handed to `constant_rows`
    for this backend.

    Returns
    -------
    max : `int`
        Maximum number of rows.

    Notes
    -----
    This should reflect typical performance profiles (or a guess at these),
    not just hard database engine limits.
    """
    max_rows = 100
    return max_rows
origin: int
"""Default integer ID for any datasets, quanta, or other entities whose
primary key is an (autoincrement, origin) compound (`int`).
"""

namespace: Optional[str]
"""Schema or namespace that this database instance is associated with
(`str` or `None`).
"""