Coverage for python/lsst/daf/butler/registry/interfaces/_database.py: 14%
418 statements
« prev ^ index » next coverage.py v6.5.0, created at 2022-11-09 02:51 -0800
« prev ^ index » next coverage.py v6.5.0, created at 2022-11-09 02:51 -0800
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = [
24 "Database",
25 "ReadOnlyDatabaseError",
26 "DatabaseConflictError",
27 "SchemaAlreadyDefinedError",
28 "Session",
29 "StaticTablesContext",
30]
32import uuid
33import warnings
34from abc import ABC, abstractmethod
35from collections import defaultdict
36from contextlib import contextmanager
37from typing import Any, Callable, Dict, Iterable, Iterator, List, Optional, Sequence, Set, Tuple, Type, Union
39import astropy.time
40import sqlalchemy
42from ...core import TimespanDatabaseRepresentation, ddl, time_utils
43from ...core.named import NamedValueAbstractSet
44from .._exceptions import ConflictingDefinitionError
46_IN_SAVEPOINT_TRANSACTION = "IN_SAVEPOINT_TRANSACTION"
49def _checkExistingTableDefinition(name: str, spec: ddl.TableSpec, inspection: List[Dict[str, Any]]) -> None:
50 """Test that the definition of a table in a `ddl.TableSpec` and from
51 database introspection are consistent.
53 Parameters
54 ----------
55 name : `str`
56 Name of the table (only used in error messages).
57 spec : `ddl.TableSpec`
58 Specification of the table.
59 inspection : `dict`
60 Dictionary returned by
61 `sqlalchemy.engine.reflection.Inspector.get_columns`.
63 Raises
64 ------
65 DatabaseConflictError
66 Raised if the definitions are inconsistent.
67 """
68 columnNames = [c["name"] for c in inspection]
69 if spec.fields.names != set(columnNames):
70 raise DatabaseConflictError(
71 f"Table '{name}' exists but is defined differently in the database; "
72 f"specification has columns {list(spec.fields.names)}, while the "
73 f"table in the database has {columnNames}."
74 )
class ReadOnlyDatabaseError(RuntimeError):
    """Error signalling that a write operation was attempted on a
    `Database` that is read-only.
    """
class DatabaseConflictError(ConflictingDefinitionError):
    """Error signalling that what is stored in the database (row values or
    schema entities) disagrees with what this client expects.
    """
class SchemaAlreadyDefinedError(RuntimeError):
    """Error signalling an attempt to initialize a database schema when
    some tables already exist.
    """
class StaticTablesContext:
    """Collector for the static schema declarations of one registry layer.

    `Database.declareStaticTables` creates and yields an instance of this
    class; that should be the only way one is constructed.

    Parameters
    ----------
    db : `Database`
        Database whose static schema is being declared.
    """

    def __init__(self, db: Database):
        self._db = db
        # Callables registered through `addInitializer`.
        self._initializers: List[Callable[[Database], None]] = []
        # (table, constraint) pairs recorded by `addTable`; they are kept
        # separate from the tables so declaration order does not matter.
        self._foreignKeys: List[Tuple[sqlalchemy.schema.Table, sqlalchemy.schema.ForeignKeyConstraint]] = []
        # Snapshot of the tables that already exist in the namespace.
        self._inspector = sqlalchemy.inspect(self._db._engine)
        self._tableNames = frozenset(self._inspector.get_table_names(schema=self._db.namespace))

    def addTable(self, name: str, spec: ddl.TableSpec) -> sqlalchemy.schema.Table:
        """Declare a table and return its SQLAlchemy representation.

        The table may not actually be created until the end of the context
        created by `Database.declareStaticTables`, which allows tables to
        be declared in any order even when they reference each other via
        foreign keys.

        Parameters
        ----------
        name : `str`
            Logical table name; it is mangled by the database before use.
        spec : `ddl.TableSpec`
            Specification of the table.

        Returns
        -------
        table : `sqlalchemy.schema.Table`
            SQLAlchemy representation of the table.
        """
        db = self._db
        mangled = db._mangleTableName(name)
        if mangled in self._tableNames:
            # The table is already there; make sure it looks as expected.
            columns = self._inspector.get_columns(mangled, schema=db.namespace)
            _checkExistingTableDefinition(mangled, spec, columns)
        metadata = db._metadata
        assert metadata is not None, "Guaranteed by context manager that returns this object."
        table = db._convertTableSpec(mangled, spec, metadata)
        self._foreignKeys.extend(
            (table, db._convertForeignKeySpec(mangled, keySpec, metadata)) for keySpec in spec.foreignKeys
        )
        return table

    def addTableTuple(self, specs: Tuple[ddl.TableSpec, ...]) -> Tuple[sqlalchemy.schema.Table, ...]:
        """Declare every table in a named tuple of specifications.

        Each field name of ``specs`` is used as the table name for the
        corresponding specification, exactly as if `addTable` had been
        called for each field.

        Notes
        -----
        ``specs`` *must* be an instance of a type created by
        `collections.namedtuple`, not a plain tuple; the result is an
        instance of that same type. Because `~collections.namedtuple` is a
        factory for `type` objects rather than a type itself, this cannot
        be expressed with type annotations.
        """
        tables = [
            self.addTable(fieldName, spec) for fieldName, spec in zip(specs._fields, specs)  # type: ignore
        ]
        return specs._make(tables)  # type: ignore

    def addInitializer(self, initializer: Callable[[Database], None]) -> None:
        """Register a callable that performs one-time database
        initialization.

        Initialization can mean anything that changes the state of the
        database and needs to be done exactly once after the schema was
        created, e.g. populating schema attributes.

        Parameters
        ----------
        initializer : callable
            Callable taking a single `Database` argument.
        """
        self._initializers.append(initializer)
class Session:
    """Class representing a persistent connection to a database.

    Parameters
    ----------
    db : `Database`
        Database instance.

    Notes
    -----
    Instances of this class should not be created by client code;
    `Database.session` should be used to create a context for a session::

        with db.session() as session:
            session.method()
            db.method()

    In the current implementation sessions can be nested and transactions
    can be nested within a session. All nested sessions and transactions
    share the same database connection.

    `Session` exposes the limited subset of the database API that requires
    a persistent connection (e.g. temporary tables, whose lifetime is that
    of a session). Potentially most of the database API could be associated
    with this class.
    """

    def __init__(self, db: Database):
        self._db = db

    def makeTemporaryTable(self, spec: ddl.TableSpec, name: Optional[str] = None) -> sqlalchemy.schema.Table:
        """Create a temporary table.

        Parameters
        ----------
        spec : `TableSpec`
            Specification for the table.
        name : `str`, optional
            A unique (within this session/connection) name for the table.
            Subclasses may override to modify the actual name used. If not
            provided, a unique name will be generated.

        Returns
        -------
        table : `sqlalchemy.schema.Table`
            SQLAlchemy representation of the table.

        Raises
        ------
        RuntimeError
            Raised if the static schema has not been declared yet.
        ValueError
            Raised if the name, after being transformed by the database,
            collides with an existing temporary table.

        Notes
        -----
        Temporary tables may be created, dropped, and written to even in
        read-only databases - at least according to the Python-level
        protections in the `Database` classes. Server permissions may say
        otherwise, but in that case they probably need to be modified to
        support the full range of expected read-only butler behavior.

        Temporary table rows are guaranteed to be dropped when a connection
        is closed. `Database` implementations are permitted to allow the
        table to remain as long as this is transparent to the user (i.e.
        "creating" the temporary table in a new session should not be an
        error, even if it does nothing).

        It may not be possible to use temporary tables within transactions
        with some database engines (or configurations thereof).
        """
        db = self._db
        if name is None:
            name = f"tmp_{uuid.uuid4().hex}"
        metadata = db._metadata
        if metadata is None:
            raise RuntimeError("Cannot create temporary table before static schema is defined.")
        table = db._convertTableSpec(
            name, spec, metadata, prefixes=["TEMPORARY"], schema=sqlalchemy.schema.BLANK_SCHEMA
        )
        # Only a collision caused by the database transforming the name is
        # reported; re-using an identical name is allowed through.
        if table.key in db._tempTables and table.key != name:
            raise ValueError(
                f"A temporary table with name {name} (transformed to {table.key} by "
                f"Database) already exists."
            )
        for keySpec in spec.foreignKeys:
            table.append_constraint(db._convertForeignKeySpec(name, keySpec, metadata))
        with db._connection() as connection:
            table.create(connection)
        db._tempTables.add(table.key)
        return table

    def dropTemporaryTable(self, table: sqlalchemy.schema.Table) -> None:
        """Drop a temporary table.

        Parameters
        ----------
        table : `sqlalchemy.schema.Table`
            A SQLAlchemy object returned by a previous call to
            `makeTemporaryTable`.

        Raises
        ------
        TypeError
            Raised if the table is not a known temporary table.
        """
        db = self._db
        if table.key not in db._tempTables:
            raise TypeError(f"Table {table.key} was not created by makeTemporaryTable.")
        with db._connection() as connection:
            table.drop(connection)
        db._tempTables.remove(table.key)

    @contextmanager
    def temporary_table(
        self, spec: ddl.TableSpec, name: Optional[str] = None
    ) -> Iterator[sqlalchemy.schema.Table]:
        """Return a context manager that creates a temporary table on entry
        and drops it on exit.

        Parameters
        ----------
        spec : `ddl.TableSpec`
            Specification for the columns. Unique and foreign key
            constraints may be ignored.
        name : `str`, optional
            If provided, the name of the SQL construct. If not provided,
            an opaque but unique identifier is generated.

        Returns
        -------
        table : `sqlalchemy.schema.Table`
            SQLAlchemy representation of the table.
        """
        table = self.makeTemporaryTable(spec=spec, name=name)
        try:
            yield table
        finally:
            # Drop the table even if the body of the ``with`` block raised.
            self.dropTemporaryTable(table)
296class Database(ABC):
297 """An abstract interface that represents a particular database engine's
298 representation of a single schema/namespace/database.
300 Parameters
301 ----------
302 origin : `int`
303 An integer ID that should be used as the default for any datasets,
304 quanta, or other entities that use a (autoincrement, origin) compound
305 primary key.
306 engine : `sqlalchemy.engine.Engine`
307 The SQLAlchemy engine for this `Database`.
308 namespace : `str`, optional
309 Name of the schema or namespace this instance is associated with.
310 This is passed as the ``schema`` argument when constructing a
311 `sqlalchemy.schema.MetaData` instance. We use ``namespace`` instead to
312 avoid confusion between "schema means namespace" and "schema means
313 table definitions".
315 Notes
316 -----
317 `Database` requires all write operations to go through its special named
318 methods. Our write patterns are sufficiently simple that we don't really
319 need the full flexibility of SQL insert/update/delete syntax, and we need
320 non-standard (but common) functionality in these operations sufficiently
321 often that it seems worthwhile to provide our own generic API.
323 In contrast, `Database.query` allows arbitrary ``SELECT`` queries (via
324 their SQLAlchemy representation) to be run, as we expect these to require
325 significantly more sophistication while still being limited to standard
326 SQL.
328 `Database` itself has several underscore-prefixed attributes:
330 - ``_engine``: SQLAlchemy object representing its engine.
331 - ``_connection``: method returning a context manager for
332 `sqlalchemy.engine.Connection` object.
333 - ``_metadata``: the `sqlalchemy.schema.MetaData` object representing
334 the tables and other schema entities.
336 These are considered protected (derived classes may access them, but other
337 code should not), and read-only, aside from executing SQL via
338 ``_connection``.
339 """
341 def __init__(self, *, origin: int, engine: sqlalchemy.engine.Engine, namespace: Optional[str] = None):
342 self.origin = origin
343 self.namespace = namespace
344 self._engine = engine
345 self._session_connection: Optional[sqlalchemy.engine.Connection] = None
346 self._metadata: Optional[sqlalchemy.schema.MetaData] = None
347 self._tempTables: Set[str] = set()
349 def __repr__(self) -> str:
350 # Rather than try to reproduce all the parameters used to create
351 # the object, instead report the more useful information of the
352 # connection URL.
353 if self._engine.url.password is not None:
354 uri = str(self._engine.url.set(password="***"))
355 else:
356 uri = str(self._engine.url)
357 if self.namespace:
358 uri += f"#{self.namespace}"
359 return f'{type(self).__name__}("{uri}")'
@classmethod
def makeDefaultUri(cls, root: str) -> Optional[str]:
    """Return the default connection URI for a root directory.

    This base implementation has no default and always returns `None`.

    Parameters
    ----------
    root : `str`
        Root directory the default should be appropriate for.

    Returns
    -------
    uri : `str` or `None`
        The default URI, or `None` if there can be no such default.
    """
    return None
@classmethod
def fromUri(
    cls, uri: str, *, origin: int, namespace: Optional[str] = None, writeable: bool = True
) -> Database:
    """Construct a database from a SQLAlchemy URI.

    The engine is built with `makeEngine` and then handed to `fromEngine`.

    Parameters
    ----------
    uri : `str`
        A SQLAlchemy URI connection string.
    origin : `int`
        An integer ID that should be used as the default for any datasets,
        quanta, or other entities that use a (autoincrement, origin)
        compound primary key.
    namespace : `str`, optional
        A database namespace (i.e. schema) the new instance should be
        associated with. If `None` (default), the namespace (if any) is
        inferred from the URI.
    writeable : `bool`, optional
        If `True`, allow write operations on the database, including
        ``CREATE TABLE``.

    Returns
    -------
    db : `Database`
        A new `Database` instance.
    """
    engine = cls.makeEngine(uri, writeable=writeable)
    return cls.fromEngine(engine, origin=origin, namespace=namespace, writeable=writeable)
@classmethod
@abstractmethod
def makeEngine(cls, uri: str, *, writeable: bool = True) -> sqlalchemy.engine.Engine:
    """Build a `sqlalchemy.engine.Engine` for a SQLAlchemy URI.

    Abstract: concrete subclasses must provide the implementation.

    Parameters
    ----------
    uri : `str`
        A SQLAlchemy URI connection string.
    writeable : `bool`, optional
        If `True`, allow write operations on the database, including
        ``CREATE TABLE``.

    Returns
    -------
    engine : `sqlalchemy.engine.Engine`
        A database engine.

    Notes
    -----
    Subclasses that support other ways to connect to a database are
    encouraged to add optional arguments to their implementation of this
    method, as long as they stay compatible with the base class call
    signature.
    """
    raise NotImplementedError()
@classmethod
@abstractmethod
def fromEngine(
    cls,
    engine: sqlalchemy.engine.Engine,
    *,
    origin: int,
    namespace: Optional[str] = None,
    writeable: bool = True,
) -> Database:
    """Build a new `Database` around an existing
    `sqlalchemy.engine.Engine`.

    Abstract: concrete subclasses must provide the implementation.

    Parameters
    ----------
    engine : `sqlalchemy.engine.Engine`
        The engine for the database. May be shared between `Database`
        instances.
    origin : `int`
        An integer ID that should be used as the default for any datasets,
        quanta, or other entities that use a (autoincrement, origin)
        compound primary key.
    namespace : `str`, optional
        A different database namespace (i.e. schema) the new instance
        should be associated with. If `None` (default), the namespace
        (if any) is inferred from the connection.
    writeable : `bool`, optional
        If `True`, allow write operations on the database, including
        ``CREATE TABLE``.

    Returns
    -------
    db : `Database`
        A new `Database` instance.

    Notes
    -----
    This method lets several `Database` instances share one engine, which
    is desirable when they represent different namespaces that can be
    queried together.
    """
    raise NotImplementedError()
@contextmanager
def session(self) -> Iterator:
    """Return a context manager that represents a session (a persistent
    connection to a database).

    If a session is already active its connection is reused; otherwise a
    new connection is opened on entry and closed on exit.
    """
    if self._session_connection is not None:
        # Nested use: share the connection of the enclosing session.
        yield Session(self)
        return
    try:
        # Outermost use: this context owns the connection.
        self._session_connection = self._engine.connect()
        yield Session(self)
    finally:
        owned = self._session_connection
        if owned is not None:
            owned.close()
            self._session_connection = None
            # Temporary tables only live within session
            self._tempTables = set()
@contextmanager
def transaction(
    self,
    *,
    interrupting: bool = False,
    savepoint: bool = False,
    lock: Iterable[sqlalchemy.schema.Table] = (),
) -> Iterator:
    """Return a context manager that represents a transaction.

    Parameters
    ----------
    interrupting : `bool`, optional
        If `True` (`False` is default), this transaction block may not be
        nested within an outer one, and attempting to do so is a logic
        (i.e. assertion) error.
    savepoint : `bool`, optional
        If `True` (`False` is default), create a `SAVEPOINT`, allowing
        exceptions raised by the database (e.g. due to constraint
        violations) during this transaction's context to be caught outside
        it without also rolling back all operations in an outer transaction
        block. If `False`, transactions may still be nested, but a
        rollback may be generated at any level and affects all levels, and
        commits are deferred until the outermost block completes. If any
        outer transaction block was created with ``savepoint=True``, all
        inner blocks will be as well (regardless of the actual value
        passed). This has no effect if this is the outermost transaction.
    lock : `Iterable` [ `sqlalchemy.schema.Table` ], optional
        A list of tables to lock for the duration of this transaction.
        These locks are guaranteed to prevent concurrent writes and allow
        this transaction (only) to acquire the same locks (others should
        block), but only prevent concurrent reads if the database engine
        requires that in order to block concurrent writes.

    Notes
    -----
    All transactions on a connection managed by one or more `Database`
    instances _must_ go through this method, or transaction state will not
    be correctly managed.

    Starting the transaction and acquiring the table locks both happen
    inside the guarded block, so a failure in either one rolls back
    whatever this context started and clears the savepoint bookkeeping
    before the exception propagates.
    """
    # need a connection, use session to manage it
    with self.session():
        assert self._session_connection is not None
        connection = self._session_connection
        assert not (interrupting and connection.in_transaction()), (
            "Logic error in transaction nesting: an operation that would "
            "interrupt the active transaction context has been requested."
        )
        # Whether we are already inside a SAVEPOINT transaction is
        # remembered in the connection object's 'info' dict, which exists
        # for user data like this. That is safer than an attribute on this
        # instance because several `Database` instances may share one
        # connection.
        savepoint = savepoint or connection.info.get(_IN_SAVEPOINT_TRANSACTION, False)
        connection.info[_IN_SAVEPOINT_TRANSACTION] = savepoint
        # `None` means this block did not start anything itself (nested,
        # non-savepoint case) and so has nothing to commit or roll back.
        trans: Optional[sqlalchemy.engine.Transaction] = None
        try:
            if not connection.in_transaction():
                # Use a regular (non-savepoint) transaction always for the
                # outermost context.
                trans = connection.begin()
            elif savepoint:
                trans = connection.begin_nested()
            self._lockTables(connection, lock)
            yield
            if trans is not None:
                trans.commit()
        except BaseException:
            if trans is not None:
                trans.rollback()
            raise
        finally:
            # Forget the savepoint flag once the outermost transaction on
            # this connection has finished.
            if not connection.in_transaction():
                connection.info.pop(_IN_SAVEPOINT_TRANSACTION, None)
569 @contextmanager
570 def _connection(self) -> Iterator[sqlalchemy.engine.Connection]:
571 """Return context manager for Connection."""
572 if self._session_connection is not None:
573 # It means that we are in Session context, but we may not be in
574 # transaction context. Start a short transaction in that case.
575 if self._session_connection.in_transaction():
576 yield self._session_connection
577 else:
578 with self._session_connection.begin():
579 yield self._session_connection
580 else:
581 # Make new connection and transaction, transaction will be
582 # committed on context exit.
583 with self._engine.begin() as connection:
584 yield connection
586 @abstractmethod
587 def _lockTables(
588 self, connection: sqlalchemy.engine.Connection, tables: Iterable[sqlalchemy.schema.Table] = ()
589 ) -> None:
590 """Acquire locks on the given tables.
592 This is an implementation hook for subclasses, called by `transaction`.
593 It should not be called directly by other code.
595 Parameters
596 ----------
597 connection : `sqlalchemy.engine.Connection`
598 Database connection object. It is guaranteed that transaction is
599 already in a progress for this connection.
600 tables : `Iterable` [ `sqlalchemy.schema.Table` ], optional
601 A list of tables to lock for the duration of this transaction.
602 These locks are guaranteed to prevent concurrent writes and allow
603 this transaction (only) to acquire the same locks (others should
604 block), but only prevent concurrent reads if the database engine
605 requires that in order to block concurrent writes.
606 """
607 raise NotImplementedError()
def isTableWriteable(self, table: sqlalchemy.schema.Table) -> bool:
    """Report whether rows may be written to a table.

    A table is writeable when the database connection is read-write or
    when the table is a temporary table.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy table object to check.

    Returns
    -------
    writeable : `bool`
        Whether this table is writeable.
    """
    databaseWriteable = self.isWriteable()
    return databaseWriteable or table.key in self._tempTables
def assertTableWriteable(self, table: sqlalchemy.schema.Table, msg: str) -> None:
    """Raise if the given table is not writeable.

    A table is writeable when the database connection is read-write or
    when the table is a temporary table (see `isTableWriteable`).

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy table object to check.
    msg : `str`
        Message for the exception raised when the table is not writeable.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised, with ``msg`` as its message, if the table is not
        writeable.
    """
    if self.isTableWriteable(table):
        return
    raise ReadOnlyDatabaseError(msg)
@contextmanager
def declareStaticTables(self, *, create: bool) -> Iterator[StaticTablesContext]:
    """Return a context manager in which the database's static DDL schema
    can be declared.

    Parameters
    ----------
    create : `bool`
        If `True`, attempt to create all tables at the end of the context.
        If `False`, they will be assumed to already exist.

    Returns
    -------
    schema : `StaticTablesContext`
        A helper object that is used to add new tables.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if ``create`` is `True` and `Database.isWriteable` is
        `False`.
    SchemaAlreadyDefinedError
        Raised if ``create`` is `True` and the namespace already contains
        tables.

    Examples
    --------
    Given a `Database` instance ``db``::

        with db.declareStaticTables(create=True) as schema:
            schema.addTable("table1", TableSpec(...))
            schema.addTable("table2", TableSpec(...))

    Notes
    -----
    A database's static DDL schema must be declared before any dynamic
    tables are managed via calls to `ensureTableExists` or
    `getExistingTable`. The order in which static schema tables are added
    inside the context block is unimportant; they will automatically be
    sorted and added in an order consistent with their foreign key
    relationships.
    """
    if create and not self.isWriteable():
        raise ReadOnlyDatabaseError(f"Cannot create tables in read-only database {self}.")
    self._metadata = sqlalchemy.MetaData(schema=self.namespace)
    try:
        context = StaticTablesContext(self)
        if create and context._tableNames:
            # The database looks initialized already; refuse to do
            # anything rather than risk modifying or destroying a valid
            # schema.
            raise SchemaAlreadyDefinedError(f"Cannot create tables in non-empty database {self}.")
        yield context
        # Every table is declared by now, so the foreign keys recorded by
        # the context can be attached.
        for table, constraint in context._foreignKeys:
            table.append_constraint(constraint)
        if create:
            namespace = self.namespace
            if namespace is not None and namespace not in context._inspector.get_schema_names():
                with self._connection() as connection:
                    connection.execute(sqlalchemy.schema.CreateSchema(namespace))
            # Some columns use sqlalchemy Sequence objects, and a
            # sqlalchemy bug makes building the repr of a Sequence emit a
            # deprecation warning; silence those warnings while the tables
            # are created.
            with warnings.catch_warnings():
                warnings.simplefilter("ignore", category=sqlalchemy.exc.SADeprecationWarning)
                self._metadata.create_all(self._engine)
            # Run the registered initializers in registration order.
            for initializer in context._initializers:
                initializer(self)
    except BaseException:
        # Leave no half-declared schema behind.
        self._metadata = None
        raise
@abstractmethod
def isWriteable(self) -> bool:
    """Report whether this client is able to modify the database.

    Abstract: concrete subclasses must provide the implementation.
    """
    raise NotImplementedError()
718 @abstractmethod
719 def __str__(self) -> str:
720 """Return a human-readable identifier for this `Database`, including
721 any namespace or schema that identifies its names within a `Registry`.
722 """
723 raise NotImplementedError()
@property
def dialect(self) -> sqlalchemy.engine.Dialect:
    """The SQLAlchemy dialect of the engine behind this database
    (`sqlalchemy.engine.Dialect`).
    """
    engine = self._engine
    return engine.dialect
def shrinkDatabaseEntityName(self, original: str) -> str:
    """Fit a name within this database engine's length limits for table,
    constraint, index, and sequence names.

    This default implementation returns the name unchanged.
    Implementations should not assume that simple truncation is safe,
    because multiple long names often begin with the same prefix.

    Parameters
    ----------
    original : `str`
        The original name.

    Returns
    -------
    shrunk : `str`
        The new, possibly shortened name.
    """
    return original
def expandDatabaseEntityName(self, shrunk: str) -> str:
    """Retrieve the original name for a database entity that was too long
    to fit within the database engine's limits.

    This default implementation returns the given name unchanged.

    Parameters
    ----------
    shrunk : `str`
        The possibly shortened name, as returned by
        `shrinkDatabaseEntityName`.

    Returns
    -------
    original : `str`
        The original name.
    """
    return shrunk
770 def _mangleTableName(self, name: str) -> str:
771 """Map a logical, user-visible table name to the true table name used
772 in the database.
774 The default implementation returns the given name unchanged.
776 Parameters
777 ----------
778 name : `str`
779 Input table name. Should not include a namespace (i.e. schema)
780 prefix.
782 Returns
783 -------
784 mangled : `str`
785 Mangled version of the table name (still with no namespace prefix).
787 Notes
788 -----
789 Reimplementations of this method must be idempotent - mangling an
790 already-mangled name must have no effect.
791 """
792 return name
794 def _makeColumnConstraints(self, table: str, spec: ddl.FieldSpec) -> List[sqlalchemy.CheckConstraint]:
795 """Create constraints based on this spec.
797 Parameters
798 ----------
799 table : `str`
800 Name of the table this column is being added to.
801 spec : `FieldSpec`
802 Specification for the field to be added.
804 Returns
805 -------
806 constraint : `list` of `sqlalchemy.CheckConstraint`
807 Constraint added for this column.
808 """
809 # By default we return no additional constraints
810 return []
def _convertFieldSpec(
    self, table: str, spec: ddl.FieldSpec, metadata: sqlalchemy.MetaData, **kwargs: Any
) -> sqlalchemy.schema.Column:
    """Convert a `FieldSpec` to a `sqlalchemy.schema.Column`.

    Parameters
    ----------
    table : `str`
        Name of the table this column is being added to.
    spec : `FieldSpec`
        Specification for the field to be added.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema this field's table is
        being added to.
    **kwargs
        Additional keyword arguments to forward to the
        `sqlalchemy.schema.Column` constructor. This is provided to make
        it easier for derived classes to delegate to ``super()`` while
        making only minor changes.

    Returns
    -------
    column : `sqlalchemy.schema.Column`
        SQLAlchemy representation of the field.
    """
    columnArgs: List[Any] = [spec.name, spec.getSizedColumnType()]
    if spec.autoincrement:
        # Databases without native auto-increment need a sequence;
        # sqlalchemy ignores it for databases that do support it natively.
        sequenceName = self.shrinkDatabaseEntityName(f"{table}_seq_{spec.name}")
        columnArgs.append(sqlalchemy.Sequence(sequenceName, metadata=metadata))
    assert spec.doc is None or isinstance(spec.doc, str), f"Bad doc for {table}.{spec.name}."
    return sqlalchemy.schema.Column(
        *columnArgs,
        nullable=spec.nullable,
        primary_key=spec.primaryKey,
        comment=spec.doc,
        server_default=spec.default,
        **kwargs,
    )
def _convertForeignKeySpec(
    self, table: str, spec: ddl.ForeignKeySpec, metadata: sqlalchemy.MetaData, **kwargs: Any
) -> sqlalchemy.schema.ForeignKeyConstraint:
    """Convert a `ForeignKeySpec` to a
    `sqlalchemy.schema.ForeignKeyConstraint`.

    Parameters
    ----------
    table : `str`
        Name of the table this foreign key is being added to.
    spec : `ForeignKeySpec`
        Specification for the foreign key to be added.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema this constraint is
        being added to.
    **kwargs
        Additional keyword arguments to forward to the
        `sqlalchemy.schema.ForeignKeyConstraint` constructor. This is
        provided to make it easier for derived classes to delegate to
        ``super()`` while making only minor changes.

    Returns
    -------
    constraint : `sqlalchemy.schema.ForeignKeyConstraint`
        SQLAlchemy representation of the constraint.
    """
    # Mangle the referenced table's name once; it is needed both for the
    # constraint name and for the referenced column names.
    targetTable = self._mangleTableName(spec.table)
    name = self.shrinkDatabaseEntityName(
        "_".join(["fkey", table, targetTable, *spec.target, *spec.source])
    )
    return sqlalchemy.schema.ForeignKeyConstraint(
        spec.source,
        [f"{targetTable}.{col}" for col in spec.target],
        name=name,
        ondelete=spec.onDelete,
        **kwargs,
    )
def _convertExclusionConstraintSpec(
    self,
    table: str,
    spec: Tuple[Union[str, Type[TimespanDatabaseRepresentation]], ...],
    metadata: sqlalchemy.MetaData,
) -> sqlalchemy.schema.Constraint:
    """Translate one entry of `ddl.TableSpec.exclusion` into a SQLAlchemy
    constraint object.

    Parameters
    ----------
    table : `str`
        Name of the table the constraint belongs to.
    spec : `tuple` [ `str` or `type` ]
        Column names (`str`) plus the `type` returned by
        `getTimespanRepresentation`, which must appear exactly once.  The
        order of the entries is the order of the columns in the index
        that backs the constraint.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema this constraint is
        being added to.

    Returns
    -------
    constraint : `sqlalchemy.schema.Constraint`
        SQLAlchemy representation of the constraint.

    Raises
    ------
    NotImplementedError
        Raised if this database does not support exclusion constraints;
        this base implementation always raises.
    """
    message = f"Database {self} does not support exclusion constraints."
    raise NotImplementedError(message)
def _convertTableSpec(
    self, name: str, spec: ddl.TableSpec, metadata: sqlalchemy.MetaData, **kwargs: Any
) -> sqlalchemy.schema.Table:
    """Convert a `TableSpec` to a `sqlalchemy.schema.Table`.

    Parameters
    ----------
    name : `str`
        Name of the table; it is passed through `_mangleTableName` before
        being used.
    spec : `TableSpec`
        Specification for the table to be created.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema this table is being
        added to.
    **kwargs
        Additional keyword arguments to forward to the
        `sqlalchemy.schema.Table` constructor.  This is provided to make it
        easier for derived classes to delegate to ``super()`` while making
        only minor changes.

    Returns
    -------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy representation of the table.

    Notes
    -----
    Foreign key *constraints* from ``spec.foreignKeys`` are deliberately not
    created here, in order to avoid circular dependencies; only the indexes
    requested for them are.  The constraints are added by higher-level
    logic in `ensureTableExists`, `getExistingTable`, and
    `declareStaticTables`.
    """
    name = self._mangleTableName(name)

    # Columns come first, followed by any per-column constraints.
    items = [self._convertFieldSpec(name, field, metadata) for field in spec.fields]
    for field in spec.fields:
        items.extend(self._makeColumnConstraints(name, field))

    # Column tuples that already get an index from the primary key or a
    # unique constraint; explicit and foreign-key indexes on the same
    # columns would be duplicates and are skipped below.
    primaryKey = tuple(field.name for field in spec.fields if field.primaryKey)
    covered = {primaryKey}

    for columns in spec.unique:
        constraintName = self.shrinkDatabaseEntityName("_".join([name, "unq"] + list(columns)))
        items.append(sqlalchemy.schema.UniqueConstraint(*columns, name=constraintName))
    covered.update(spec.unique)

    for index in spec.indexes:
        if index.columns in covered:
            continue
        indexName = self.shrinkDatabaseEntityName("_".join([name, "idx"] + list(index.columns)))
        items.append(
            sqlalchemy.schema.Index(
                indexName,
                *index.columns,
                unique=(index.columns in spec.unique),
                **index.kwargs,
            )
        )
    # Only record the explicit indexes once all of them have been handled,
    # so they are compared against the primary key and unique sets alone.
    covered.update(index.columns for index in spec.indexes)

    for fk in spec.foreignKeys:
        if not fk.addIndex or fk.source in covered:
            continue
        fkIndexName = self.shrinkDatabaseEntityName("_".join((name, "fkidx") + fk.source))
        items.append(sqlalchemy.schema.Index(fkIndexName, *fk.source))

    for excl in spec.exclusion:
        items.append(self._convertExclusionConstraintSpec(name, excl, metadata))

    assert spec.doc is None or isinstance(spec.doc, str), f"Bad doc for {name}."
    return sqlalchemy.schema.Table(name, metadata, *items, comment=spec.doc, info=spec, **kwargs)
def ensureTableExists(self, name: str, spec: ddl.TableSpec) -> sqlalchemy.schema.Table:
    """Ensure that a table with the given name and specification exists,
    creating it if necessary.

    Parameters
    ----------
    name : `str`
        Name of the table (not including namespace qualifiers).
    spec : `TableSpec`
        Specification for the table.  This will be used when creating the
        table, and *may* be used when obtaining an existing table to check
        for consistency, but no such check is guaranteed.

    Returns
    -------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy representation of the table.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False`, and the table does not
        already exist.
    DatabaseConflictError
        Raised if the table exists but ``spec`` is inconsistent with its
        definition.
    sqlalchemy.exc.DatabaseError
        Raised if creating the table failed and the table is still absent
        from the database afterwards.

    Notes
    -----
    This method may not be called within transactions.  It may be called on
    read-only databases if and only if the table does in fact already
    exist.

    Subclasses may override this method, but usually should not need to.
    """
    # TODO: if _engine is used to make a table then it uses separate
    # connection and should not interfere with current transaction
    assert (
        self._session_connection is None or not self._session_connection.in_transaction()
    ), "Table creation interrupts transactions."
    assert self._metadata is not None, "Static tables must be declared before dynamic tables."
    table = self.getExistingTable(name, spec)
    if table is not None:
        return table
    if not self.isWriteable():
        raise ReadOnlyDatabaseError(
            f"Table {name} does not exist, and cannot be created "
            f"because database {self} is read-only."
        )
    table = self._convertTableSpec(name, spec, self._metadata)
    for foreignKeySpec in spec.foreignKeys:
        table.append_constraint(self._convertForeignKeySpec(name, foreignKeySpec, self._metadata))
    try:
        with self._connection() as connection:
            table.create(connection)
    except sqlalchemy.exc.DatabaseError:
        # Some other process could have created the table meanwhile, which
        # usually causes OperationalError or ProgrammingError.  We cannot
        # use IF NOT EXISTS clause in this case due to PostgreSQL race
        # condition on server side which causes IntegrityError.  Instead we
        # catch these exceptions (they all inherit DatabaseError) and
        # re-check whether table is now there.
        #
        # The re-check has to ask the database itself: the Table object
        # built above is already registered in ``self._metadata``, so a
        # metadata lookup would always "find" it and hide real failures.
        inspector = sqlalchemy.inspect(self._engine)
        if table.name not in inspector.get_table_names(schema=self.namespace):
            # Creation genuinely failed; do not leave a phantom table
            # behind in the in-memory schema.
            self._metadata.remove(table)
            raise
    return table
def getExistingTable(self, name: str, spec: ddl.TableSpec) -> Optional[sqlalchemy.schema.Table]:
    """Look up a table that already exists, given its name and
    specification.

    Parameters
    ----------
    name : `str`
        Name of the table (not including namespace qualifiers).
    spec : `TableSpec`
        Specification for the table.  It is used to build the SQLAlchemy
        representation of the table and to check that the actual table in
        the database is consistent with it.

    Returns
    -------
    table : `sqlalchemy.schema.Table` or `None`
        SQLAlchemy representation of the table, or `None` if it does not
        exist.

    Raises
    ------
    DatabaseConflictError
        Raised if the table exists but ``spec`` is inconsistent with its
        definition.

    Notes
    -----
    This method can be called within transactions and never modifies the
    database.

    Subclasses may override this method, but usually should not need to.
    """
    assert self._metadata is not None, "Static tables must be declared before dynamic tables."
    name = self._mangleTableName(name)
    if self.namespace is None:
        metadataKey = name
    else:
        metadataKey = f"{self.namespace}.{name}"
    table = self._metadata.tables.get(metadataKey)

    if table is not None:
        # Already known to the in-memory schema: just compare column names.
        if spec.fields.names != set(table.columns.keys()):
            raise DatabaseConflictError(
                f"Table '{name}' has already been defined differently; the new "
                f"specification has columns {list(spec.fields.names)}, while "
                f"the previous definition has {list(table.columns.keys())}."
            )
        return table

    # Not in the in-memory schema; ask the database itself.
    inspector = sqlalchemy.inspect(self._engine)
    if name not in inspector.get_table_names(schema=self.namespace):
        return None
    _checkExistingTableDefinition(name, spec, inspector.get_columns(name, schema=self.namespace))
    table = self._convertTableSpec(name, spec, self._metadata)
    for fkSpec in spec.foreignKeys:
        table.append_constraint(self._convertForeignKeySpec(name, fkSpec, self._metadata))
    return table
@classmethod
def getTimespanRepresentation(cls) -> Type[TimespanDatabaseRepresentation]:
    """Return the `type` that describes how `Timespan` objects are stored
    in this database.

    The result is not consumed automatically anywhere else in `Database`;
    calling code is responsible for keeping its DDL and queries consistent
    with it.

    Returns
    -------
    TimespanReprClass : `type` (`TimespanDatabaseRepresentation` subclass)
        A type that encapsulates the way `Timespan` objects should be
        stored in this database.

    Notes
    -----
    The choice of representation belongs to the `Database` implementation,
    but the timespan-mangling logic itself is kept outside of it, for two
    main reasons:

    - Timespans appear in relatively few tables and queries in our
      typical usage, and the code that operates on them already knows it
      is working with timespans.  A timespan-representation-aware
      implementation of, say, `insert`, would instead need extra logic to
      work out when mangling is required, which would usually be useless
      overhead.

    - SQLAlchemy's rich SELECT query expression system has no way to wrap
      multiple columns in a single expression object (the ORM does, but
      we are not using the ORM).  We would therefore have to wrap _much_
      more of that code in our own interfaces to encapsulate timespan
      representations there.
    """
    return TimespanDatabaseRepresentation.Compound
def sync(
    self,
    table: sqlalchemy.schema.Table,
    *,
    keys: Dict[str, Any],
    compared: Optional[Dict[str, Any]] = None,
    extra: Optional[Dict[str, Any]] = None,
    returning: Optional[Sequence[str]] = None,
    update: bool = False,
) -> Tuple[Optional[Dict[str, Any]], Union[bool, Dict[str, Any]]]:
    """Insert into a table as necessary to ensure the database contains
    values equivalent to the given ones.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table to be queried and possibly inserted into.
    keys : `dict`
        Column name-value pairs used to search for an existing row; must
        be a combination that can be used to select a single row if one
        exists.  If no such row exists, these values are used in the
        insert.
    compared : `dict`, optional
        Column name-value pairs that are compared to those in any existing
        row.  If no such row exists, these values are used in the insert.
    extra : `dict`, optional
        Column name-value pairs that are ignored if a matching row exists,
        but used in an insert if one is necessary.
    returning : `~collections.abc.Sequence` of `str`, optional
        The names of columns whose values should be returned.
    update : `bool`, optional
        If `True` (`False` is default), update the existing row with the
        values in ``compared`` instead of raising `DatabaseConflictError`.

    Returns
    -------
    row : `dict`, optional
        The value of the fields indicated by ``returning``, or `None` if
        ``returning`` is `None`.
    inserted_or_updated : `bool` or `dict`
        If `True`, a new row was inserted; if `False`, a matching row
        already existed.  If a `dict` (only possible if ``update=True``),
        then an existing row was updated, and the dict maps the names of
        the updated columns to their *old* values (new values can be
        obtained from ``compared``).

    Raises
    ------
    DatabaseConflictError
        Raised if the values in ``compared`` do not match the values in the
        database.
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False`, and no matching record
        already exists.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    It may be called on read-only databases if and only if the matching row
    does in fact already exist.
    """

    def keyClause() -> Any:
        """Build the WHERE clause selecting the row identified by
        ``keys``.
        """
        return sqlalchemy.sql.and_(*[table.columns[k] == v for k, v in keys.items()])

    def differs(existingValue: Any, givenValue: Any) -> bool:
        """Compare a stored value with a caller-supplied one, delegating
        to `time_utils.TimeConverter` when the stored value is an
        `astropy.time.Time`.
        """
        if isinstance(existingValue, astropy.time.Time):
            return not time_utils.TimeConverter().times_equal(existingValue, givenValue)
        return existingValue != givenValue

    def check() -> Tuple[int, Optional[Dict[str, Any]], Optional[List]]:
        """Query for a row that matches the ``keys`` argument, and compare
        it to what was given by the caller.

        Returns
        -------
        n : `int`
            Number of matching rows.  ``n != 1`` is always an error, but
            which kind depends on where `check` is being called.
        bad : `dict` or `None`
            The subset of the keys of ``compared`` for which the existing
            values did not match the given one, mapped to the existing
            values in the database.  A non-empty ``bad`` is a conflict;
            how it is handled depends on the calling context.  `None` if
            ``n != 1``.
        result : `list` or `None`
            Values in the database for the columns given in ``returning``,
            or `None` if ``returning is None`` or ``n != 1``.
        """
        wanted: Set[str] = set()
        if compared is not None:
            wanted.update(compared.keys())
        if returning is not None:
            wanted.update(returning)
        if not wanted:
            # Some column has to be selected, even when all we care about
            # is the number of rows that come back.
            wanted.add(next(iter(keys.keys())))
        selectSql = (
            sqlalchemy.sql.select(*[table.columns[k].label(k) for k in wanted])
            .select_from(table)
            .where(keyClause())
        )
        with self._connection() as connection:
            fetched = list(connection.execute(selectSql).mappings())
        if len(fetched) != 1:
            return len(fetched), None, None
        existing = fetched[0]
        if compared is None:
            inconsistencies = {}
        else:
            inconsistencies = {k: existing[k] for k, v in compared.items() if differs(existing[k], v)}
        toReturn: Optional[list] = None if returning is None else [existing[k] for k in returning]
        return 1, inconsistencies, toReturn

    def format_bad(inconsistencies: Dict[str, Any]) -> str:
        """Format the 'bad' dictionary of existing values returned by
        ``check`` into a string suitable for an error message.
        """
        assert compared is not None, "Should not be able to get inconsistencies without comparing."
        return ", ".join(f"{k}: {v!r} != {compared[k]!r}" for k, v in inconsistencies.items())

    inserted_or_updated: Union[bool, Dict[str, Any]]
    if not self.isTableWriteable(table):
        # Database is not writeable; just see if the row exists.
        n, bad, result = check()
        if n < 1:
            raise ReadOnlyDatabaseError("sync needs to insert, but database is read-only.")
        if n > 1:
            raise RuntimeError("Keys passed to sync do not comprise a unique constraint.")
        if bad:
            if update:
                raise ReadOnlyDatabaseError("sync needs to update, but database is read-only.")
            raise DatabaseConflictError(
                f"Conflict in sync for table {table.name} on column(s) {format_bad(bad)}."
            )
        inserted_or_updated = False
    else:
        # Try an insert first, but allow it to fail (in only specific
        # ways).
        row = keys.copy()
        if compared is not None:
            row.update(compared)
        if extra is not None:
            row.update(extra)
        with self.transaction():
            inserted = bool(self.ensure(table, row))
            # check() must run inside the transaction for this branch, so
            # that an insert that did not do what we expected is rolled
            # back.  That limits how much logic can be shared with the
            # read-only branch above.
            n, bad, result = check()
            if n < 1:
                raise ConflictingDefinitionError(
                    f"Attempted to ensure {row} exists by inserting it with ON CONFLICT IGNORE, "
                    f"but a post-insert query on {keys} returned no results. "
                    f"Insert was {'' if inserted else 'not '}reported as successful. "
                    "This can occur if the insert violated a database constraint other than the "
                    "unique constraint or primary key used to identify the row in this call."
                )
            if n > 1:
                raise RuntimeError(
                    f"Keys passed to sync {keys.keys()} do not comprise a "
                    f"unique constraint for table {table.name}."
                )
            if not bad:
                inserted_or_updated = inserted
            else:
                assert (
                    compared is not None
                ), "Should not be able to get inconsistencies without comparing."
                if inserted:
                    raise RuntimeError(
                        f"Conflict ({bad}) in sync after successful insert; this is "
                        "possible if the same table is being updated by a concurrent "
                        "process that isn't using sync, but it may also be a bug in "
                        "daf_butler."
                    )
                if not update:
                    raise DatabaseConflictError(
                        f"Conflict in sync for table {table.name} on column(s) {format_bad(bad)}."
                    )
                with self._connection() as connection:
                    connection.execute(
                        table.update().where(keyClause()).values(**{k: compared[k] for k in bad.keys()})
                    )
                inserted_or_updated = bad

    if returning is None:
        return None, inserted_or_updated
    assert result is not None
    return dict(zip(returning, result)), inserted_or_updated
def insert(
    self,
    table: sqlalchemy.schema.Table,
    *rows: dict,
    returnIds: bool = False,
    select: Optional[sqlalchemy.sql.expression.SelectBase] = None,
    names: Optional[Iterable[str]] = None,
) -> Optional[List[int]]:
    """Insert one or more rows into a table, optionally returning
    autoincrement primary key values.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    returnIds : `bool`
        If `True` (`False` is default), return the values of the table's
        autoincrement primary key field (which must exist).
    select : `sqlalchemy.sql.SelectBase`, optional
        A SELECT query expression to insert rows from.  Cannot be provided
        with either ``rows`` or ``returnIds=True``.
    names : `Iterable` [ `str` ], optional
        Names of columns in ``table`` to be populated, ordered to match the
        columns returned by ``select``.  Ignored if ``select`` is `None`.
        If not provided, the columns returned by ``select`` must be named
        to match the desired columns of ``table``.
    *rows
        Positional arguments are the rows to be inserted, as dictionaries
        mapping column name to value.  The keys in all dictionaries must
        be the same.

    Returns
    -------
    ids : `None`, or `list` of `int`
        If ``returnIds`` is `True`, a `list` containing the inserted
        values for the table's autoincrement primary key.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.
    TypeError
        Raised if ``select`` is combined with ``rows`` or
        ``returnIds=True``.

    Notes
    -----
    The default implementation uses bulk insert syntax when ``returnIds``
    is `False`, and a loop over single-row insert operations when it is
    `True`.

    Derived classes should reimplement when they can provide a more
    efficient implementation (especially for the latter case).

    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.
    """
    self.assertTableWriteable(table, f"Cannot insert into read-only table {table}.")
    if select is not None and (rows or returnIds):
        raise TypeError("'select' is incompatible with passing value rows or returnIds=True.")
    if not rows and select is None:
        # Nothing to insert at all.
        return [] if returnIds else None
    with self._connection() as connection:
        if returnIds:
            # One statement per row so each generated key can be read back.
            statement = table.insert()
            return [connection.execute(statement, row).inserted_primary_key[0] for row in rows]
        if select is None:
            connection.execute(table.insert(), rows)
            return None
        if names is None:
            # columns() is deprecated since 1.4, but
            # selected_columns() method did not exist in 1.3.
            if hasattr(select, "selected_columns"):
                names = select.selected_columns.keys()
            else:
                names = select.columns.keys()
        connection.execute(table.insert().from_select(names, select))
        return None
@abstractmethod
def replace(self, table: sqlalchemy.schema.Table, *rows: dict) -> None:
    """Insert rows into a table, overwriting any existing row whose
    primary key a new row would otherwise collide with.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    *rows
        Positional arguments are the rows to be inserted, as dictionaries
        mapping column name to value.  The keys in all dictionaries must
        be the same.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    Implementations should raise a `sqlalchemy.exc.IntegrityError`
    exception when a constraint other than the primary key would be
    violated.

    Implementations are not required to support `replace` on tables
    with autoincrement keys.
    """
    # Abstract: concrete databases must supply their own implementation.
    raise NotImplementedError()
@abstractmethod
def ensure(self, table: sqlalchemy.schema.Table, *rows: dict, primary_key_only: bool = False) -> int:
    """Insert rows into a table, silently skipping every row whose
    insertion would violate a unique constraint.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    *rows
        Positional arguments are the rows to be inserted, as dictionaries
        mapping column name to value.  The keys in all dictionaries must
        be the same.
    primary_key_only : `bool`, optional
        If `True` (`False` is default), only skip rows that violate the
        primary key constraint, and raise an exception (and rollback
        transactions) for other constraint violations.

    Returns
    -------
    count : `int`
        The number of rows actually inserted.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.
        This is raised even if the operation would do nothing even on a
        writeable database.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    Implementations are not required to support `ensure` on tables
    with autoincrement keys.
    """
    # Abstract: concrete databases must supply their own implementation.
    raise NotImplementedError()
def delete(self, table: sqlalchemy.schema.Table, columns: Iterable[str], *rows: dict) -> int:
    """Delete one or more rows from a table.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table that rows should be deleted from.
    columns : `~collections.abc.Iterable` of `str`
        The names of columns that will be used to constrain the rows to
        be deleted; these will be combined via ``AND`` to form the
        ``WHERE`` clause of the delete query.  Any iterable is accepted,
        including one-shot iterators.
    *rows
        Positional arguments are the keys of rows to be deleted, as
        dictionaries mapping column name to value.  The keys in all
        dictionaries must be exactly the names in ``columns``.

    Returns
    -------
    count : `int`
        Number of rows deleted.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    The default implementation should be sufficient for most derived
    classes.
    """
    self.assertTableWriteable(table, f"Cannot delete from read-only table {table}.")
    # Materialize before testing for emptiness: an iterator or generator is
    # always truthy, even when it yields nothing.
    columns = list(columns)
    if columns and not rows:
        # If there are no columns, this operation is supposed to delete
        # everything (so we proceed as usual).  But if there are columns,
        # but no rows, it was a constrained bulk operation where the
        # constraint is that no rows match, and we should short-circuit
        # while reporting that no rows were affected.
        return 0
    sql = table.delete()

    # More efficient to use IN operator if there is only one
    # variable changing across all rows.
    content: Dict[str, Set] = defaultdict(set)
    if len(columns) == 1:
        # Nothing to calculate since we can always use IN
        column = columns[0]
        changing_columns = [column]
        content[column] = {row[column] for row in rows}
    else:
        for row in rows:
            for k, v in row.items():
                content[k].add(v)
        changing_columns = [col for col, values in content.items() if len(values) > 1]

    if len(changing_columns) != 1:
        # More than one column changes each time so do explicit bind
        # parameters and have each row processed separately.
        whereTerms = [table.columns[name] == sqlalchemy.sql.bindparam(name) for name in columns]
        if whereTerms:
            sql = sql.where(sqlalchemy.sql.and_(*whereTerms))
        with self._connection() as connection:
            return connection.execute(sql, rows).rowcount

    # One of the columns has changing values but any others are
    # fixed.  In this case we can use an IN operator and be more
    # efficient.
    name = changing_columns[0]

    # Simple equality clauses for the unchanging columns; each of their
    # value sets holds exactly one element.
    clauses = [table.columns[k] == next(iter(v)) for k, v in content.items() if k != name]

    # The IN operator will not work for "infinite" numbers of
    # rows so must batch it up into distinct calls.
    in_content = list(content[name])
    n_per_loop = 1_000  # Controls how many items to put in IN clause
    rowcount = 0
    with self._connection() as connection:
        for iposn in range(0, len(in_content), n_per_loop):
            in_clause = table.columns[name].in_(in_content[iposn : iposn + n_per_loop])
            newsql = sql.where(sqlalchemy.sql.and_(*clauses, in_clause))
            rowcount += connection.execute(newsql).rowcount
    return rowcount
def deleteWhere(self, table: sqlalchemy.schema.Table, where: sqlalchemy.sql.ClauseElement) -> int:
    """Delete rows from a table using a pre-constructed WHERE clause.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table that rows should be deleted from.
    where : `sqlalchemy.sql.ClauseElement`
        Expression used as the ``WHERE`` clause of the delete query; it
        is passed to the statement as given.

    Returns
    -------
    count : `int`
        Number of rows deleted.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    The default implementation should be sufficient for most derived
    classes.
    """
    self.assertTableWriteable(table, f"Cannot delete from read-only table {table}.")
    statement = table.delete().where(where)
    with self._connection() as connection:
        outcome = connection.execute(statement)
        return outcome.rowcount
def update(self, table: sqlalchemy.schema.Table, where: Dict[str, str], *rows: dict) -> int:
    """Update one or more rows in a table.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table containing the rows to be updated.
    where : `dict` [`str`, `str`]
        A mapping from the names of columns that will be used to search for
        existing rows to the keys that will hold these values in the
        ``rows`` dictionaries.  Note that these may not be the same due to
        SQLAlchemy limitations.
    *rows
        Positional arguments are the rows to be updated.  The keys in all
        dictionaries must be the same, and may correspond to either a
        value in the ``where`` dictionary or the name of a column to be
        updated.

    Returns
    -------
    count : `int`
        Number of rows matched (regardless of whether the update actually
        modified them).

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    The default implementation should be sufficient for most derived
    classes.
    """
    self.assertTableWriteable(table, f"Cannot update read-only table {table}.")
    if not rows:
        # Nothing to do; avoid issuing a statement with no parameters.
        return 0
    statement = table.update()
    # Each search column is matched against a bind parameter named by the
    # corresponding value in ``where``; the rows supply those parameters.
    matchTerms = [
        table.columns[column] == sqlalchemy.sql.bindparam(paramName) for column, paramName in where.items()
    ]
    statement = statement.where(sqlalchemy.sql.and_(*matchTerms))
    with self._connection() as connection:
        return connection.execute(statement, rows).rowcount
def query(
    self, sql: sqlalchemy.sql.Selectable, *args: Any, **kwargs: Any
) -> sqlalchemy.engine.ResultProxy:
    """Run a SELECT query against the database.

    Parameters
    ----------
    sql : `sqlalchemy.sql.Selectable`
        A SQLAlchemy representation of a ``SELECT`` query.
    *args
        Additional positional arguments are forwarded to
        `sqlalchemy.engine.Connection.execute`.
    **kwargs
        Additional keyword arguments are forwarded to
        `sqlalchemy.engine.Connection.execute`.

    Returns
    -------
    result : `sqlalchemy.engine.ResultProxy`
        Query results.

    Notes
    -----
    The default implementation should be sufficient for most derived
    classes.
    """
    # A Result object is handed back to the caller, so the lifetime of the
    # connection behind it matters.  Inside a transaction context the
    # session connection is reused; otherwise a dedicated connection is
    # opened that is closed together with the result.
    #
    # TODO: May be better approach would be to make this method return a
    # context manager, but this means big changes for callers of this
    # method.
    connection = self._session_connection
    if connection is None:
        connection = self._engine.connect(close_with_result=True)
    # TODO: should we guard against non-SELECT queries here?
    return connection.execute(sql, *args, **kwargs)
@abstractmethod
def constant_rows(
    self,
    fields: NamedValueAbstractSet[ddl.FieldSpec],
    *rows: dict,
    name: Optional[str] = None,
) -> sqlalchemy.sql.FromClause:
    """Return a SQLAlchemy object that represents a small number of
    constant-valued rows.

    Parameters
    ----------
    fields : `NamedValueAbstractSet` [ `ddl.FieldSpec` ]
        The columns of the rows.  Unique and foreign key constraints are
        ignored.
    *rows : `dict`
        Values for the rows, keyed by field name.
    name : `str`, optional
        If provided, the name of the SQL construct.  If not provided, an
        opaque but unique identifier is generated.

    Returns
    -------
    from_clause : `sqlalchemy.sql.FromClause`
        SQLAlchemy object representing the given rows.  This is guaranteed
        to be something that can be directly joined into a ``SELECT``
        query's ``FROM`` clause, and will not involve a temporary table
        that needs to be cleaned up later.

    Notes
    -----
    The default implementation uses the SQL-standard ``VALUES`` construct,
    but support for that construct is varied enough across popular RDBMSs
    that the method is still marked abstract to force explicit opt-in via
    delegation to `super`.
    """
    if name is None:
        name = f"tmp_{uuid.uuid4().hex}"
    columns = [sqlalchemy.Column(field.name, field.getSizedColumnType()) for field in fields]
    valuesClause = sqlalchemy.sql.values(*columns, name=name)
    # Each row becomes a tuple ordered like ``fields``.
    data = [tuple(row[fieldName] for fieldName in fields.names) for row in rows]
    return valuesClause.data(data)
def get_constant_rows_max(self) -> int:
    """Return the largest number of rows callers should hand to
    `constant_rows` for this backend.

    Returns
    -------
    max : `int`
        Maximum number of rows.

    Notes
    -----
    The value should reflect typical performance profiles (or a guess at
    these), not just hard database engine limits.
    """
    maxRows = 100
    return maxRows
# Attribute declarations only: no value is assigned at this point.
# NOTE(review): presumably these are set by concrete implementations or
# their constructors -- confirm against the rest of the class.
origin: int
"""An integer ID that should be used as the default for any datasets,
quanta, or other entities that use a (autoincrement, origin) compound
primary key (`int`).
"""

namespace: Optional[str]
"""The schema or namespace this database instance is associated with
(`str` or `None`).
"""