Coverage for python/lsst/daf/butler/registry/interfaces/_database.py: 14%
412 statements
« prev ^ index » next coverage.py v6.5.0, created at 2023-01-19 02:07 -0800
« prev ^ index » next coverage.py v6.5.0, created at 2023-01-19 02:07 -0800
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = [
24 "Database",
25 "ReadOnlyDatabaseError",
26 "DatabaseConflictError",
27 "SchemaAlreadyDefinedError",
28 "StaticTablesContext",
29]
31import uuid
32import warnings
33from abc import ABC, abstractmethod
34from collections import defaultdict
35from contextlib import contextmanager
36from typing import (
37 Any,
38 Callable,
39 Dict,
40 Iterable,
41 Iterator,
42 List,
43 Optional,
44 Sequence,
45 Set,
46 Tuple,
47 Type,
48 Union,
49 final,
50)
52import astropy.time
53import sqlalchemy
55from ...core import TimespanDatabaseRepresentation, ddl, time_utils
56from ...core.named import NamedValueAbstractSet
57from .._exceptions import ConflictingDefinitionError
def _checkExistingTableDefinition(name: str, spec: ddl.TableSpec, inspection: List[Dict[str, Any]]) -> None:
    """Verify that a table found in the database has exactly the columns
    declared by its specification.

    Only the *set* of column names is compared; column order is ignored.

    Parameters
    ----------
    name : `str`
        Table name; used only when building the error message.
    spec : `ddl.TableSpec`
        Expected definition of the table.
    inspection : `list` [ `dict` ]
        Column descriptions as returned by
        `sqlalchemy.engine.reflection.Inspector.get_columns`.  Each entry
        must provide a ``"name"`` key.

    Raises
    ------
    DatabaseConflictError
        Raised if the column names in the database differ from those in the
        specification.
    """
    found = [column["name"] for column in inspection]
    expected = spec.fields.names
    if expected != set(found):
        message = (
            f"Table '{name}' exists but is defined differently in the database; "
            f"specification has columns {list(expected)}, while the "
            f"table in the database has {found}."
        )
        raise DatabaseConflictError(message)
class ReadOnlyDatabaseError(RuntimeError):
    """Error raised when a write operation is attempted through a `Database`
    that is read-only.
    """
class DatabaseConflictError(ConflictingDefinitionError):
    """Error raised when what is stored in the database (row values or
    schema entities) does not match what this client expects.
    """
class SchemaAlreadyDefinedError(RuntimeError):
    """Error raised when initialization of a database schema is requested
    but some tables already exist.
    """
class StaticTablesContext:
    """Helper used to declare the static schema of a registry layer in a
    database.

    Instances are handed out by `Database.declareStaticTables`; client code
    is not expected to construct them in any other way.
    """

    def __init__(self, db: Database, connection: sqlalchemy.engine.Connection):
        self._db = db
        self._inspector = sqlalchemy.inspect(connection)
        # Snapshot of the tables that already exist in the namespace at the
        # time the context is created.
        self._tableNames = frozenset(self._inspector.get_table_names(schema=self._db.namespace))
        # Foreign keys are collected here and attached by the owning
        # `Database` once the context block has finished.
        self._foreignKeys: List[Tuple[sqlalchemy.schema.Table, sqlalchemy.schema.ForeignKeyConstraint]] = []
        self._initializers: List[Callable[[Database], None]] = []

    def addTable(self, name: str, spec: ddl.TableSpec) -> sqlalchemy.schema.Table:
        """Declare a table and return its SQLAlchemy representation.

        Actual creation of the table may be deferred until the context
        returned by `Database.declareStaticTables` exits, so tables can be
        declared in any order regardless of foreign key relationships.

        Parameters
        ----------
        name : `str`
            Logical (unmangled) name of the table.
        spec : `ddl.TableSpec`
            Specification of the table.

        Returns
        -------
        table : `sqlalchemy.schema.Table`
            SQLAlchemy representation of the table.
        """
        db = self._db
        mangled = db._mangleTableName(name)
        if mangled in self._tableNames:
            # The table is already present: make sure its columns agree
            # with the specification.
            existing = self._inspector.get_columns(mangled, schema=db.namespace)
            _checkExistingTableDefinition(mangled, spec, existing)
        metadata = db._metadata
        assert metadata is not None, "Guaranteed by context manager that returns this object."
        table = db._convertTableSpec(mangled, spec, metadata)
        # Remember the foreign keys instead of attaching them right away.
        self._foreignKeys.extend(
            (table, db._convertForeignKeySpec(mangled, keySpec, metadata)) for keySpec in spec.foreignKeys
        )
        return table

    def addTableTuple(self, specs: Tuple[ddl.TableSpec, ...]) -> Tuple[sqlalchemy.schema.Table, ...]:
        """Declare every table in a named tuple of specifications and return
        the SQLAlchemy tables in a named tuple of the same type.

        Each field name of ``specs`` is used as the name of the corresponding
        table.  As with `addTable`, creation may be deferred until the
        context returned by `Database.declareStaticTables` exits.

        Notes
        -----
        ``specs`` *must* be an instance of a type made by
        `collections.namedtuple` rather than a plain `tuple`, and the result
        has that same type.  `~collections.namedtuple` is a factory for
        `type` objects and not a type itself, so this requirement cannot be
        expressed with type annotations.
        """
        tables = (self.addTable(field, spec) for field, spec in zip(specs._fields, specs))  # type: ignore
        return specs._make(tables)  # type: ignore

    def addInitializer(self, initializer: Callable[[Database], None]) -> None:
        """Register a callable performing one-time database initialization.

        Initialization is anything that changes the state of the database
        and must happen exactly once after the schema has been created, for
        example populating schema attributes.

        Parameters
        ----------
        initializer : callable
            Callable taking a single `Database` argument.
        """
        self._initializers.append(initializer)
178class Database(ABC):
179 """An abstract interface that represents a particular database engine's
180 representation of a single schema/namespace/database.
182 Parameters
183 ----------
184 origin : `int`
185 An integer ID that should be used as the default for any datasets,
186 quanta, or other entities that use a (autoincrement, origin) compound
187 primary key.
188 engine : `sqlalchemy.engine.Engine`
189 The SQLAlchemy engine for this `Database`.
190 namespace : `str`, optional
191 Name of the schema or namespace this instance is associated with.
192 This is passed as the ``schema`` argument when constructing a
193 `sqlalchemy.schema.MetaData` instance. We use ``namespace`` instead to
194 avoid confusion between "schema means namespace" and "schema means
195 table definitions".
197 Notes
198 -----
199 `Database` requires all write operations to go through its special named
200 methods. Our write patterns are sufficiently simple that we don't really
201 need the full flexibility of SQL insert/update/delete syntax, and we need
202 non-standard (but common) functionality in these operations sufficiently
203 often that it seems worthwhile to provide our own generic API.
205 In contrast, `Database.query` allows arbitrary ``SELECT`` queries (via
206 their SQLAlchemy representation) to be run, as we expect these to require
207 significantly more sophistication while still being limited to standard
208 SQL.
210 `Database` itself has several underscore-prefixed attributes:
212 - ``_engine``: SQLAlchemy object representing its engine.
213 - ``_connection``: method returning a context manager for
214 `sqlalchemy.engine.Connection` object.
215 - ``_metadata``: the `sqlalchemy.schema.MetaData` object representing
216 the tables and other schema entities.
218 These are considered protected (derived classes may access them, but other
219 code should not), and read-only, aside from executing SQL via
220 ``_connection``.
221 """
def __init__(self, *, origin: int, engine: sqlalchemy.engine.Engine, namespace: Optional[str] = None):
    # Public attributes describing which database/namespace this is.
    self.origin = origin
    self.namespace = namespace
    # Protected state; see the class documentation.
    self._engine = engine
    # No schema has been declared yet.
    self._metadata: Optional[sqlalchemy.schema.MetaData] = None
    # No session is open yet, hence no connection and no temporary tables.
    self._session_connection: Optional[sqlalchemy.engine.Connection] = None
    self._temp_tables: Set[str] = set()
def __repr__(self) -> str:
    # Reporting the connection URL is more useful than trying to reproduce
    # every constructor argument.  Any password is masked first.
    url = self._engine.url
    if url.password is not None:
        url = url.set(password="***")
    uri = str(url)
    if self.namespace:
        uri = f"{uri}#{self.namespace}"
    return f'{type(self).__name__}("{uri}")'
@classmethod
def makeDefaultUri(cls, root: str) -> Optional[str]:
    """Return the default connection URI for the given root directory.

    Parameters
    ----------
    root : `str`
        Root directory for which a default URI is requested.

    Returns
    -------
    uri : `str` or `None`
        The default URI, or `None` if there can be no such default.  This
        base-class implementation always returns `None`.
    """
    return None
@classmethod
def fromUri(
    cls, uri: str, *, origin: int, namespace: Optional[str] = None, writeable: bool = True
) -> Database:
    """Build a `Database` from a SQLAlchemy connection URI.

    The engine is created with `makeEngine` and then passed on to
    `fromEngine`.

    Parameters
    ----------
    uri : `str`
        SQLAlchemy URI connection string.
    origin : `int`
        Integer ID used as the default for any datasets, quanta, or other
        entities that use a (autoincrement, origin) compound primary key.
    namespace : `str`, optional
        Database namespace (i.e. schema) the new instance should be
        associated with.  If `None` (default), the namespace (if any) is
        inferred from the URI.
    writeable : `bool`, optional
        If `True`, allow write operations on the database, including
        ``CREATE TABLE``.

    Returns
    -------
    db : `Database`
        A new `Database` instance.
    """
    engine = cls.makeEngine(uri, writeable=writeable)
    return cls.fromEngine(engine, origin=origin, namespace=namespace, writeable=writeable)
@classmethod
@abstractmethod
def makeEngine(cls, uri: str, *, writeable: bool = True) -> sqlalchemy.engine.Engine:
    """Build a `sqlalchemy.engine.Engine` from a SQLAlchemy URI.

    Must be implemented by subclasses.

    Parameters
    ----------
    uri : `str`
        SQLAlchemy URI connection string.
    writeable : `bool`, optional
        If `True`, allow write operations on the database, including
        ``CREATE TABLE``.

    Returns
    -------
    engine : `sqlalchemy.engine.Engine`
        A database engine.

    Notes
    -----
    Subclasses that can connect to a database in additional ways are
    encouraged to add optional arguments to their implementation, provided
    they stay compatible with the call signature defined here.
    """
    raise NotImplementedError()
@classmethod
@abstractmethod
def fromEngine(
    cls,
    engine: sqlalchemy.engine.Engine,
    *,
    origin: int,
    namespace: Optional[str] = None,
    writeable: bool = True,
) -> Database:
    """Build a `Database` on top of an existing `sqlalchemy.engine.Engine`.

    Must be implemented by subclasses.

    Parameters
    ----------
    engine : `sqlalchemy.engine.Engine`
        Engine for the database.  May be shared between `Database`
        instances.
    origin : `int`
        Integer ID used as the default for any datasets, quanta, or other
        entities that use a (autoincrement, origin) compound primary key.
    namespace : `str`, optional
        A different database namespace (i.e. schema) the new instance
        should be associated with.  If `None` (default), the namespace (if
        any) is inferred from the connection.
    writeable : `bool`, optional
        If `True`, allow write operations on the database, including
        ``CREATE TABLE``.

    Returns
    -------
    db : `Database`
        A new `Database` instance.

    Notes
    -----
    This method lets several `Database` instances share one engine, which
    is desirable when they represent different namespaces that can be
    queried together.
    """
    raise NotImplementedError()
@final
@contextmanager
def session(self) -> Iterator[None]:
    """Return a context manager representing a session, i.e. a persistent
    connection to the database.

    Returns
    -------
    context : `AbstractContextManager` [ `None` ]
        A context manager that does not return a value when entered.

    Notes
    -----
    Use this when a series of read-only SQL operations will be run in
    rapid succession *without* needing them to yield consistent results in
    the presence of concurrent writes (or, more rarely, when conflicting
    concurrent writes are rare/impossible and the session will stay open
    long enough that a transaction is inadvisable).

    The work is delegated to `_session`; the connection it provides is
    deliberately not exposed to the caller.
    """
    inner = self._session()
    with inner:
        yield None
@final
@contextmanager
def transaction(
    self,
    *,
    interrupting: bool = False,
    savepoint: bool = False,
    lock: Iterable[sqlalchemy.schema.Table] = (),
    for_temp_tables: bool = False,
) -> Iterator[None]:
    """Return a context manager representing a transaction.

    Parameters
    ----------
    interrupting : `bool`, optional
        If `True` (`False` is default), this transaction block may not be
        nested inside another one; attempting to do so is a logic (i.e.
        assertion) error.
    savepoint : `bool`, optional
        If `True` (`False` is default), create a `SAVEPOINT`, so that
        exceptions raised by the database (e.g. due to constraint
        violations) inside this context can be caught outside it without
        also rolling back everything done in an outer transaction block.
        If `False`, transactions may still be nested, but a rollback may be
        generated at any level and affects all levels, and commits are
        deferred until the outermost block completes.  If any outer
        transaction block was created with ``savepoint=True``, all inner
        blocks are too (whatever value is actually passed).  Has no effect
        on the outermost transaction.
    lock : `Iterable` [ `sqlalchemy.schema.Table` ], optional
        Tables to lock for the duration of this transaction.  The locks
        are guaranteed to prevent concurrent writes and to let this
        transaction (only) acquire the same locks (others should block),
        but concurrent reads are prevented only if the database engine
        requires that in order to block concurrent writes.
    for_temp_tables : `bool`, optional
        If `True`, this transaction may involve creating temporary tables.

    Returns
    -------
    context : `AbstractContextManager` [ `None` ]
        A context manager that commits the transaction when exited without
        error and rolls it back when exited via an exception.

    Notes
    -----
    All transactions on a connection managed by one or more `Database`
    instances _must_ go through this method, or transaction state will not
    be managed correctly.
    """
    options = dict(
        interrupting=interrupting,
        savepoint=savepoint,
        lock=lock,
        for_temp_tables=for_temp_tables,
    )
    # The (is_new, connection) pair produced by ``_transaction`` is for
    # internal use only and is not passed on.
    with self._transaction(**options):
        yield None
@contextmanager
def temporary_table(
    self, spec: ddl.TableSpec, name: Optional[str] = None
) -> Iterator[sqlalchemy.schema.Table]:
    """Return a context manager that creates a temporary table and drops
    it again on exit.

    Parameters
    ----------
    spec : `ddl.TableSpec`
        Specification for the columns.  Unique and foreign key constraints
        may be ignored.
    name : `str`, optional
        If provided, the name of the SQL construct.  If not provided, an
        opaque but unique identifier is generated.

    Returns
    -------
    context : `AbstractContextManager` [ `sqlalchemy.schema.Table` ]
        A context manager that returns a SQLAlchemy representation of the
        temporary table when entered.

    Notes
    -----
    Temporary tables may be created, dropped, and written to even in
    read-only databases - at least according to the Python-level
    protections in the `Database` classes.  Server permissions may say
    otherwise, but in that case they probably need to be modified to
    support the full range of expected read-only butler behavior.

    While the context is active the table's key is recorded in
    ``_temp_tables``, which is what makes `isTableWriteable` accept it.
    The key is removed again on exit even if dropping the table fails, so
    a failed ``DROP`` cannot leave a stale "writeable" entry behind.
    """
    with self._session() as connection:
        table = self._make_temporary_table(connection, spec=spec, name=name)
        self._temp_tables.add(table.key)
        try:
            yield table
        finally:
            try:
                table.drop(connection)
            finally:
                # Always forget the table; any error from ``drop`` still
                # propagates to the caller.
                self._temp_tables.remove(table.key)
@contextmanager
def _session(self) -> Iterator[sqlalchemy.engine.Connection]:
    """Protected implementation for `session` that actually returns the
    connection.

    This method is for internal `Database` calls that need the actual
    SQLAlchemy connection object.  It should be overridden by subclasses
    instead of `session` itself.

    Returns
    -------
    context : `AbstractContextManager` [ `sqlalchemy.engine.Connection` ]
        A context manager that returns a SQLAlchemy connection when
        entered.

    Notes
    -----
    Sessions nest: if a session is already open its connection is reused
    and left open on exit.  Only the outermost session opens a new
    connection and closes it when done.  The session bookkeeping is reset
    *before* the connection is closed, so an error raised by ``close``
    cannot leave a stale connection behind to be reused by later sessions.
    """
    if self._session_connection is not None:
        # Session already started, just reuse that.
        yield self._session_connection
        return
    try:
        # Open a new connection and close it when done.
        self._session_connection = self._engine.connect()
        yield self._session_connection
    finally:
        connection = self._session_connection
        # ``connection`` is still `None` if ``connect`` itself failed, in
        # which case there is nothing to clean up.
        if connection is not None:
            self._session_connection = None
            # Temporary tables only live within session.
            self._temp_tables = set()
            connection.close()
@contextmanager
def _transaction(
    self,
    *,
    interrupting: bool = False,
    savepoint: bool = False,
    lock: Iterable[sqlalchemy.schema.Table] = (),
    for_temp_tables: bool = False,
) -> Iterator[tuple[bool, sqlalchemy.engine.Connection]]:
    """Protected implementation for `transaction` that actually returns the
    connection and whether this is a new outermost transaction.

    This method is for internal `Database` calls that need the actual
    SQLAlchemy connection object.  It should be overridden by subclasses
    instead of `transaction` itself.

    Parameters
    ----------
    interrupting : `bool`, optional
        If `True` (`False` is default), this transaction block may not be
        nested inside another one; attempting to do so is a logic (i.e.
        assertion) error.
    savepoint : `bool`, optional
        If `True` (`False` is default), create a `SAVEPOINT`, so that
        exceptions raised by the database (e.g. due to constraint
        violations) inside this context can be caught outside it without
        also rolling back everything done in an outer transaction block.
        If `False`, transactions may still be nested, but a rollback may be
        generated at any level and affects all levels, and commits are
        deferred until the outermost block completes.  If any outer
        transaction block was created with ``savepoint=True``, all inner
        blocks are too (whatever value is actually passed).  Has no effect
        on the outermost transaction.
    lock : `Iterable` [ `sqlalchemy.schema.Table` ], optional
        Tables to lock for the duration of this transaction.  The locks
        are guaranteed to prevent concurrent writes and to let this
        transaction (only) acquire the same locks (others should block),
        but concurrent reads are prevented only if the database engine
        requires that in order to block concurrent writes.
    for_temp_tables : `bool`, optional
        If `True`, this transaction may involve creating temporary tables.
        Not used by this base implementation.

    Returns
    -------
    context : `AbstractContextManager` [ `tuple` [ `bool`,
            `sqlalchemy.engine.Connection` ] ]
        A context manager that commits the transaction when exited without
        error and rolls it back when exited via an exception.  When
        entered, it returns a tuple of:

        - ``is_new`` (`bool`): whether this is a new (outermost)
          transaction;
        - ``connection`` (`sqlalchemy.engine.Connection`): the connection.

    Notes
    -----
    Table locks are acquired inside the same guarded region as the caller's
    block, so a failure in `_lockTables` rolls back the transaction (or
    savepoint) that was just begun instead of leaving it open on a
    connection that an enclosing session may keep alive.
    """
    with self._session() as connection:
        already_in_transaction = connection.in_transaction()
        assert not (interrupting and already_in_transaction), (
            "Logic error in transaction nesting: an operation that would "
            "interrupt the active transaction context has been requested."
        )
        savepoint = savepoint or connection.in_nested_transaction()
        trans: sqlalchemy.engine.Transaction | None
        if already_in_transaction:
            # Nested non-savepoint transactions don't do anything.
            trans = connection.begin_nested() if savepoint else None
        else:
            # Use a regular (non-savepoint) transaction always for the
            # outermost context.
            trans = connection.begin()
        try:
            self._lockTables(connection, lock)
            yield not already_in_transaction, connection
            if trans is not None:
                trans.commit()
        except BaseException:
            if trans is not None:
                trans.rollback()
            raise
@abstractmethod
def _lockTables(
    self, connection: sqlalchemy.engine.Connection, tables: Iterable[sqlalchemy.schema.Table] = ()
) -> None:
    """Acquire locks on the given tables.

    This is an implementation hook that subclasses must provide.  It is
    called as part of `transaction` and should not be called directly by
    other code.

    Parameters
    ----------
    connection : `sqlalchemy.engine.Connection`
        Database connection object.  A transaction is guaranteed to be
        already in progress on this connection.
    tables : `Iterable` [ `sqlalchemy.schema.Table` ], optional
        Tables to lock for the duration of the transaction.  The locks
        are guaranteed to prevent concurrent writes and to let this
        transaction (only) acquire the same locks (others should block),
        but concurrent reads are prevented only if the database engine
        requires that in order to block concurrent writes.
    """
    raise NotImplementedError()
def isTableWriteable(self, table: sqlalchemy.schema.Table) -> bool:
    """Report whether a table can be written to.

    A table is writeable when the database itself is writeable, or when
    the table is one of the currently registered temporary tables.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy table object to check.

    Returns
    -------
    writeable : `bool`
        Whether this table is writeable.
    """
    database_writeable = self.isWriteable()
    if database_writeable:
        return database_writeable
    return table.key in self._temp_tables
def assertTableWriteable(self, table: sqlalchemy.schema.Table, msg: str) -> None:
    """Raise unless the given table is writeable, as determined by
    `isTableWriteable`.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy table object to check.
    msg : `str`
        Message for the exception raised when the table is not writeable.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if the table is not writeable.
    """
    if self.isTableWriteable(table):
        return
    raise ReadOnlyDatabaseError(msg)
@contextmanager
def declareStaticTables(self, *, create: bool) -> Iterator[StaticTablesContext]:
    """Return a context manager inside which the static DDL schema of the
    database can be declared.

    Parameters
    ----------
    create : `bool`
        If `True`, attempt to create all tables at the end of the context.
        If `False`, they will be assumed to already exist.

    Returns
    -------
    schema : `StaticTablesContext`
        A helper object that is used to add new tables.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if ``create`` is `True` and `Database.isWriteable` is
        `False`.
    SchemaAlreadyDefinedError
        Raised if ``create`` is `True` and the namespace already contains
        tables.

    Examples
    --------
    Given a `Database` instance ``db``::

        with db.declareStaticTables(create=True) as schema:
            schema.addTable("table1", TableSpec(...))
            schema.addTable("table2", TableSpec(...))

    Notes
    -----
    A database's static DDL schema must be declared before any dynamic
    tables are managed via calls to `ensureTableExists` or
    `getExistingTable`.  The order in which static schema tables are added
    inside the context block is unimportant; they will automatically be
    sorted and added in an order consistent with their foreign key
    relationships.

    If anything fails, including the caller's block, the metadata created
    for this declaration is discarded.
    """
    if create and not self.isWriteable():
        raise ReadOnlyDatabaseError(f"Cannot create tables in read-only database {self}.")
    self._metadata = sqlalchemy.MetaData(schema=self.namespace)
    try:
        with self._session() as connection:
            context = StaticTablesContext(self, connection)
            if create and context._tableNames:
                # Looks like database is already initalized, to avoid
                # danger of modifying/destroying valid schema we refuse to
                # do anything in this case
                raise SchemaAlreadyDefinedError(f"Cannot create tables in non-empty database {self}.")
            yield context
            # All tables are declared now, so the foreign keys that were
            # held back can be attached to them.
            for table, foreignKey in context._foreignKeys:
                table.append_constraint(foreignKey)
            if create:
                namespace = self.namespace
                if namespace is not None and namespace not in context._inspector.get_schema_names():
                    with self.transaction():
                        connection.execute(sqlalchemy.schema.CreateSchema(namespace))
                # In our tables we have columns that make use of sqlalchemy
                # Sequence objects. There is currently a bug in sqlalchemy
                # that causes a deprecation warning to be thrown on a
                # property of the Sequence object when the repr for the
                # sequence is created. Here a filter is used to catch these
                # deprecation warnings when tables are created.
                with warnings.catch_warnings():
                    warnings.simplefilter("ignore", category=sqlalchemy.exc.SADeprecationWarning)
                    self._metadata.create_all(self._engine)
                # call all initializer methods sequentially
                for initializer in context._initializers:
                    initializer(self)
    except BaseException:
        self._metadata = None
        raise
@abstractmethod
def isWriteable(self) -> bool:
    """Report whether this client is able to modify the database.

    Must be implemented by subclasses.

    Returns
    -------
    writeable : `bool`
        `True` if this database can be modified by this client.
    """
    raise NotImplementedError()
@abstractmethod
def __str__(self) -> str:
    """Return a human-readable identifier for this `Database`.

    The identifier includes any namespace or schema that identifies the
    database's names within a `Registry`.  Must be implemented by
    subclasses.
    """
    raise NotImplementedError()
@property
def dialect(self) -> sqlalchemy.engine.Dialect:
    """SQLAlchemy dialect of the engine backing this database
    (`sqlalchemy.engine.Dialect`).
    """
    engine = self._engine
    return engine.dialect
def shrinkDatabaseEntityName(self, original: str) -> str:
    """Return a version of a name that fits the length limits this
    database engine imposes on table, constraint, index, and sequence
    names.

    Implementations should not assume that simple truncation is safe,
    because multiple long names often begin with the same prefix.

    This default implementation returns the name unchanged.

    Parameters
    ----------
    original : `str`
        The original name.

    Returns
    -------
    shrunk : `str`
        The new, possibly shortened name.
    """
    return original
def expandDatabaseEntityName(self, shrunk: str) -> str:
    """Recover the original name of a database entity whose name was too
    long to fit within the database engine's limits.

    This default implementation returns the name unchanged.

    Parameters
    ----------
    shrunk : `str`
        The possibly shortened name.

    Returns
    -------
    original : `str`
        The original name.
    """
    return shrunk
def _mangleTableName(self, name: str) -> str:
    """Translate a logical, user-visible table name into the true table
    name used in the database.

    This default implementation returns the name unchanged.

    Parameters
    ----------
    name : `str`
        Input table name.  Should not include a namespace (i.e. schema)
        prefix.

    Returns
    -------
    mangled : `str`
        Mangled version of the table name (still with no namespace prefix).

    Notes
    -----
    Reimplementations must be idempotent: mangling an already-mangled name
    must have no effect.
    """
    return name
def _makeColumnConstraints(self, table: str, spec: ddl.FieldSpec) -> List[sqlalchemy.CheckConstraint]:
    """Build the check constraints implied by a field specification.

    Parameters
    ----------
    table : `str`
        Name of the table this column is being added to.
    spec : `FieldSpec`
        Specification for the field to be added.

    Returns
    -------
    constraint : `list` of `sqlalchemy.CheckConstraint`
        Constraints to add for this column.  This default implementation
        adds none and returns an empty list.
    """
    return []
def _convertFieldSpec(
    self, table: str, spec: ddl.FieldSpec, metadata: sqlalchemy.MetaData, **kwargs: Any
) -> sqlalchemy.schema.Column:
    """Build the `sqlalchemy.schema.Column` described by a `FieldSpec`.

    Parameters
    ----------
    table : `str`
        Name of the table this column is being added to.
    spec : `FieldSpec`
        Specification for the field to be added.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema this field's table is
        being added to.
    **kwargs
        Additional keyword arguments forwarded to the
        `sqlalchemy.schema.Column` constructor, so that derived classes can
        delegate to ``super()`` while making only minor changes.

    Returns
    -------
    column : `sqlalchemy.schema.Column`
        SQLAlchemy representation of the field.
    """
    columnArgs = [spec.name, spec.getSizedColumnType()]
    if spec.autoincrement:
        # Generate a sequence to use for auto incrementing for databases
        # that do not support it natively. This will be ignored by
        # sqlalchemy for databases that do support it.
        sequenceName = self.shrinkDatabaseEntityName(f"{table}_seq_{spec.name}")
        columnArgs.append(sqlalchemy.Sequence(sequenceName, metadata=metadata))
    assert spec.doc is None or isinstance(spec.doc, str), f"Bad doc for {table}.{spec.name}."
    columnOptions = dict(
        nullable=spec.nullable,
        primary_key=spec.primaryKey,
        comment=spec.doc,
        server_default=spec.default,
    )
    return sqlalchemy.schema.Column(*columnArgs, **columnOptions, **kwargs)
def _convertForeignKeySpec(
    self, table: str, spec: ddl.ForeignKeySpec, metadata: sqlalchemy.MetaData, **kwargs: Any
) -> sqlalchemy.schema.ForeignKeyConstraint:
    """Convert a `ForeignKeySpec` to a
    `sqlalchemy.schema.ForeignKeyConstraint`.

    Parameters
    ----------
    table : `str`
        Name of the table this foreign key is being added to.
    spec : `ForeignKeySpec`
        Specification for the foreign key to be added.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema this constraint is
        being added to.  Not used by this implementation.
    **kwargs
        Additional keyword arguments to forward to the
        `sqlalchemy.schema.ForeignKeyConstraint` constructor.  This is
        provided to make it easier for derived classes to delegate to
        ``super()`` while making only minor changes.  ``name`` and
        ``ondelete`` are always set here and must not be passed.

    Returns
    -------
    constraint : `sqlalchemy.schema.ForeignKeyConstraint`
        SQLAlchemy representation of the constraint.
    """
    # The target table name appears both in the constraint name and in
    # every referenced column, so mangle it only once.
    targetTable = self._mangleTableName(spec.table)
    name = self.shrinkDatabaseEntityName("_".join(["fkey", table, targetTable, *spec.target, *spec.source]))
    return sqlalchemy.schema.ForeignKeyConstraint(
        spec.source,
        [f"{targetTable}.{col}" for col in spec.target],
        name=name,
        ondelete=spec.onDelete,
        # Forward the extra arguments as documented; they were previously
        # accepted but silently dropped.
        **kwargs,
    )
def _convertExclusionConstraintSpec(
    self,
    table: str,
    spec: Tuple[Union[str, Type[TimespanDatabaseRepresentation]], ...],
    metadata: sqlalchemy.MetaData,
) -> sqlalchemy.schema.Constraint:
    """Build the SQLAlchemy constraint for one `tuple` taken from
    `ddl.TableSpec.exclusion`.

    This base implementation supports no exclusion constraints and always
    raises.

    Parameters
    ----------
    table : `str`
        Name of the table this constraint is being added to.
    spec : `tuple` [ `str` or `type` ]
        A tuple of `str` column names and the `type` object returned by
        `getTimespanRepresentation` (which must appear exactly once),
        indicating the order of the columns in the index used to back the
        constraint.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema this constraint is
        being added to.

    Returns
    -------
    constraint : `sqlalchemy.schema.Constraint`
        SQLAlchemy representation of the constraint.

    Raises
    ------
    NotImplementedError
        Raised if this database does not support exclusion constraints.
    """
    message = f"Database {self} does not support exclusion constraints."
    raise NotImplementedError(message)
def _convertTableSpec(
    self, name: str, spec: ddl.TableSpec, metadata: sqlalchemy.MetaData, **kwargs: Any
) -> sqlalchemy.schema.Table:
    """Convert a `TableSpec` to a `sqlalchemy.schema.Table`.

    Parameters
    ----------
    name : `str`
        Name of the table; it is passed through ``_mangleTableName``
        before being used.
    spec : `TableSpec`
        Specification for the table to be added.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema this table is being
        added to.
    **kwargs
        Additional keyword arguments to forward to the
        `sqlalchemy.schema.Table` constructor.  This is provided to make it
        easier for derived classes to delegate to ``super()`` while making
        only minor changes.

    Returns
    -------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy representation of the table.

    Notes
    -----
    Foreign key *constraints* from ``spec.foreignKeys`` are deliberately
    not created here, in order to avoid circular dependencies; they are
    appended by higher-level logic in `ensureTableExists`,
    `getExistingTable`, and `declareStaticTables`.  Indexes on foreign key
    source columns are created here when requested by the spec.
    """
    name = self._mangleTableName(name)

    # Columns come first, followed by any per-column constraints.
    args = []
    for fieldSpec in spec.fields:
        args.append(self._convertFieldSpec(name, fieldSpec, metadata))
    for fieldSpec in spec.fields:
        args.extend(self._makeColumnConstraints(name, fieldSpec))

    # Column tuples that already have an index (primary key, unique
    # constraints, explicit indexes); used to avoid emitting duplicates.
    primaryKeyColumns = tuple(fieldSpec.name for fieldSpec in spec.fields if fieldSpec.primaryKey)
    allIndexes = {primaryKeyColumns}

    for columns in spec.unique:
        uniqueName = self.shrinkDatabaseEntityName("_".join([name, "unq"] + list(columns)))
        args.append(sqlalchemy.schema.UniqueConstraint(*columns, name=uniqueName))
    allIndexes.update(spec.unique)

    # Explicit indexes are only checked against the indexes known before
    # this loop started; the set is updated once the loop is done.
    for index in spec.indexes:
        if index.columns in allIndexes:
            continue
        indexName = self.shrinkDatabaseEntityName("_".join([name, "idx"] + list(index.columns)))
        args.append(
            sqlalchemy.schema.Index(
                indexName,
                *index.columns,
                unique=(index.columns in spec.unique),
                **index.kwargs,
            )
        )
    allIndexes.update(index.columns for index in spec.indexes)

    # Indexes on foreign key source columns, unless already covered.
    for fk in spec.foreignKeys:
        if not fk.addIndex or fk.source in allIndexes:
            continue
        fkIndexName = self.shrinkDatabaseEntityName("_".join((name, "fkidx") + fk.source))
        args.append(sqlalchemy.schema.Index(fkIndexName, *fk.source))

    for excl in spec.exclusion:
        args.append(self._convertExclusionConstraintSpec(name, excl, metadata))

    assert spec.doc is None or isinstance(spec.doc, str), f"Bad doc for {name}."
    return sqlalchemy.schema.Table(name, metadata, *args, comment=spec.doc, info=spec, **kwargs)
def ensureTableExists(self, name: str, spec: ddl.TableSpec) -> sqlalchemy.schema.Table:
    """Return the table with the given name and specification, creating
    it in the database first if it does not exist yet.

    Parameters
    ----------
    name : `str`
        Name of the table (not including namespace qualifiers).
    spec : `TableSpec`
        Specification for the table.  This will be used when creating the
        table, and *may* be used when obtaining an existing table to check
        for consistency, but no such check is guaranteed.

    Returns
    -------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy representation of the table.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False`, and the table does not
        already exist.
    DatabaseConflictError
        Raised if the table exists but ``spec`` is inconsistent with its
        definition.

    Notes
    -----
    This method may not be called within transactions.  It may be called on
    read-only databases if and only if the table does in fact already
    exist.

    Subclasses may override this method, but usually should not need to.
    """
    # TODO: if _engine is used to make a table then it uses separate
    # connection and should not interfere with current transaction
    session = self._session_connection
    assert session is None or not session.in_transaction(), "Table creation interrupts transactions."
    assert self._metadata is not None, "Static tables must be declared before dynamic tables."

    existing = self.getExistingTable(name, spec)
    if existing is not None:
        return existing

    if not self.isWriteable():
        raise ReadOnlyDatabaseError(
            f"Table {name} does not exist, and cannot be created "
            f"because database {self} is read-only."
        )

    created = self._convertTableSpec(name, spec, self._metadata)
    for foreignKeySpec in spec.foreignKeys:
        created.append_constraint(self._convertForeignKeySpec(name, foreignKeySpec, self._metadata))
    try:
        with self._transaction() as (_, connection):
            created.create(connection)
    except sqlalchemy.exc.DatabaseError:
        # Some other process could have created the table meanwhile, which
        # usually causes OperationalError or ProgrammingError.  We cannot
        # use IF NOT EXISTS clause in this case due to PostgreSQL race
        # condition on server side which causes IntegrityError.  Instead we
        # catch these exceptions (they all inherit DatabaseError) and
        # re-check whether table is now there.
        recovered = self.getExistingTable(name, spec)
        if recovered is None:
            raise
        return recovered
    return created
def getExistingTable(self, name: str, spec: ddl.TableSpec) -> Optional[sqlalchemy.schema.Table]:
    """Look up a table that already exists, checking it against the given
    specification.

    Parameters
    ----------
    name : `str`
        Name of the table (not including namespace qualifiers).
    spec : `TableSpec`
        Specification for the table.  This will be used when creating the
        SQLAlchemy representation of the table, and it is used to
        check that the actual table in the database is consistent.

    Returns
    -------
    table : `sqlalchemy.schema.Table` or `None`
        SQLAlchemy representation of the table, or `None` if it does not
        exist.

    Raises
    ------
    DatabaseConflictError
        Raised if the table exists but ``spec`` is inconsistent with its
        definition.

    Notes
    -----
    This method can be called within transactions and never modifies the
    database.

    Subclasses may override this method, but usually should not need to.
    """
    assert self._metadata is not None, "Static tables must be declared before dynamic tables."
    name = self._mangleTableName(name)
    metadataKey = name if self.namespace is None else f"{self.namespace}.{name}"
    known = self._metadata.tables.get(metadataKey)

    if known is not None:
        # Already in our in-memory schema: only the column names are
        # compared against the new specification.
        if spec.fields.names != set(known.columns.keys()):
            raise DatabaseConflictError(
                f"Table '{name}' has already been defined differently; the new "
                f"specification has columns {list(spec.fields.names)}, while "
                f"the previous definition has {list(known.columns.keys())}."
            )
        return known

    # Not known in memory; ask the database itself, using the session
    # connection when there is one.
    inspector = sqlalchemy.inspect(
        self._engine if self._session_connection is None else self._session_connection
    )
    if name not in inspector.get_table_names(schema=self.namespace):
        return None
    _checkExistingTableDefinition(name, spec, inspector.get_columns(name, schema=self.namespace))
    reflected = self._convertTableSpec(name, spec, self._metadata)
    for foreignKeySpec in spec.foreignKeys:
        reflected.append_constraint(self._convertForeignKeySpec(name, foreignKeySpec, self._metadata))
    return reflected
1116 def _make_temporary_table(
1117 self,
1118 connection: sqlalchemy.engine.Connection,
1119 spec: ddl.TableSpec,
1120 name: Optional[str] = None,
1121 **kwargs: Any,
1122 ) -> sqlalchemy.schema.Table:
1123 """Create a temporary table.
1125 Parameters
1126 ----------
1127 connection : `sqlalchemy.engine.Connection`
1128 Connection to use when creating the table.
1129 spec : `TableSpec`
1130 Specification for the table.
1131 name : `str`, optional
1132 A unique (within this session/connetion) name for the table.
1133 Subclasses may override to modify the actual name used. If not
1134 provided, a unique name will be generated.
1135 **kwargs
1136 Additional keyword arguments to forward to the
1137 `sqlalchemy.schema.Table` constructor. This is provided to make it
1138 easier for derived classes to delegate to ``super()`` while making
1139 only minor changes.
1141 Returns
1142 -------
1143 table : `sqlalchemy.schema.Table`
1144 SQLAlchemy representation of the table.
1145 """
1146 if name is None:
1147 name = f"tmp_{uuid.uuid4().hex}"
1148 metadata = self._metadata
1149 if metadata is None:
1150 raise RuntimeError("Cannot create temporary table before static schema is defined.")
1151 table = self._convertTableSpec(
1152 name, spec, metadata, prefixes=["TEMPORARY"], schema=sqlalchemy.schema.BLANK_SCHEMA, **kwargs
1153 )
1154 if table.key in self._temp_tables:
1155 if table.key != name:
1156 raise ValueError(
1157 f"A temporary table with name {name} (transformed to {table.key} by "
1158 f"Database) already exists."
1159 )
1160 for foreignKeySpec in spec.foreignKeys:
1161 table.append_constraint(self._convertForeignKeySpec(name, foreignKeySpec, metadata))
1162 table.create(connection)
1163 return table
@classmethod
def getTimespanRepresentation(cls) -> Type[TimespanDatabaseRepresentation]:
    """Return the `type` describing how `Timespan` objects are stored in
    this database.

    `Database` does not automatically use the return type of this method
    anywhere else; calling code is responsible for making sure that DDL
    and queries are consistent with it.

    Returns
    -------
    TimespanReprClass : `type` (`TimespanDatabaseRepresentation` subclass)
        A type that encapsulates the way `Timespan` objects should be
        stored in this database.

    Notes
    -----
    Timespan-mangling logic is kept outside the `Database` implementations,
    even though the choice of representation is ultimately up to a
    `Database` implementation, for two reasons:

    - Timespans appear in relatively few tables and queries in our
      typical usage, and the code that operates on them is already aware
      that it is working with timespans.  In contrast, a
      timespan-representation-aware implementation of, say, `insert`,
      would need to have extra logic to identify when timespan-mangling
      needed to occur, which would usually be useless overhead.

    - SQLAlchemy's rich SELECT query expression system has no way to wrap
      multiple columns in a single expression object (the ORM does, but
      we are not using the ORM).  So we would have to wrap _much_ more of
      that code in our own interfaces to encapsulate timespan
      representations there.
    """
    # This base implementation returns the compound representation.
    representation = TimespanDatabaseRepresentation.Compound
    return representation
def sync(
    self,
    table: sqlalchemy.schema.Table,
    *,
    keys: Dict[str, Any],
    compared: Optional[Dict[str, Any]] = None,
    extra: Optional[Dict[str, Any]] = None,
    returning: Optional[Sequence[str]] = None,
    update: bool = False,
) -> Tuple[Optional[Dict[str, Any]], Union[bool, Dict[str, Any]]]:
    """Insert into a table as necessary to ensure database contains
    values equivalent to the given ones.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table to be queried and possibly inserted into.
    keys : `dict`
        Column name-value pairs used to search for an existing row; must
        be a combination that can be used to select a single row if one
        exists.  If such a row does not exist, these values are used in
        the insert.
    compared : `dict`, optional
        Column name-value pairs that are compared to those in any existing
        row.  If such a row does not exist, these values are used in the
        insert.
    extra : `dict`, optional
        Column name-value pairs that are ignored if a matching row exists,
        but used in an insert if one is necessary.
    returning : `~collections.abc.Sequence` of `str`, optional
        The names of columns whose values should be returned.
    update : `bool`, optional
        If `True` (`False` is default), update the existing row with the
        values in ``compared`` instead of raising `DatabaseConflictError`.

    Returns
    -------
    row : `dict`, optional
        The value of the fields indicated by ``returning``, or `None` if
        ``returning`` is `None`.
    inserted_or_updated : `bool` or `dict`
        If `True`, a new row was inserted; if `False`, a matching row
        already existed.  If a `dict` (only possible if ``update=True``),
        then an existing row was updated, and the dict maps the names of
        the updated columns to their *old* values (new values can be
        obtained from ``compared``).

    Raises
    ------
    DatabaseConflictError
        Raised if the values in ``compared`` do not match the values in the
        database (and ``update`` is `False`).
    ReadOnlyDatabaseError
        Raised if the table is not writeable and either no matching record
        already exists, or an update would be needed.
    ConflictingDefinitionError
        Raised if, on a writeable table, the query performed after the
        insert attempt finds no row matching ``keys``.
    RuntimeError
        Raised if ``keys`` matches more than one row, or if a conflict is
        found right after an insert that was reported as successful.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    It may be called on read-only databases if and only if the matching row
    does in fact already exist.
    """

    def check() -> Tuple[int, Optional[Dict[str, Any]], Optional[List]]:
        """Query for a row that matches the ``keys`` argument, and compare
        to what was given by the caller.

        Returns
        -------
        n : `int`
            Number of matching rows.  ``n != 1`` is always an error, but
            it's a different kind of error depending on where `check` is
            being called.
        bad : `dict` or `None`
            The subset of the keys of ``compared`` for which the existing
            values did not match the given one, mapped to the existing
            values in the database.  A non-empty ``bad`` indicates a
            conflict, which the caller handles differently depending on
            context.  `None` if ``n != 1``.
        result : `list` or `None`
            Results in the database that correspond to the columns given
            in ``returning``, or `None` if ``returning is None`` or
            ``n != 1``.
        """
        toSelect: Set[str] = set()
        if compared is not None:
            toSelect.update(compared.keys())
        if returning is not None:
            toSelect.update(returning)
        if not toSelect:
            # Need to select some column, even if we just want to see
            # how many rows we get back.
            toSelect.add(next(iter(keys.keys())))
        selectSql = (
            sqlalchemy.sql.select(*[table.columns[k].label(k) for k in toSelect])
            .select_from(table)
            .where(sqlalchemy.sql.and_(*[table.columns[k] == v for k, v in keys.items()]))
        )
        with self._transaction() as (_, connection):
            fetched = list(connection.execute(selectSql).mappings())
        if len(fetched) != 1:
            return len(fetched), None, None
        existing = fetched[0]
        if compared is not None:

            def safeNotEqual(a: Any, b: Any) -> bool:
                # astropy times are compared through the TimeConverter
                # helper instead of plain ``!=``.
                if isinstance(a, astropy.time.Time):
                    return not time_utils.TimeConverter().times_equal(a, b)
                return a != b

            inconsistencies = {
                k: existing[k] for k, v in compared.items() if safeNotEqual(existing[k], v)
            }
        else:
            inconsistencies = {}
        if returning is not None:
            toReturn: Optional[list] = [existing[k] for k in returning]
        else:
            toReturn = None
        return 1, inconsistencies, toReturn

    def format_bad(inconsistencies: Dict[str, Any]) -> str:
        """Format the 'bad' dictionary of existing values returned by
        ``check`` into a string suitable for an error message.
        """
        assert compared is not None, "Should not be able to get inconsistencies without comparing."
        return ", ".join(f"{k}: {v!r} != {compared[k]!r}" for k, v in inconsistencies.items())

    if self.isTableWriteable(table):
        # Try an insert first, but allow it to fail (in only specific
        # ways).
        row = keys.copy()
        if compared is not None:
            row.update(compared)
        if extra is not None:
            row.update(extra)
        with self.transaction():
            inserted = bool(self.ensure(table, row))
            inserted_or_updated: Union[bool, Dict[str, Any]]
            # Need to perform check() for this branch inside the
            # transaction, so we roll back an insert that didn't do
            # what we expected.  That limits the extent to which we
            # can reduce duplication between this block and the other
            # ones that perform similar logic.
            n, bad, result = check()
            if n < 1:
                raise ConflictingDefinitionError(
                    f"Attempted to ensure {row} exists by inserting it with ON CONFLICT IGNORE, "
                    f"but a post-insert query on {keys} returned no results. "
                    f"Insert was {'' if inserted else 'not '}reported as successful. "
                    "This can occur if the insert violated a database constraint other than the "
                    "unique constraint or primary key used to identify the row in this call."
                )
            elif n > 1:
                raise RuntimeError(
                    f"Keys passed to sync {keys.keys()} do not comprise a "
                    f"unique constraint for table {table.name}."
                )
            elif bad:
                assert (
                    compared is not None
                ), "Should not be able to get inconsistencies without comparing."
                if inserted:
                    raise RuntimeError(
                        f"Conflict ({bad}) in sync after successful insert; this is "
                        "possible if the same table is being updated by a concurrent "
                        "process that isn't using sync, but it may also be a bug in "
                        "daf_butler."
                    )
                elif update:
                    # Overwrite only the columns that differed; ``bad``
                    # (the old values) is what gets reported back.
                    with self._transaction() as (_, connection):
                        connection.execute(
                            table.update()
                            .where(sqlalchemy.sql.and_(*[table.columns[k] == v for k, v in keys.items()]))
                            .values(**{k: compared[k] for k in bad.keys()})
                        )
                    inserted_or_updated = bad
                else:
                    raise DatabaseConflictError(
                        f"Conflict in sync for table {table.name} on column(s) {format_bad(bad)}."
                    )
            else:
                inserted_or_updated = inserted
    else:
        # Database is not writeable; just see if the row exists.
        n, bad, result = check()
        if n < 1:
            raise ReadOnlyDatabaseError("sync needs to insert, but database is read-only.")
        elif n > 1:
            raise RuntimeError("Keys passed to sync do not comprise a unique constraint.")
        elif bad:
            if update:
                raise ReadOnlyDatabaseError("sync needs to update, but database is read-only.")
            else:
                raise DatabaseConflictError(
                    f"Conflict in sync for table {table.name} on column(s) {format_bad(bad)}."
                )
        inserted_or_updated = False
    if returning is None:
        return None, inserted_or_updated
    else:
        assert result is not None
        return {k: v for k, v in zip(returning, result)}, inserted_or_updated
def insert(
    self,
    table: sqlalchemy.schema.Table,
    *rows: dict,
    returnIds: bool = False,
    select: Optional[sqlalchemy.sql.expression.SelectBase] = None,
    names: Optional[Iterable[str]] = None,
) -> Optional[List[int]]:
    """Insert one or more rows into a table, optionally returning
    autoincrement primary key values.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    returnIds: `bool`
        If `True` (`False` is default), return the values of the table's
        autoincrement primary key field (which must exist).
    select : `sqlalchemy.sql.SelectBase`, optional
        A SELECT query expression to insert rows from.  Cannot be provided
        with either ``rows`` or ``returnIds=True``.
    names : `Iterable` [ `str` ], optional
        Names of columns in ``table`` to be populated, ordered to match the
        columns returned by ``select``.  Ignored if ``select`` is `None`.
        If not provided, the columns returned by ``select`` must be named
        to match the desired columns of ``table``.
    *rows
        Positional arguments are the rows to be inserted, as dictionaries
        mapping column name to value.  The keys in all dictionaries must
        be the same.

    Returns
    -------
    ids : `None`, or `list` of `int`
        If ``returnIds`` is `True`, a `list` containing the inserted
        values for the table's autoincrement primary key.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.
    TypeError
        Raised if ``select`` is combined with ``rows`` or
        ``returnIds=True``.

    Notes
    -----
    The default implementation uses bulk insert syntax when ``returnIds``
    is `False`, and a loop over single-row insert operations when it is
    `True`.

    Derived classes should reimplement when they can provide a more
    efficient implementation (especially for the latter case).

    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.
    """
    self.assertTableWriteable(table, f"Cannot insert into read-only table {table}.")
    if select is not None and (rows or returnIds):
        raise TypeError("'select' is incompatible with passing value rows or returnIds=True.")
    if select is None and not rows:
        # Nothing to insert; the shape of the result still follows
        # ``returnIds``.
        return [] if returnIds else None
    with self._transaction() as (_, connection):
        if returnIds:
            # One statement per row so each generated key can be read back.
            statement = table.insert()
            ids = []
            for row in rows:
                ids.append(connection.execute(statement, row).inserted_primary_key[0])
            return ids
        if select is None:
            connection.execute(table.insert(), rows)
            return None
        if names is None:
            # columns() is deprecated since 1.4, but
            # selected_columns() method did not exist in 1.3.
            if hasattr(select, "selected_columns"):
                names = select.selected_columns.keys()
            else:
                names = select.columns.keys()
        connection.execute(table.insert().from_select(names, select))
        return None
@abstractmethod
def replace(self, table: sqlalchemy.schema.Table, *rows: dict) -> None:
    """Insert rows into a table, overwriting any existing rows whose
    primary key would otherwise conflict with a new row.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    *rows
        Positional arguments are the rows to be inserted, as dictionaries
        mapping column name to value.  The keys in all dictionaries must
        be the same.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    Implementations should raise a `sqlalchemy.exc.IntegrityError`
    exception when a constraint other than the primary key would be
    violated.

    Implementations are not required to support `replace` on tables
    with autoincrement keys.
    """
    # Abstract: concrete databases must provide the implementation.
    raise NotImplementedError()
@abstractmethod
def ensure(self, table: sqlalchemy.schema.Table, *rows: dict, primary_key_only: bool = False) -> int:
    """Insert rows into a table, silently skipping those whose insertion
    would violate a unique constraint.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    *rows
        Positional arguments are the rows to be inserted, as dictionaries
        mapping column name to value.  The keys in all dictionaries must
        be the same.
    primary_key_only : `bool`, optional
        If `True` (`False` is default), only skip rows that violate the
        primary key constraint, and raise an exception (and rollback
        transactions) for other constraint violations.

    Returns
    -------
    count : `int`
        The number of rows actually inserted.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.
        This is raised even if the operation would do nothing even on a
        writeable database.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    Implementations are not required to support `ensure` on tables
    with autoincrement keys.
    """
    # Abstract: concrete databases must provide the implementation.
    raise NotImplementedError()
def delete(self, table: sqlalchemy.schema.Table, columns: Iterable[str], *rows: dict) -> int:
    """Delete one or more rows from a table.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table that rows should be deleted from.
    columns: `~collections.abc.Iterable` of `str`
        The names of columns that will be used to constrain the rows to
        be deleted; these will be combined via ``AND`` to form the
        ``WHERE`` clause of the delete query.  Any iterable is accepted,
        including one-shot iterators.  If empty, the delete is
        unconstrained.
    *rows
        Positional arguments are the keys of rows to be deleted, as
        dictionaries mapping column name to value.  The keys in all
        dictionaries must be exactly the names in ``columns``.

    Returns
    -------
    count : `int`
        Number of rows deleted.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    The default implementation should be sufficient for most derived
    classes.
    """
    self.assertTableWriteable(table, f"Cannot delete from read-only table {table}.")
    # Materialize before testing for emptiness: an iterator or generator is
    # always truthy, so checking it directly would make an empty iterator
    # look like a non-empty set of constraint columns.
    columns = list(columns)
    if columns and not rows:
        # If there are no columns, this operation is supposed to delete
        # everything (so we proceed as usual).  But if there are columns,
        # but no rows, it was a constrained bulk operation where the
        # constraint is that no rows match, and we should short-circuit
        # while reporting that no rows were affected.
        return 0
    sql = table.delete()

    # More efficient to use IN operator if there is only one
    # variable changing across all rows.
    content: Dict[str, Set] = defaultdict(set)
    if len(columns) == 1:
        # Nothing to calculate since we can always use IN
        column = columns[0]
        changing_columns = [column]
        content[column] = {row[column] for row in rows}
    else:
        for row in rows:
            for k, v in row.items():
                content[k].add(v)
        changing_columns = [col for col, values in content.items() if len(values) > 1]

    if len(changing_columns) != 1:
        # More than one column changes each time so do explicit bind
        # parameters and have each row processed separately.
        whereTerms = [table.columns[name] == sqlalchemy.sql.bindparam(name) for name in columns]
        if whereTerms:
            sql = sql.where(sqlalchemy.sql.and_(*whereTerms))
        with self._transaction() as (_, connection):
            return connection.execute(sql, rows).rowcount

    # One of the columns has changing values but any others are
    # fixed.  In this case we can use an IN operator and be more
    # efficient.
    name = changing_columns[0]

    # Simple where clause for the unchanging columns; each of their value
    # sets holds exactly one element.
    clauses = [table.columns[k] == next(iter(values)) for k, values in content.items() if k != name]

    # The IN operator will not work for "infinite" numbers of
    # rows so must batch it up into distinct calls.
    in_content = list(content[name])
    n_per_loop = 1_000  # Controls how many items to put in IN clause
    rowcount = 0
    with self._transaction() as (_, connection):
        for start in range(0, len(in_content), n_per_loop):
            in_clause = table.columns[name].in_(in_content[start : start + n_per_loop])
            newsql = sql.where(sqlalchemy.sql.and_(*clauses, in_clause))
            rowcount += connection.execute(newsql).rowcount
    return rowcount
def deleteWhere(self, table: sqlalchemy.schema.Table, where: sqlalchemy.sql.ClauseElement) -> int:
    """Delete rows from a table with pre-constructed WHERE clause.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table that rows should be deleted from.
    where: `sqlalchemy.sql.ClauseElement`
        Expression used as the ``WHERE`` clause of the delete query; it is
        passed to the statement unchanged.

    Returns
    -------
    count : `int`
        Number of rows deleted.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    The default implementation should be sufficient for most derived
    classes.
    """
    self.assertTableWriteable(table, f"Cannot delete from read-only table {table}.")

    statement = table.delete().where(where)
    with self._transaction() as (_, connection):
        outcome = connection.execute(statement)
        return outcome.rowcount
def update(self, table: sqlalchemy.schema.Table, where: Dict[str, str], *rows: dict) -> int:
    """Update one or more rows in a table.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table containing the rows to be updated.
    where : `dict` [`str`, `str`]
        A mapping from the names of columns that will be used to search for
        existing rows to the keys that will hold these values in the
        ``rows`` dictionaries.  Note that these may not be the same due to
        SQLAlchemy limitations.
    *rows
        Positional arguments are the rows to be updated.  The keys in all
        dictionaries must be the same, and may correspond to either a
        value in the ``where`` dictionary or the name of a column to be
        updated.

    Returns
    -------
    count : `int`
        Number of rows matched (regardless of whether the update actually
        modified them).

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    The default implementation should be sufficient for most derived
    classes.
    """
    self.assertTableWriteable(table, f"Cannot update read-only table {table}.")
    if not rows:
        return 0
    statement = table.update()
    # Each search column is matched against a bind parameter named after
    # the corresponding key in the row dictionaries.
    conditions = []
    for columnName, rowKey in where.items():
        conditions.append(table.columns[columnName] == sqlalchemy.sql.bindparam(rowKey))
    statement = statement.where(sqlalchemy.sql.and_(*conditions))
    with self._transaction() as (_, connection):
        return connection.execute(statement, rows).rowcount
@contextmanager
def query(
    self, sql: sqlalchemy.sql.expression.SelectBase, *args: Any, **kwargs: Any
) -> Iterator[sqlalchemy.engine.CursorResult]:
    """Run a SELECT query against the database.

    Parameters
    ----------
    sql : `sqlalchemy.sql.expression.SelectBase`
        A SQLAlchemy representation of a ``SELECT`` query.
    *args
        Additional positional arguments are forwarded to
        `sqlalchemy.engine.Connection.execute`.
    **kwargs
        Additional keyword arguments are forwarded to
        `sqlalchemy.engine.Connection.execute`.

    Returns
    -------
    result_context : `sqlalchemy.engine.CursorResult`
        Context manager that returns the query result object when entered.
        These results are invalidated when the context is exited.

    Notes
    -----
    If there is no session connection, a new connection is obtained from
    the engine for this query and closed when the context exits, including
    when executing the query itself raises.  An existing session
    connection is used as-is and is never closed here.
    """
    if self._session_connection is None:
        connection = self._engine.connect()
    else:
        connection = self._session_connection
    try:
        # Execute inside the ``try`` so that a failing statement cannot
        # leak a connection that was opened just for this query.
        result = connection.execute(sql, *args, **kwargs)
        yield result
    finally:
        if connection is not self._session_connection:
            connection.close()
@abstractmethod
def constant_rows(
    self,
    fields: NamedValueAbstractSet[ddl.FieldSpec],
    *rows: dict,
    name: Optional[str] = None,
) -> sqlalchemy.sql.FromClause:
    """Build a SQLAlchemy construct holding a handful of literal rows.

    Parameters
    ----------
    fields : `NamedValueAbstractSet` [ `ddl.FieldSpec` ]
        Specifications of the columns each row provides.  Unique and
        foreign key constraints are ignored.
    *rows : `dict`
        The row values; each dictionary is indexed by the names in
        ``fields.names``.
    name : `str`, optional
        Name to give the SQL construct.  When omitted, an opaque but
        unique identifier is generated.

    Returns
    -------
    from_clause : `sqlalchemy.sql.FromClause`
        SQLAlchemy object representing the given rows.  It is guaranteed
        to be usable directly in the ``FROM`` clause of a ``SELECT``
        query and does not rely on a temporary table that would need to
        be cleaned up later.

    Notes
    -----
    This default implementation relies on the SQL-standard ``VALUES``
    construct.  Because support for it differs between popular RDBMSs, the
    method remains abstract so that subclasses must opt in explicitly by
    delegating to `super`.
    """
    # Fall back to a random, collision-resistant name when none is given.
    construct_name = f"tmp_{uuid.uuid4().hex}" if name is None else name
    columns = [sqlalchemy.Column(spec.name, spec.getSizedColumnType()) for spec in fields]
    values_clause = sqlalchemy.sql.values(*columns, name=construct_name)
    # Order each row's values to match the declared field order.
    tuples = [tuple(row[column_name] for column_name in fields.names) for row in rows]
    return values_clause.data(tuples)
def get_constant_rows_max(self) -> int:
    """Report how many rows callers should pass to `constant_rows` at most
    for this backend.

    Returns
    -------
    max : `int`
        Maximum number of rows.

    Notes
    -----
    The value is meant to reflect typical performance profiles (or a guess
    at them) rather than only hard limits of the database engine.
    """
    row_limit = 100
    return row_limit
# Attribute declarations: only the annotations appear here, no values are
# assigned in this block.
# NOTE(review): presumably these are set by concrete implementations or
# their constructors -- confirm against the subclasses.
origin: int
"""An integer ID that should be used as the default for any datasets,
quanta, or other entities that use a (autoincrement, origin) compound
primary key (`int`).
"""

namespace: Optional[str]
"""The schema or namespace this database instance is associated with
(`str` or `None`).
"""