Coverage for python/lsst/daf/butler/registry/interfaces/_database.py: 14%
412 statements
« prev ^ index » next coverage.py v6.5.0, created at 2023-02-05 02:04 -0800
« prev ^ index » next coverage.py v6.5.0, created at 2023-02-05 02:04 -0800
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = [
24 "Database",
25 "ReadOnlyDatabaseError",
26 "DatabaseConflictError",
27 "SchemaAlreadyDefinedError",
28 "StaticTablesContext",
29]
31import uuid
32import warnings
33from abc import ABC, abstractmethod
34from collections import defaultdict
35from contextlib import contextmanager
36from typing import (
37 Any,
38 Callable,
39 Dict,
40 Iterable,
41 Iterator,
42 List,
43 Optional,
44 Sequence,
45 Set,
46 Tuple,
47 Type,
48 Union,
49 final,
50)
52import astropy.time
53import sqlalchemy
55from ...core import TimespanDatabaseRepresentation, ddl, time_utils
56from ...core.named import NamedValueAbstractSet
57from .._exceptions import ConflictingDefinitionError
def _checkExistingTableDefinition(name: str, spec: ddl.TableSpec, inspection: List[Dict[str, Any]]) -> None:
    """Verify that an existing table has exactly the columns declared by its
    specification.

    Parameters
    ----------
    name : `str`
        Name of the table (only used in error messages).
    spec : `ddl.TableSpec`
        Specification of the table.
    inspection : `list` [ `dict` ]
        Column descriptions as returned by
        `sqlalchemy.engine.reflection.Inspector.get_columns`.  Only the
        ``"name"`` entry of each description is consulted.

    Raises
    ------
    DatabaseConflictError
        Raised if the definitions are inconsistent.
    """
    reflected = [column["name"] for column in inspection]
    # Only the *set* of column names is compared; column order and column
    # types are not checked here.
    differs = spec.fields.names != set(reflected)
    if differs:
        raise DatabaseConflictError(
            f"Table '{name}' exists but is defined differently in the database; "
            f"specification has columns {list(spec.fields.names)}, while the "
            f"table in the database has {reflected}."
        )
class ReadOnlyDatabaseError(RuntimeError):
    """Error signalling that a write was attempted through a `Database`
    that is read-only.
    """
class DatabaseConflictError(ConflictingDefinitionError):
    """Error signalling that what is stored in the database (row values or
    schema entities) disagrees with what this client expects.
    """
class SchemaAlreadyDefinedError(RuntimeError):
    """Error signalling an attempt to initialize a database schema in a
    database where some tables already exist.
    """
class StaticTablesContext:
    """Collector for the static table declarations of one registry layer.

    Instances are handed out by `Database.declareStaticTables`; no other code
    is expected to construct one.
    """

    def __init__(self, db: Database, connection: sqlalchemy.engine.Connection):
        inspector = sqlalchemy.inspect(connection)
        self._db = db
        self._inspector = inspector
        # Snapshot of the tables present in this namespace at construction.
        self._tableNames = frozenset(inspector.get_table_names(schema=db.namespace))
        # Foreign keys are only collected here; they are attached to their
        # tables later by `Database.declareStaticTables`.
        self._foreignKeys: List[Tuple[sqlalchemy.schema.Table, sqlalchemy.schema.ForeignKeyConstraint]] = []
        self._initializers: List[Callable[[Database], None]] = []

    def addTable(self, name: str, spec: ddl.TableSpec) -> sqlalchemy.schema.Table:
        """Declare one table and return its SQLAlchemy representation.

        Actual creation of the table may be postponed until the context
        opened by `Database.declareStaticTables` ends, which is what lets
        tables be declared in any order even when they reference each other
        through foreign keys.

        If a table of the same (mangled) name already exists in the database
        its columns are compared against ``spec`` first.
        """
        db = self._db
        mangled = db._mangleTableName(name)
        if mangled in self._tableNames:
            existing_columns = self._inspector.get_columns(mangled, schema=db.namespace)
            _checkExistingTableDefinition(mangled, spec, existing_columns)
        metadata = db._metadata
        assert metadata is not None, "Guaranteed by context manager that returns this object."
        table = db._convertTableSpec(mangled, spec, metadata)
        self._foreignKeys.extend(
            (table, db._convertForeignKeySpec(mangled, foreignKeySpec, metadata))
            for foreignKeySpec in spec.foreignKeys
        )
        return table

    def addTableTuple(self, specs: Tuple[ddl.TableSpec, ...]) -> Tuple[sqlalchemy.schema.Table, ...]:
        """Declare every table in a named tuple of specifications, returning
        the SQLAlchemy tables in a named tuple of the same type.

        As with `addTable`, creation may be postponed until the end of the
        `Database.declareStaticTables` context, so declaration order does not
        matter even in the presence of foreign keys.

        Notes
        -----
        ``specs`` *must* be an instance of a type produced by
        `collections.namedtuple` (a plain tuple will not work), and the
        result is an instance of that same type.  `~collections.namedtuple`
        is a factory for types rather than a type itself, so this
        requirement cannot be expressed in the annotations.
        """
        tables = [self.addTable(name, spec) for name, spec in zip(specs._fields, specs)]  # type: ignore
        return specs._make(tables)  # type: ignore

    def addInitializer(self, initializer: Callable[[Database], None]) -> None:
        """Register a callable that performs one-time database
        initialization.

        "Initialization" covers anything that changes database state and has
        to happen exactly once after the schema has been created, for
        example populating schema attributes.

        Parameters
        ----------
        initializer : callable
            Callable taking a single `Database` argument.
        """
        self._initializers.append(initializer)
178class Database(ABC):
179 """An abstract interface that represents a particular database engine's
180 representation of a single schema/namespace/database.
182 Parameters
183 ----------
184 origin : `int`
185 An integer ID that should be used as the default for any datasets,
186 quanta, or other entities that use a (autoincrement, origin) compound
187 primary key.
188 engine : `sqlalchemy.engine.Engine`
189 The SQLAlchemy engine for this `Database`.
190 namespace : `str`, optional
191 Name of the schema or namespace this instance is associated with.
192 This is passed as the ``schema`` argument when constructing a
193 `sqlalchemy.schema.MetaData` instance. We use ``namespace`` instead to
194 avoid confusion between "schema means namespace" and "schema means
195 table definitions".
197 Notes
198 -----
199 `Database` requires all write operations to go through its special named
200 methods. Our write patterns are sufficiently simple that we don't really
201 need the full flexibility of SQL insert/update/delete syntax, and we need
202 non-standard (but common) functionality in these operations sufficiently
203 often that it seems worthwhile to provide our own generic API.
205 In contrast, `Database.query` allows arbitrary ``SELECT`` queries (via
206 their SQLAlchemy representation) to be run, as we expect these to require
207 significantly more sophistication while still being limited to standard
208 SQL.
210 `Database` itself has several underscore-prefixed attributes:
212 - ``_engine``: SQLAlchemy object representing its engine.
213 - ``_connection``: method returning a context manager for
214 `sqlalchemy.engine.Connection` object.
215 - ``_metadata``: the `sqlalchemy.schema.MetaData` object representing
216 the tables and other schema entities.
218 These are considered protected (derived classes may access them, but other
219 code should not), and read-only, aside from executing SQL via
220 ``_connection``.
221 """
223 def __init__(self, *, origin: int, engine: sqlalchemy.engine.Engine, namespace: Optional[str] = None):
224 self.origin = origin
225 self.namespace = namespace
226 self._engine = engine
227 self._session_connection: Optional[sqlalchemy.engine.Connection] = None
228 self._metadata: Optional[sqlalchemy.schema.MetaData] = None
229 self._temp_tables: Set[str] = set()
231 def __repr__(self) -> str:
232 # Rather than try to reproduce all the parameters used to create
233 # the object, instead report the more useful information of the
234 # connection URL.
235 if self._engine.url.password is not None:
236 uri = str(self._engine.url.set(password="***"))
237 else:
238 uri = str(self._engine.url)
239 if self.namespace:
240 uri += f"#{self.namespace}"
241 return f'{type(self).__name__}("{uri}")'
243 @classmethod
244 def makeDefaultUri(cls, root: str) -> Optional[str]:
245 """Create a default connection URI appropriate for the given root
246 directory, or `None` if there can be no such default.
247 """
248 return None
250 @classmethod
251 def fromUri(
252 cls, uri: str, *, origin: int, namespace: Optional[str] = None, writeable: bool = True
253 ) -> Database:
254 """Construct a database from a SQLAlchemy URI.
256 Parameters
257 ----------
258 uri : `str`
259 A SQLAlchemy URI connection string.
260 origin : `int`
261 An integer ID that should be used as the default for any datasets,
262 quanta, or other entities that use a (autoincrement, origin)
263 compound primary key.
264 namespace : `str`, optional
265 A database namespace (i.e. schema) the new instance should be
266 associated with. If `None` (default), the namespace (if any) is
267 inferred from the URI.
268 writeable : `bool`, optional
269 If `True`, allow write operations on the database, including
270 ``CREATE TABLE``.
272 Returns
273 -------
274 db : `Database`
275 A new `Database` instance.
276 """
277 return cls.fromEngine(
278 cls.makeEngine(uri, writeable=writeable), origin=origin, namespace=namespace, writeable=writeable
279 )
281 @classmethod
282 @abstractmethod
283 def makeEngine(cls, uri: str, *, writeable: bool = True) -> sqlalchemy.engine.Engine:
284 """Create a `sqlalchemy.engine.Engine` from a SQLAlchemy URI.
286 Parameters
287 ----------
288 uri : `str`
289 A SQLAlchemy URI connection string.
290 writeable : `bool`, optional
291 If `True`, allow write operations on the database, including
292 ``CREATE TABLE``.
294 Returns
295 -------
296 engine : `sqlalchemy.engine.Engine`
297 A database engine.
299 Notes
300 -----
301 Subclasses that support other ways to connect to a database are
302 encouraged to add optional arguments to their implementation of this
303 method, as long as they maintain compatibility with the base class
304 call signature.
305 """
306 raise NotImplementedError()
308 @classmethod
309 @abstractmethod
310 def fromEngine(
311 cls,
312 engine: sqlalchemy.engine.Engine,
313 *,
314 origin: int,
315 namespace: Optional[str] = None,
316 writeable: bool = True,
317 ) -> Database:
318 """Create a new `Database` from an existing `sqlalchemy.engine.Engine`.
320 Parameters
321 ----------
322 engine : `sqlalchemy.engine.Engine`
323 The engine for the database. May be shared between `Database`
324 instances.
325 origin : `int`
326 An integer ID that should be used as the default for any datasets,
327 quanta, or other entities that use a (autoincrement, origin)
328 compound primary key.
329 namespace : `str`, optional
330 A different database namespace (i.e. schema) the new instance
331 should be associated with. If `None` (default), the namespace
332 (if any) is inferred from the connection.
333 writeable : `bool`, optional
334 If `True`, allow write operations on the database, including
335 ``CREATE TABLE``.
337 Returns
338 -------
339 db : `Database`
340 A new `Database` instance.
342 Notes
343 -----
344 This method allows different `Database` instances to share the same
345 engine, which is desirable when they represent different namespaces
346 can be queried together.
347 """
348 raise NotImplementedError()
350 @final
351 @contextmanager
352 def session(self) -> Iterator[None]:
353 """Return a context manager that represents a session (persistent
354 connection to a database).
356 Returns
357 -------
358 context : `AbstractContextManager` [ `None` ]
359 A context manager that does not return a value when entered.
361 Notes
362 -----
363 This method should be used when a sequence of read-only SQL operations
364 will be performed in rapid succession *without* a requirement that they
365 yield consistent results in the presence of concurrent writes (or, more
366 rarely, when conflicting concurrent writes are rare/impossible and the
367 session will be open long enough that a transaction is inadvisable).
368 """
369 with self._session():
370 yield
372 @final
373 @contextmanager
374 def transaction(
375 self,
376 *,
377 interrupting: bool = False,
378 savepoint: bool = False,
379 lock: Iterable[sqlalchemy.schema.Table] = (),
380 for_temp_tables: bool = False,
381 ) -> Iterator[None]:
382 """Return a context manager that represents a transaction.
384 Parameters
385 ----------
386 interrupting : `bool`, optional
387 If `True` (`False` is default), this transaction block may not be
388 nested without an outer one, and attempting to do so is a logic
389 (i.e. assertion) error.
390 savepoint : `bool`, optional
391 If `True` (`False` is default), create a `SAVEPOINT`, allowing
392 exceptions raised by the database (e.g. due to constraint
393 violations) during this transaction's context to be caught outside
394 it without also rolling back all operations in an outer transaction
395 block. If `False`, transactions may still be nested, but a
396 rollback may be generated at any level and affects all levels, and
397 commits are deferred until the outermost block completes. If any
398 outer transaction block was created with ``savepoint=True``, all
399 inner blocks will be as well (regardless of the actual value
400 passed). This has no effect if this is the outermost transaction.
401 lock : `Iterable` [ `sqlalchemy.schema.Table` ], optional
402 A list of tables to lock for the duration of this transaction.
403 These locks are guaranteed to prevent concurrent writes and allow
404 this transaction (only) to acquire the same locks (others should
405 block), but only prevent concurrent reads if the database engine
406 requires that in order to block concurrent writes.
407 for_temp_tables : `bool`, optional
408 If `True`, this transaction may involve creating temporary tables.
410 Returns
411 -------
412 context : `AbstractContextManager` [ `None` ]
413 A context manager that commits the transaction when it is exited
414 without error and rolls back the transactoin when it is exited via
415 an exception.
417 Notes
418 -----
419 All transactions on a connection managed by one or more `Database`
420 instances _must_ go through this method, or transaction state will not
421 be correctly managed.
422 """
423 with self._transaction(
424 interrupting=interrupting, savepoint=savepoint, lock=lock, for_temp_tables=for_temp_tables
425 ):
426 yield
428 @contextmanager
429 def temporary_table(
430 self, spec: ddl.TableSpec, name: Optional[str] = None
431 ) -> Iterator[sqlalchemy.schema.Table]:
432 """Return a context manager that creates and then drops a temporary
433 table.
435 Parameters
436 ----------
437 spec : `ddl.TableSpec`
438 Specification for the columns. Unique and foreign key constraints
439 may be ignored.
440 name : `str`, optional
441 If provided, the name of the SQL construct. If not provided, an
442 opaque but unique identifier is generated.
444 Returns
445 -------
446 context : `AbstractContextManager` [ `sqlalchemy.schema.Table` ]
447 A context manager that returns a SQLAlchemy representation of the
448 temporary table when entered.
450 Notes
451 -----
452 Temporary tables may be created, dropped, and written to even in
453 read-only databases - at least according to the Python-level
454 protections in the `Database` classes. Server permissions may say
455 otherwise, but in that case they probably need to be modified to
456 support the full range of expected read-only butler behavior.
457 """
458 with self._session() as connection:
459 table = self._make_temporary_table(connection, spec=spec, name=name)
460 self._temp_tables.add(table.key)
461 try:
462 yield table
463 finally:
464 table.drop(connection)
465 self._temp_tables.remove(table.key)
467 @contextmanager
468 def _session(self) -> Iterator[sqlalchemy.engine.Connection]:
469 """Protected implementation for `session` that actually returns the
470 connection.
472 This method is for internal `Database` calls that need the actual
473 SQLAlchemy connection object. It should be overridden by subclasses
474 instead of `session` itself.
476 Returns
477 -------
478 context : `AbstractContextManager` [ `sqlalchemy.engine.Connection` ]
479 A context manager that returns a SQLALchemy connection when
480 entered.
482 """
483 if self._session_connection is not None:
484 # session already started, just reuse that
485 yield self._session_connection
486 else:
487 try:
488 # open new connection and close it when done
489 self._session_connection = self._engine.connect()
490 yield self._session_connection
491 finally:
492 if self._session_connection is not None:
493 self._session_connection.close()
494 self._session_connection = None
495 # Temporary tables only live within session
496 self._temp_tables = set()
498 @contextmanager
499 def _transaction(
500 self,
501 *,
502 interrupting: bool = False,
503 savepoint: bool = False,
504 lock: Iterable[sqlalchemy.schema.Table] = (),
505 for_temp_tables: bool = False,
506 ) -> Iterator[tuple[bool, sqlalchemy.engine.Connection]]:
507 """Protected implementation for `transaction` that actually returns the
508 connection and whether this is a new outermost transaction.
510 This method is for internal `Database` calls that need the actual
511 SQLAlchemy connection object. It should be overridden by subclasses
512 instead of `transaction` itself.
514 Parameters
515 ----------
516 interrupting : `bool`, optional
517 If `True` (`False` is default), this transaction block may not be
518 nested without an outer one, and attempting to do so is a logic
519 (i.e. assertion) error.
520 savepoint : `bool`, optional
521 If `True` (`False` is default), create a `SAVEPOINT`, allowing
522 exceptions raised by the database (e.g. due to constraint
523 violations) during this transaction's context to be caught outside
524 it without also rolling back all operations in an outer transaction
525 block. If `False`, transactions may still be nested, but a
526 rollback may be generated at any level and affects all levels, and
527 commits are deferred until the outermost block completes. If any
528 outer transaction block was created with ``savepoint=True``, all
529 inner blocks will be as well (regardless of the actual value
530 passed). This has no effect if this is the outermost transaction.
531 lock : `Iterable` [ `sqlalchemy.schema.Table` ], optional
532 A list of tables to lock for the duration of this transaction.
533 These locks are guaranteed to prevent concurrent writes and allow
534 this transaction (only) to acquire the same locks (others should
535 block), but only prevent concurrent reads if the database engine
536 requires that in order to block concurrent writes.
537 for_temp_tables : `bool`, optional
538 If `True`, this transaction may involve creating temporary tables.
540 Returns
541 -------
542 context : `AbstractContextManager` [ `tuple` [ `bool`,
543 `sqlalchemy.engine.Connection` ] ]
544 A context manager that commits the transaction when it is exited
545 without error and rolls back the transactoin when it is exited via
546 an exception. When entered, it returns a tuple of:
548 - ``is_new`` (`bool`): whether this is a new (outermost)
549 transaction;
550 - ``connection`` (`sqlalchemy.engine.Connection`): the connection.
551 """
552 with self._session() as connection:
553 already_in_transaction = connection.in_transaction()
554 assert not (interrupting and already_in_transaction), (
555 "Logic error in transaction nesting: an operation that would "
556 "interrupt the active transaction context has been requested."
557 )
558 savepoint = savepoint or connection.in_nested_transaction()
559 trans: sqlalchemy.engine.Transaction | None
560 if already_in_transaction:
561 if savepoint:
562 trans = connection.begin_nested()
563 else:
564 # Nested non-savepoint transactions don't do anything.
565 trans = None
566 else:
567 # Use a regular (non-savepoint) transaction always for the
568 # outermost context.
569 trans = connection.begin()
570 self._lockTables(connection, lock)
571 try:
572 yield not already_in_transaction, connection
573 if trans is not None:
574 trans.commit()
575 except BaseException:
576 if trans is not None:
577 trans.rollback()
578 raise
580 @abstractmethod
581 def _lockTables(
582 self, connection: sqlalchemy.engine.Connection, tables: Iterable[sqlalchemy.schema.Table] = ()
583 ) -> None:
584 """Acquire locks on the given tables.
586 This is an implementation hook for subclasses, called by `transaction`.
587 It should not be called directly by other code.
589 Parameters
590 ----------
591 connection : `sqlalchemy.engine.Connection`
592 Database connection object. It is guaranteed that transaction is
593 already in a progress for this connection.
594 tables : `Iterable` [ `sqlalchemy.schema.Table` ], optional
595 A list of tables to lock for the duration of this transaction.
596 These locks are guaranteed to prevent concurrent writes and allow
597 this transaction (only) to acquire the same locks (others should
598 block), but only prevent concurrent reads if the database engine
599 requires that in order to block concurrent writes.
600 """
601 raise NotImplementedError()
603 def isTableWriteable(self, table: sqlalchemy.schema.Table) -> bool:
604 """Check whether a table is writeable, either because the database
605 connection is read-write or the table is a temporary table.
607 Parameters
608 ----------
609 table : `sqlalchemy.schema.Table`
610 SQLAlchemy table object to check.
612 Returns
613 -------
614 writeable : `bool`
615 Whether this table is writeable.
616 """
617 return self.isWriteable() or table.key in self._temp_tables
619 def assertTableWriteable(self, table: sqlalchemy.schema.Table, msg: str) -> None:
620 """Raise if the given table is not writeable, either because the
621 database connection is read-write or the table is a temporary table.
623 Parameters
624 ----------
625 table : `sqlalchemy.schema.Table`
626 SQLAlchemy table object to check.
627 msg : `str`, optional
628 If provided, raise `ReadOnlyDatabaseError` instead of returning
629 `False`, with this message.
630 """
631 if not self.isTableWriteable(table):
632 raise ReadOnlyDatabaseError(msg)
634 @contextmanager
635 def declareStaticTables(self, *, create: bool) -> Iterator[StaticTablesContext]:
636 """Return a context manager in which the database's static DDL schema
637 can be declared.
639 Parameters
640 ----------
641 create : `bool`
642 If `True`, attempt to create all tables at the end of the context.
643 If `False`, they will be assumed to already exist.
645 Returns
646 -------
647 schema : `StaticTablesContext`
648 A helper object that is used to add new tables.
650 Raises
651 ------
652 ReadOnlyDatabaseError
653 Raised if ``create`` is `True`, `Database.isWriteable` is `False`,
654 and one or more declared tables do not already exist.
656 Examples
657 --------
658 Given a `Database` instance ``db``::
660 with db.declareStaticTables(create=True) as schema:
661 schema.addTable("table1", TableSpec(...))
662 schema.addTable("table2", TableSpec(...))
664 Notes
665 -----
666 A database's static DDL schema must be declared before any dynamic
667 tables are managed via calls to `ensureTableExists` or
668 `getExistingTable`. The order in which static schema tables are added
669 inside the context block is unimportant; they will automatically be
670 sorted and added in an order consistent with their foreign key
671 relationships.
672 """
673 if create and not self.isWriteable():
674 raise ReadOnlyDatabaseError(f"Cannot create tables in read-only database {self}.")
675 self._metadata = sqlalchemy.MetaData(schema=self.namespace)
676 try:
677 with self._session() as connection:
678 context = StaticTablesContext(self, connection)
679 if create and context._tableNames:
680 # Looks like database is already initalized, to avoid
681 # danger of modifying/destroying valid schema we refuse to
682 # do anything in this case
683 raise SchemaAlreadyDefinedError(f"Cannot create tables in non-empty database {self}.")
684 yield context
685 for table, foreignKey in context._foreignKeys:
686 table.append_constraint(foreignKey)
687 if create:
688 if self.namespace is not None:
689 if self.namespace not in context._inspector.get_schema_names():
690 with self.transaction():
691 connection.execute(sqlalchemy.schema.CreateSchema(self.namespace))
692 # In our tables we have columns that make use of sqlalchemy
693 # Sequence objects. There is currently a bug in sqlalchemy
694 # that causes a deprecation warning to be thrown on a
695 # property of the Sequence object when the repr for the
696 # sequence is created. Here a filter is used to catch these
697 # deprecation warnings when tables are created.
698 with warnings.catch_warnings():
699 warnings.simplefilter("ignore", category=sqlalchemy.exc.SADeprecationWarning)
700 self._metadata.create_all(self._engine)
701 # call all initializer methods sequentially
702 for init in context._initializers:
703 init(self)
704 except BaseException:
705 self._metadata = None
706 raise
708 @abstractmethod
709 def isWriteable(self) -> bool:
710 """Return `True` if this database can be modified by this client."""
711 raise NotImplementedError()
713 @abstractmethod
714 def __str__(self) -> str:
715 """Return a human-readable identifier for this `Database`, including
716 any namespace or schema that identifies its names within a `Registry`.
717 """
718 raise NotImplementedError()
720 @property
721 def dialect(self) -> sqlalchemy.engine.Dialect:
722 """The SQLAlchemy dialect for this database engine
723 (`sqlalchemy.engine.Dialect`).
724 """
725 return self._engine.dialect
727 def shrinkDatabaseEntityName(self, original: str) -> str:
728 """Return a version of the given name that fits within this database
729 engine's length limits for table, constraint, indexes, and sequence
730 names.
732 Implementations should not assume that simple truncation is safe,
733 because multiple long names often begin with the same prefix.
735 The default implementation simply returns the given name.
737 Parameters
738 ----------
739 original : `str`
740 The original name.
742 Returns
743 -------
744 shrunk : `str`
745 The new, possibly shortened name.
746 """
747 return original
749 def expandDatabaseEntityName(self, shrunk: str) -> str:
750 """Retrieve the original name for a database entity that was too long
751 to fit within the database engine's limits.
753 Parameters
754 ----------
755 original : `str`
756 The original name.
758 Returns
759 -------
760 shrunk : `str`
761 The new, possibly shortened name.
762 """
763 return shrunk
765 def _mangleTableName(self, name: str) -> str:
766 """Map a logical, user-visible table name to the true table name used
767 in the database.
769 The default implementation returns the given name unchanged.
771 Parameters
772 ----------
773 name : `str`
774 Input table name. Should not include a namespace (i.e. schema)
775 prefix.
777 Returns
778 -------
779 mangled : `str`
780 Mangled version of the table name (still with no namespace prefix).
782 Notes
783 -----
784 Reimplementations of this method must be idempotent - mangling an
785 already-mangled name must have no effect.
786 """
787 return name
789 def _makeColumnConstraints(self, table: str, spec: ddl.FieldSpec) -> List[sqlalchemy.CheckConstraint]:
790 """Create constraints based on this spec.
792 Parameters
793 ----------
794 table : `str`
795 Name of the table this column is being added to.
796 spec : `FieldSpec`
797 Specification for the field to be added.
799 Returns
800 -------
801 constraint : `list` of `sqlalchemy.CheckConstraint`
802 Constraint added for this column.
803 """
804 # By default we return no additional constraints
805 return []
807 def _convertFieldSpec(
808 self, table: str, spec: ddl.FieldSpec, metadata: sqlalchemy.MetaData, **kwargs: Any
809 ) -> sqlalchemy.schema.Column:
810 """Convert a `FieldSpec` to a `sqlalchemy.schema.Column`.
812 Parameters
813 ----------
814 table : `str`
815 Name of the table this column is being added to.
816 spec : `FieldSpec`
817 Specification for the field to be added.
818 metadata : `sqlalchemy.MetaData`
819 SQLAlchemy representation of the DDL schema this field's table is
820 being added to.
821 **kwargs
822 Additional keyword arguments to forward to the
823 `sqlalchemy.schema.Column` constructor. This is provided to make
824 it easier for derived classes to delegate to ``super()`` while
825 making only minor changes.
827 Returns
828 -------
829 column : `sqlalchemy.schema.Column`
830 SQLAlchemy representation of the field.
831 """
832 args = [spec.name, spec.getSizedColumnType()]
833 if spec.autoincrement:
834 # Generate a sequence to use for auto incrementing for databases
835 # that do not support it natively. This will be ignored by
836 # sqlalchemy for databases that do support it.
837 args.append(
838 sqlalchemy.Sequence(
839 self.shrinkDatabaseEntityName(f"{table}_seq_{spec.name}"), metadata=metadata
840 )
841 )
842 assert spec.doc is None or isinstance(spec.doc, str), f"Bad doc for {table}.{spec.name}."
843 return sqlalchemy.schema.Column(
844 *args,
845 nullable=spec.nullable,
846 primary_key=spec.primaryKey,
847 comment=spec.doc,
848 server_default=spec.default,
849 **kwargs,
850 )
852 def _convertForeignKeySpec(
853 self, table: str, spec: ddl.ForeignKeySpec, metadata: sqlalchemy.MetaData, **kwargs: Any
854 ) -> sqlalchemy.schema.ForeignKeyConstraint:
855 """Convert a `ForeignKeySpec` to a
856 `sqlalchemy.schema.ForeignKeyConstraint`.
858 Parameters
859 ----------
860 table : `str`
861 Name of the table this foreign key is being added to.
862 spec : `ForeignKeySpec`
863 Specification for the foreign key to be added.
864 metadata : `sqlalchemy.MetaData`
865 SQLAlchemy representation of the DDL schema this constraint is
866 being added to.
867 **kwargs
868 Additional keyword arguments to forward to the
869 `sqlalchemy.schema.ForeignKeyConstraint` constructor. This is
870 provided to make it easier for derived classes to delegate to
871 ``super()`` while making only minor changes.
873 Returns
874 -------
875 constraint : `sqlalchemy.schema.ForeignKeyConstraint`
876 SQLAlchemy representation of the constraint.
877 """
878 name = self.shrinkDatabaseEntityName(
879 "_".join(
880 ["fkey", table, self._mangleTableName(spec.table)] + list(spec.target) + list(spec.source)
881 )
882 )
883 return sqlalchemy.schema.ForeignKeyConstraint(
884 spec.source,
885 [f"{self._mangleTableName(spec.table)}.{col}" for col in spec.target],
886 name=name,
887 ondelete=spec.onDelete,
888 )
def _convertExclusionConstraintSpec(
    self,
    table: str,
    spec: Tuple[Union[str, Type[TimespanDatabaseRepresentation]], ...],
    metadata: sqlalchemy.MetaData,
) -> sqlalchemy.schema.Constraint:
    """Build the SQLAlchemy constraint for one entry of
    `ddl.TableSpec.exclusion`.

    This base implementation always raises; it never returns a constraint.

    Parameters
    ----------
    table : `str`
        Name of the table this constraint is being added to.
    spec : `tuple` [ `str` or `type` ]
        A tuple of `str` column names and the `type` object returned by
        `getTimespanRepresentation` (which must appear exactly once),
        indicating the order of the columns in the index used to back the
        constraint.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema this constraint is
        being added to.

    Returns
    -------
    constraint : `sqlalchemy.schema.Constraint`
        SQLAlchemy representation of the constraint.

    Raises
    ------
    NotImplementedError
        Raised if this database does not support exclusion constraints.
    """
    message = f"Database {self} does not support exclusion constraints."
    raise NotImplementedError(message)
def _convertTableSpec(
    self, name: str, spec: ddl.TableSpec, metadata: sqlalchemy.MetaData, **kwargs: Any
) -> sqlalchemy.schema.Table:
    """Convert a `TableSpec` to a `sqlalchemy.schema.Table`.

    Parameters
    ----------
    name : `str`
        Name of the table; it is passed through `_mangleTableName` before
        being used.
    spec : `TableSpec`
        Specification for the table to be converted.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema this table is being
        added to.
    **kwargs
        Additional keyword arguments to forward to the
        `sqlalchemy.schema.Table` constructor.  This is provided to make it
        easier for derived classes to delegate to ``super()`` while making
        only minor changes.

    Returns
    -------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy representation of the table.

    Notes
    -----
    Foreign key *constraints* from ``spec.foreignKeys`` are not created
    here, in order to avoid circular dependencies (only the indexes
    requested via ``addIndex`` are).  The constraints are added by
    higher-level logic in `ensureTableExists`, `getExistingTable`, and
    `declareStaticTables`.
    """
    name = self._mangleTableName(name)

    # Columns first, followed by any per-column constraints.
    elements = [self._convertFieldSpec(name, field, metadata) for field in spec.fields]
    for field in spec.fields:
        elements.extend(self._makeColumnConstraints(name, field))

    # Column tuples already covered by the primary key or a unique
    # constraint; explicit and foreign-key indexes on the same columns would
    # be redundant and are skipped.
    primary_key_columns = tuple(field.name for field in spec.fields if field.primaryKey)
    covered = {primary_key_columns}

    for columns in spec.unique:
        constraint_name = self.shrinkDatabaseEntityName("_".join([name, "unq"] + list(columns)))
        elements.append(sqlalchemy.schema.UniqueConstraint(*columns, name=constraint_name))
    covered.update(spec.unique)

    # ``covered`` is deliberately not updated inside this loop so that every
    # explicit index is compared against the same set.
    for index in spec.indexes:
        if index.columns in covered:
            continue
        index_name = self.shrinkDatabaseEntityName("_".join([name, "idx"] + list(index.columns)))
        elements.append(
            sqlalchemy.schema.Index(
                index_name,
                *index.columns,
                unique=(index.columns in spec.unique),
                **index.kwargs,
            )
        )
    covered.update(index.columns for index in spec.indexes)

    for fk in spec.foreignKeys:
        if not fk.addIndex or fk.source in covered:
            continue
        fk_index_name = self.shrinkDatabaseEntityName("_".join((name, "fkidx") + fk.source))
        elements.append(sqlalchemy.schema.Index(fk_index_name, *fk.source))

    for exclusion in spec.exclusion:
        elements.append(self._convertExclusionConstraintSpec(name, exclusion, metadata))

    assert spec.doc is None or isinstance(spec.doc, str), f"Bad doc for {name}."
    return sqlalchemy.schema.Table(name, metadata, *elements, comment=spec.doc, info=spec, **kwargs)
def ensureTableExists(self, name: str, spec: ddl.TableSpec) -> sqlalchemy.schema.Table:
    """Return the table with the given name, creating it from ``spec`` if
    it does not exist yet.

    Parameters
    ----------
    name : `str`
        Name of the table (not including namespace qualifiers).
    spec : `TableSpec`
        Specification for the table.  This will be used when creating the
        table, and *may* be used when obtaining an existing table to check
        for consistency, but no such check is guaranteed.

    Returns
    -------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy representation of the table.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False`, and the table does not
        already exist.
    DatabaseConflictError
        Raised if the table exists but ``spec`` is inconsistent with its
        definition.

    Notes
    -----
    This method may not be called within transactions.  It may be called on
    read-only databases if and only if the table does in fact already
    exist.

    Subclasses may override this method, but usually should not need to.
    """
    # TODO: if _engine is used to make a table then it uses separate
    # connection and should not interfere with current transaction
    session = self._session_connection
    assert session is None or not session.in_transaction(), "Table creation interrupts transactions."
    assert self._metadata is not None, "Static tables must be declared before dynamic tables."

    existing = self.getExistingTable(name, spec)
    if existing is not None:
        return existing

    if not self.isWriteable():
        raise ReadOnlyDatabaseError(
            f"Table {name} does not exist, and cannot be created because database {self} is read-only."
        )

    created = self._convertTableSpec(name, spec, self._metadata)
    for fk_spec in spec.foreignKeys:
        created.append_constraint(self._convertForeignKeySpec(name, fk_spec, self._metadata))

    try:
        with self._transaction() as (_, connection):
            created.create(connection)
    except sqlalchemy.exc.DatabaseError:
        # Some other process could have created the table meanwhile, which
        # usually causes OperationalError or ProgrammingError.  We cannot
        # use IF NOT EXISTS clause in this case due to PostgreSQL race
        # condition on server side which causes IntegrityError.  Instead we
        # catch these exceptions (they all inherit DatabaseError) and
        # re-check whether table is now there.
        rechecked = self.getExistingTable(name, spec)
        if rechecked is None:
            raise
        return rechecked
    return created
def getExistingTable(self, name: str, spec: ddl.TableSpec) -> Optional[sqlalchemy.schema.Table]:
    """Look up an already-existing table by name, checking it against
    ``spec``.

    Parameters
    ----------
    name : `str`
        Name of the table (not including namespace qualifiers).
    spec : `TableSpec`
        Specification for the table.  This will be used when creating the
        SQLAlchemy representation of the table, and it is used to
        check that the actual table in the database is consistent.

    Returns
    -------
    table : `sqlalchemy.schema.Table` or `None`
        SQLAlchemy representation of the table, or `None` if it does not
        exist.

    Raises
    ------
    DatabaseConflictError
        Raised if the table exists but ``spec`` is inconsistent with its
        definition.

    Notes
    -----
    This method can be called within transactions and never modifies the
    database.

    Subclasses may override this method, but usually should not need to.
    """
    assert self._metadata is not None, "Static tables must be declared before dynamic tables."
    name = self._mangleTableName(name)
    metadata_key = name if self.namespace is None else f"{self.namespace}.{name}"

    # Fast path: the table is already registered in our metadata, so only
    # the set of column names needs to be compared.
    known = self._metadata.tables.get(metadata_key)
    if known is not None:
        if spec.fields.names != set(known.columns.keys()):
            raise DatabaseConflictError(
                f"Table '{name}' has already been defined differently; the new "
                f"specification has columns {list(spec.fields.names)}, while "
                f"the previous definition has {list(known.columns.keys())}."
            )
        return known

    # Otherwise ask the database itself, reusing the session connection when
    # one is open.
    bind = self._engine if self._session_connection is None else self._session_connection
    inspector = sqlalchemy.inspect(bind)
    if name not in inspector.get_table_names(schema=self.namespace):
        return None

    _checkExistingTableDefinition(name, spec, inspector.get_columns(name, schema=self.namespace))
    # NOTE(review): ``name`` is already mangled here and `_convertTableSpec`
    # mangles its argument again; this presumably relies on
    # `_mangleTableName` being idempotent - confirm.
    found = self._convertTableSpec(name, spec, self._metadata)
    for fk_spec in spec.foreignKeys:
        found.append_constraint(self._convertForeignKeySpec(name, fk_spec, self._metadata))
    return found
def _make_temporary_table(
    self,
    connection: sqlalchemy.engine.Connection,
    spec: ddl.TableSpec,
    name: Optional[str] = None,
    **kwargs: Any,
) -> sqlalchemy.schema.Table:
    """Create a temporary table.

    Parameters
    ----------
    connection : `sqlalchemy.engine.Connection`
        Connection to use when creating the table.
    spec : `TableSpec`
        Specification for the table.
    name : `str`, optional
        A unique (within this session/connection) name for the table.
        Subclasses may override to modify the actual name used.  If not
        provided, a unique name will be generated.
    **kwargs
        Additional keyword arguments to forward to the
        `sqlalchemy.schema.Table` constructor.  This is provided to make it
        easier for derived classes to delegate to ``super()`` while making
        only minor changes.

    Returns
    -------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy representation of the table.

    Raises
    ------
    RuntimeError
        Raised if the static schema has not been defined yet.
    ValueError
        Raised if the key of the new table is already registered as a
        temporary table and differs from the requested name.
    """
    table_name = f"tmp_{uuid.uuid4().hex}" if name is None else name
    metadata = self._metadata
    if metadata is None:
        raise RuntimeError("Cannot create temporary table before static schema is defined.")
    table = self._convertTableSpec(
        table_name,
        spec,
        metadata,
        prefixes=["TEMPORARY"],
        schema=sqlalchemy.schema.BLANK_SCHEMA,
        **kwargs,
    )
    if table.key in self._temp_tables and table.key != table_name:
        raise ValueError(
            f"A temporary table with name {table_name} (transformed to {table.key} by "
            "Database) already exists."
        )
    for fk_spec in spec.foreignKeys:
        table.append_constraint(self._convertForeignKeySpec(table_name, fk_spec, metadata))
    table.create(connection)
    return table
@classmethod
def getTimespanRepresentation(cls) -> Type[TimespanDatabaseRepresentation]:
    """Return the `type` describing how `Timespan` objects are stored in
    this database.

    `Database` does not automatically use the return type of this method
    anywhere else; calling code is responsible for making sure that DDL
    and queries are consistent with it.

    Returns
    -------
    TimespanReprClass : `type` (`TimespanDatabaseRepresentation` subclass)
        A type that encapsulates the way `Timespan` objects should be
        stored in this database.  This implementation returns
        `TimespanDatabaseRepresentation.Compound`.

    Notes
    -----
    There are two big reasons we've decided to keep timespan-mangling logic
    outside the `Database` implementations, even though the choice of
    representation is ultimately up to a `Database` implementation:

    - Timespans appear in relatively few tables and queries in our
      typical usage, and the code that operates on them is already aware
      that it is working with timespans.  In contrast, a
      timespan-representation-aware implementation of, say, `insert`,
      would need to have extra logic to identify when timespan-mangling
      needed to occur, which would usually be useless overhead.

    - SQLAlchemy's rich SELECT query expression system has no way to wrap
      multiple columns in a single expression object (the ORM does, but
      we are not using the ORM).  So we would have to wrap _much_ more of
      that code in our own interfaces to encapsulate timespan
      representations there.
    """
    representation = TimespanDatabaseRepresentation.Compound
    return representation
def sync(
    self,
    table: sqlalchemy.schema.Table,
    *,
    keys: Dict[str, Any],
    compared: Optional[Dict[str, Any]] = None,
    extra: Optional[Dict[str, Any]] = None,
    returning: Optional[Sequence[str]] = None,
    update: bool = False,
) -> Tuple[Optional[Dict[str, Any]], Union[bool, Dict[str, Any]]]:
    """Insert into a table as necessary to ensure database contains
    values equivalent to the given ones.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table to be queried and possibly inserted into.
    keys : `dict`
        Column name-value pairs used to search for an existing row; must
        be a combination that can be used to select a single row if one
        exists.  If such a row does not exist, these values are used in
        the insert.
    compared : `dict`, optional
        Column name-value pairs that are compared to those in any existing
        row.  If such a row does not exist, these values are used in the
        insert.
    extra : `dict`, optional
        Column name-value pairs that are ignored if a matching row exists,
        but used in an insert if one is necessary.
    returning : `~collections.abc.Sequence` of `str`, optional
        The names of columns whose values should be returned.
    update : `bool`, optional
        If `True` (`False` is default), update the existing row with the
        values in ``compared`` instead of raising `DatabaseConflictError`.

    Returns
    -------
    row : `dict`, optional
        The value of the fields indicated by ``returning``, or `None` if
        ``returning`` is `None`.
    inserted_or_updated : `bool` or `dict`
        If `True`, a new row was inserted; if `False`, a matching row
        already existed.  If a `dict` (only possible if ``update=True``),
        then an existing row was updated, and the dict maps the names of
        the updated columns to their *old* values (new values can be
        obtained from ``compared``).

    Raises
    ------
    DatabaseConflictError
        Raised if the values in ``compared`` do not match the values in the
        database.
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False`, and no matching record
        already exists.
    ConflictingDefinitionError
        Raised if the table is writeable but no row matching ``keys`` can
        be found after the insert attempt.
    RuntimeError
        Raised if ``keys`` matches more than one row, or if a conflict
        with ``compared`` is found right after a successful insert.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    It may be called on read-only databases if and only if the matching row
    does in fact already exist.
    """

    def check() -> Tuple[int, Optional[Dict[str, Any]], Optional[List]]:
        """Query for a row that matches the ``keys`` argument, and compare
        to what was given by the caller.

        Returns
        -------
        n : `int`
            Number of matching rows.  ``n != 1`` is always an error, but
            it's a different kind of error depending on where `check` is
            being called.
        bad : `dict` or `None`
            The subset of the keys of ``compared`` for which the existing
            values did not match the given one, mapped to the existing
            values in the database.  A non-empty ``bad`` is always a
            conflict, but how it is handled depends on the caller's
            context.  `None` if ``n != 1``.
        result : `list` or `None`
            Results in the database that correspond to the columns given
            in ``returning``, or `None` if ``returning is None`` or
            ``n != 1``.
        """
        toSelect: Set[str] = set()
        if compared is not None:
            toSelect.update(compared.keys())
        if returning is not None:
            toSelect.update(returning)
        if not toSelect:
            # Need to select some column, even if we just want to see
            # how many rows we get back.
            toSelect.add(next(iter(keys.keys())))
        selectSql = (
            sqlalchemy.sql.select(*[table.columns[k].label(k) for k in toSelect])
            .select_from(table)
            .where(sqlalchemy.sql.and_(*[table.columns[k] == v for k, v in keys.items()]))
        )
        with self._transaction() as (_, connection):
            fetched = list(connection.execute(selectSql).mappings())
        if len(fetched) != 1:
            return len(fetched), None, None
        existing = fetched[0]
        if compared is not None:

            def safeNotEqual(a: Any, b: Any) -> bool:
                # astropy times are compared through the project's
                # TimeConverter rather than with ``!=``.
                if isinstance(a, astropy.time.Time):
                    return not time_utils.TimeConverter().times_equal(a, b)
                return a != b

            inconsistencies = {
                k: existing[k] for k, v in compared.items() if safeNotEqual(existing[k], v)
            }
        else:
            inconsistencies = {}
        if returning is not None:
            toReturn: Optional[list] = [existing[k] for k in returning]
        else:
            toReturn = None
        return 1, inconsistencies, toReturn

    def format_bad(inconsistencies: Dict[str, Any]) -> str:
        """Format the 'bad' dictionary of existing values returned by
        ``check`` into a string suitable for an error message.

        Each entry is rendered as ``column: existing != requested``.
        """
        assert compared is not None, "Should not be able to get inconsistencies without comparing."
        return ", ".join(f"{k}: {v!r} != {compared[k]!r}" for k, v in inconsistencies.items())

    if self.isTableWriteable(table):
        # Try an insert first, but allow it to fail (in only specific
        # ways).
        row = keys.copy()
        if compared is not None:
            row.update(compared)
        if extra is not None:
            row.update(extra)
        with self.transaction():
            inserted = bool(self.ensure(table, row))
            inserted_or_updated: Union[bool, Dict[str, Any]]
            # Need to perform check() for this branch inside the
            # transaction, so we roll back an insert that didn't do
            # what we expected.  That limits the extent to which we
            # can reduce duplication between this block and the other
            # ones that perform similar logic.
            n, bad, result = check()
            if n < 1:
                raise ConflictingDefinitionError(
                    f"Attempted to ensure {row} exists by inserting it with ON CONFLICT IGNORE, "
                    f"but a post-insert query on {keys} returned no results. "
                    f"Insert was {'' if inserted else 'not '}reported as successful. "
                    "This can occur if the insert violated a database constraint other than the "
                    "unique constraint or primary key used to identify the row in this call."
                )
            elif n > 1:
                raise RuntimeError(
                    f"Keys passed to sync {keys.keys()} do not comprise a "
                    f"unique constraint for table {table.name}."
                )
            elif bad:
                assert (
                    compared is not None
                ), "Should not be able to get inconsistencies without comparing."
                if inserted:
                    raise RuntimeError(
                        f"Conflict ({bad}) in sync after successful insert; this is "
                        "possible if the same table is being updated by a concurrent "
                        "process that isn't using sync, but it may also be a bug in "
                        "daf_butler."
                    )
                elif update:
                    # Overwrite only the columns that differ; ``bad`` keeps
                    # their old values for the caller.
                    with self._transaction() as (_, connection):
                        connection.execute(
                            table.update()
                            .where(sqlalchemy.sql.and_(*[table.columns[k] == v for k, v in keys.items()]))
                            .values(**{k: compared[k] for k in bad.keys()})
                        )
                    inserted_or_updated = bad
                else:
                    raise DatabaseConflictError(
                        f"Conflict in sync for table {table.name} on column(s) {format_bad(bad)}."
                    )
            else:
                inserted_or_updated = inserted
    else:
        # Database is not writeable; just see if the row exists.
        n, bad, result = check()
        if n < 1:
            raise ReadOnlyDatabaseError("sync needs to insert, but database is read-only.")
        elif n > 1:
            raise RuntimeError("Keys passed to sync do not comprise a unique constraint.")
        elif bad:
            if update:
                raise ReadOnlyDatabaseError("sync needs to update, but database is read-only.")
            else:
                raise DatabaseConflictError(
                    f"Conflict in sync for table {table.name} on column(s) {format_bad(bad)}."
                )
        inserted_or_updated = False
    if returning is None:
        return None, inserted_or_updated
    else:
        assert result is not None
        return {k: v for k, v in zip(returning, result)}, inserted_or_updated
def insert(
    self,
    table: sqlalchemy.schema.Table,
    *rows: dict,
    returnIds: bool = False,
    select: Optional[sqlalchemy.sql.expression.SelectBase] = None,
    names: Optional[Iterable[str]] = None,
) -> Optional[List[int]]:
    """Insert one or more rows into a table, optionally returning
    autoincrement primary key values.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    returnIds : `bool`
        If `True` (`False` is default), return the values of the table's
        autoincrement primary key field (which must exist).
    select : `sqlalchemy.sql.SelectBase`, optional
        A SELECT query expression to insert rows from.  Cannot be provided
        with either ``rows`` or ``returnIds=True``.
    names : `Iterable` [ `str` ], optional
        Names of columns in ``table`` to be populated, ordered to match the
        columns returned by ``select``.  Ignored if ``select`` is `None`.
        If not provided, the columns returned by ``select`` must be named
        to match the desired columns of ``table``.
    *rows
        Positional arguments are the rows to be inserted, as dictionaries
        mapping column name to value.  The keys in all dictionaries must
        be the same.

    Returns
    -------
    ids : `None`, or `list` of `int`
        If ``returnIds`` is `True`, a `list` containing the inserted
        values for the table's autoincrement primary key.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.
    TypeError
        Raised if ``select`` is combined with ``rows`` or
        ``returnIds=True``.

    Notes
    -----
    The default implementation uses bulk insert syntax when ``returnIds``
    is `False`, and a loop over single-row insert operations when it is
    `True`.

    Derived classes should reimplement when they can provide a more
    efficient implementation (especially for the latter case).

    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.
    """
    self.assertTableWriteable(table, f"Cannot insert into read-only table {table}.")
    if select is not None and (rows or returnIds):
        raise TypeError("'select' is incompatible with passing value rows or returnIds=True.")
    if not rows and select is None:
        # Nothing to insert at all.
        return [] if returnIds else None
    with self._transaction() as (_, connection):
        if returnIds:
            # One statement per row so each generated key can be read back.
            statement = table.insert()
            return [connection.execute(statement, row).inserted_primary_key[0] for row in rows]
        if select is None:
            connection.execute(table.insert(), rows)
            return None
        if names is None:
            # columns() is deprecated since 1.4, but
            # selected_columns() method did not exist in 1.3.
            if hasattr(select, "selected_columns"):
                names = select.selected_columns.keys()
            else:
                names = select.columns.keys()
        connection.execute(table.insert().from_select(names, select))
        return None
@abstractmethod
def replace(self, table: sqlalchemy.schema.Table, *rows: dict) -> None:
    """Insert rows into a table, replacing any existing rows whose primary
    key would otherwise conflict with a new row.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    *rows
        Positional arguments are the rows to be inserted, as dictionaries
        mapping column name to value.  The keys in all dictionaries must
        be the same.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    Implementations should raise a `sqlalchemy.exc.IntegrityError`
    exception when a constraint other than the primary key would be
    violated.

    Implementations are not required to support `replace` on tables
    with autoincrement keys.

    This base-class body only raises `NotImplementedError`.
    """
    raise NotImplementedError
@abstractmethod
def ensure(self, table: sqlalchemy.schema.Table, *rows: dict, primary_key_only: bool = False) -> int:
    """Insert rows into a table, skipping any row whose insertion would
    violate a unique constraint.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    *rows
        Positional arguments are the rows to be inserted, as dictionaries
        mapping column name to value.  The keys in all dictionaries must
        be the same.
    primary_key_only : `bool`, optional
        If `True` (`False` is default), only skip rows that violate the
        primary key constraint, and raise an exception (and rollback
        transactions) for other constraint violations.

    Returns
    -------
    count : `int`
        The number of rows actually inserted.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.
        This is raised even if the operation would do nothing even on a
        writeable database.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    Implementations are not required to support `ensure` on tables
    with autoincrement keys.

    This base-class body only raises `NotImplementedError`.
    """
    raise NotImplementedError
def delete(self, table: sqlalchemy.schema.Table, columns: Iterable[str], *rows: dict) -> int:
    """Delete one or more rows from a table.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table that rows should be deleted from.
    columns : `~collections.abc.Iterable` of `str`
        The names of columns that will be used to constrain the rows to
        be deleted; these will be combined via ``AND`` to form the
        ``WHERE`` clause of the delete query.  Any iterable is accepted,
        including one-shot iterators.
    *rows
        Positional arguments are the keys of rows to be deleted, as
        dictionaries mapping column name to value.  The keys in all
        dictionaries must be exactly the names in ``columns``.

    Returns
    -------
    count : `int`
        Number of rows deleted.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    The default implementation should be sufficient for most derived
    classes.
    """
    self.assertTableWriteable(table, f"Cannot delete from read-only table {table}.")
    # Materialize before testing truthiness: an iterator or generator is
    # always truthy, even when it yields nothing, which would otherwise turn
    # an unconstrained "delete everything" request into a no-op.
    columns = list(columns)
    if columns and not rows:
        # If there are no columns, this operation is supposed to delete
        # everything (so we proceed as usual).  But if there are columns,
        # but no rows, it was a constrained bulk operation where the
        # constraint is that no rows match, and we should short-circuit
        # while reporting that no rows were affected.
        return 0
    sql = table.delete()

    # More efficient to use IN operator if there is only one
    # variable changing across all rows.
    content: Dict[str, Set] = defaultdict(set)
    if len(columns) == 1:
        # Nothing to calculate since we can always use IN
        column = columns[0]
        changing_columns = [column]
        content[column] = {row[column] for row in rows}
    else:
        for row in rows:
            for k, v in row.items():
                content[k].add(v)
        changing_columns = [col for col, values in content.items() if len(values) > 1]

    if len(changing_columns) != 1:
        # More than one column changes each time so do explicit bind
        # parameters and have each row processed separately.
        whereTerms = [table.columns[name] == sqlalchemy.sql.bindparam(name) for name in columns]
        if whereTerms:
            sql = sql.where(sqlalchemy.sql.and_(*whereTerms))
        with self._transaction() as (_, connection):
            return connection.execute(sql, rows).rowcount

    # One of the columns has changing values but any others are
    # fixed.  In this case we can use an IN operator and be more
    # efficient.
    name = changing_columns[0]

    # Simple where clause for the unchanging columns; each of their value
    # sets holds exactly one element.
    clauses = [table.columns[k] == next(iter(v)) for k, v in content.items() if k != name]

    # The IN operator will not work for "infinite" numbers of
    # rows so must batch it up into distinct calls.
    in_content = list(content[name])
    n_per_loop = 1_000  # Controls how many items to put in IN clause
    rowcount = 0
    with self._transaction() as (_, connection):
        for start in range(0, len(in_content), n_per_loop):
            in_clause = table.columns[name].in_(in_content[start : start + n_per_loop])
            batch_sql = sql.where(sqlalchemy.sql.and_(*clauses, in_clause))
            rowcount += connection.execute(batch_sql).rowcount
    return rowcount
def deleteWhere(self, table: sqlalchemy.schema.Table, where: sqlalchemy.sql.ClauseElement) -> int:
    """Delete rows from a table with pre-constructed WHERE clause.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table that rows should be deleted from.
    where : `sqlalchemy.sql.ClauseElement`
        Expression used as the ``WHERE`` clause of the delete query; it is
        passed to the statement unchanged.

    Returns
    -------
    count : `int`
        Number of rows deleted.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    The default implementation should be sufficient for most derived
    classes.
    """
    self.assertTableWriteable(table, f"Cannot delete from read-only table {table}.")

    statement = table.delete().where(where)
    with self._transaction() as (_, connection):
        outcome = connection.execute(statement)
        return outcome.rowcount
def update(self, table: sqlalchemy.schema.Table, where: Dict[str, str], *rows: dict) -> int:
    """Update one or more rows in a table.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table containing the rows to be updated.
    where : `dict` [`str`, `str`]
        A mapping from the names of columns that will be used to search for
        existing rows to the keys that will hold these values in the
        ``rows`` dictionaries.  Note that these may not be the same due to
        SQLAlchemy limitations.
    *rows
        Positional arguments are the rows to be updated.  The keys in all
        dictionaries must be the same, and may correspond to either a
        value in the ``where`` dictionary or the name of a column to be
        updated.

    Returns
    -------
    count : `int`
        Number of rows matched (regardless of whether the update actually
        modified them).  Zero is returned without touching the database
        when no rows are given.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    The default implementation should be sufficient for most derived
    classes.
    """
    self.assertTableWriteable(table, f"Cannot update read-only table {table}.")
    if not rows:
        return 0
    statement = table.update()
    # Each search column is matched against a bind parameter named after
    # the corresponding key in the row dictionaries.
    conditions = [
        table.columns[column] == sqlalchemy.sql.bindparam(row_key) for column, row_key in where.items()
    ]
    statement = statement.where(sqlalchemy.sql.and_(*conditions))
    with self._transaction() as (_, connection):
        outcome = connection.execute(statement, rows)
        return outcome.rowcount
@contextmanager
def query(
    self, sql: sqlalchemy.sql.expression.SelectBase, *args: Any, **kwargs: Any
) -> Iterator[sqlalchemy.engine.CursorResult]:
    """Run a SELECT query against the database.

    Parameters
    ----------
    sql : `sqlalchemy.sql.expression.SelectBase`
        A SQLAlchemy representation of a ``SELECT`` query.
    *args
        Additional positional arguments are forwarded to
        `sqlalchemy.engine.Connection.execute`.
    **kwargs
        Additional keyword arguments are forwarded to
        `sqlalchemy.engine.Connection.execute`.

    Returns
    -------
    result_context : `sqlalchemy.engine.CursorResult`
        Context manager that returns the query result object when entered.
        These results are invalidated when the context is exited.

    Notes
    -----
    If a session connection is active it is used and left open.  Otherwise
    a new connection is opened from the engine and is closed when the
    context exits, including when executing the query itself fails.
    """
    session = self._session_connection
    # Decide ownership once, up front, so the cleanup below does not depend
    # on whether the session connection changes while the caller holds the
    # result.
    owns_connection = session is None
    connection = self._engine.connect() if owns_connection else session
    try:
        # Execute inside the ``try`` so a failing query cannot leak a
        # connection opened just for this call.
        result = connection.execute(sql, *args, **kwargs)
        yield result
    finally:
        if owns_connection:
            connection.close()
@abstractmethod
def constant_rows(
    self,
    fields: NamedValueAbstractSet[ddl.FieldSpec],
    *rows: dict,
    name: Optional[str] = None,
) -> sqlalchemy.sql.FromClause:
    """Build a SQLAlchemy construct holding a handful of literal rows.

    Parameters
    ----------
    fields : `NamedValueAbstractSet` [ `ddl.FieldSpec` ]
        Specifications for the columns of the rows. Unique and foreign
        key constraints are ignored.
    *rows : `dict`
        Row values, each a mapping that is indexed by field name.
    name : `str`, optional
        Name to give the SQL construct. When omitted, an opaque but
        unique identifier is generated.

    Returns
    -------
    from_clause : `sqlalchemy.sql.FromClause`
        SQLAlchemy object representing the given rows. This is guaranteed
        to be something that can be directly joined into a ``SELECT``
        query's ``FROM`` clause, and will not involve a temporary table
        that needs to be cleaned up later.

    Notes
    -----
    The default implementation uses the SQL-standard ``VALUES`` construct,
    but support for that construct is varied enough across popular RDBMSs
    that the method is still marked abstract to force explicit opt-in via
    delegation to `super`.
    """
    construct_name = f"tmp_{uuid.uuid4().hex}" if name is None else name
    # One column per field, typed by the field's own sized column type.
    columns = [sqlalchemy.Column(spec.name, spec.getSizedColumnType()) for spec in fields]
    values_clause = sqlalchemy.sql.values(*columns, name=construct_name)
    # Order each row's values to match the order of the field names.
    literal_rows = []
    for row in rows:
        literal_rows.append(tuple(row[key] for key in fields.names))
    return values_clause.data(literal_rows)
def get_constant_rows_max(self) -> int:
    """Report the largest number of rows that should be handed to
    `constant_rows` for this backend.

    Returns
    -------
    max : `int`
        Maximum number of rows.

    Notes
    -----
    The value is meant to reflect typical performance profiles (or a
    guess at these), not just hard database engine limits.
    """
    max_rows = 100
    return max_rows
# Declared by annotation only; no value is assigned at this point in the
# class body.
origin: int
"""An integer ID that should be used as the default for any datasets,
quanta, or other entities that use a (autoincrement, origin) compound
primary key (`int`).
"""
# Declared by annotation only; no value is assigned at this point in the
# class body. The annotation permits `None`.
namespace: Optional[str]
"""The schema or namespace this database instance is associated with
(`str` or `None`).
"""