Coverage for python/lsst/daf/butler/registry/interfaces/_database.py: 21%
404 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-08-05 01:26 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-08-05 01:26 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = [
24 "Database",
25 "ReadOnlyDatabaseError",
26 "DatabaseConflictError",
27 "SchemaAlreadyDefinedError",
28 "StaticTablesContext",
29]
31import uuid
32import warnings
33from abc import ABC, abstractmethod
34from collections import defaultdict
35from collections.abc import Callable, Iterable, Iterator, Sequence
36from contextlib import contextmanager
37from typing import Any, cast, final
39import astropy.time
40import sqlalchemy
42from ...core import TimespanDatabaseRepresentation, ddl, time_utils
43from ...core.named import NamedValueAbstractSet
44from .._exceptions import ConflictingDefinitionError
47# TODO: method is called with list[ReflectedColumn] in SA 2, and
48# ReflectedColumn does not exist in 1.4.
def _checkExistingTableDefinition(name: str, spec: ddl.TableSpec, inspection: list) -> None:
    """Verify that a reflected table has exactly the columns its
    specification declares.

    Parameters
    ----------
    name : `str`
        Name of the table; it only appears in the error message.
    spec : `ddl.TableSpec`
        Expected definition of the table.
    inspection : `list`
        Column records as returned by
        `sqlalchemy.engine.reflection.Inspector.get_columns`; each record is
        indexed with ``"name"`` to obtain the column name.

    Raises
    ------
    DatabaseConflictError
        Raised if the set of column names found in the database differs from
        the set of field names in ``spec``.
    """
    columnNames = []
    for column in inspection:
        columnNames.append(column["name"])
    # Only the names are compared; order is deliberately ignored.
    differs = spec.fields.names != set(columnNames)
    if not differs:
        return
    raise DatabaseConflictError(
        f"Table '{name}' exists but is defined differently in the database; "
        f"specification has columns {list(spec.fields.names)}, while the "
        f"table in the database has {columnNames}."
    )
class ReadOnlyDatabaseError(RuntimeError):
    """Error signalling that a write was attempted through a `Database`
    that is read-only.
    """
class DatabaseConflictError(ConflictingDefinitionError):
    """Error signalling that what is stored in the database (row values or
    schema entities) disagrees with what this client expects.
    """
class SchemaAlreadyDefinedError(RuntimeError):
    """Error signalling an attempt to initialize a database schema while
    some tables are already present.
    """
class StaticTablesContext:
    """Collector for the static table declarations of one registry layer.

    `Database.declareStaticTables` hands out instances of this class; that
    is meant to be the only place where they are constructed.
    """

    def __init__(self, db: Database, connection: sqlalchemy.engine.Connection):
        self._db = db
        # Foreign keys are kept aside (paired with their table) so that they
        # can be attached after every table has been declared.
        self._foreignKeys: list[tuple[sqlalchemy.schema.Table, sqlalchemy.schema.ForeignKeyConstraint]] = []
        self._initializers: list[Callable[[Database], None]] = []
        # Snapshot of the tables already present in the namespace.
        self._inspector = sqlalchemy.inspect(connection)
        self._tableNames = frozenset(self._inspector.get_table_names(schema=self._db.namespace))

    def addTable(self, name: str, spec: ddl.TableSpec) -> sqlalchemy.schema.Table:
        """Declare a table and return its SQLAlchemy representation.

        Actual creation may be postponed until the context opened by
        `Database.declareStaticTables` ends, which is what allows tables to
        be declared in any order even when foreign keys link them.
        """
        database = self._db
        name = database._mangleTableName(name)
        if name in self._tableNames:
            # The table already exists: make sure it matches the spec.
            reflected = self._inspector.get_columns(name, schema=database.namespace)
            _checkExistingTableDefinition(name, spec, reflected)
        metadata = database._metadata
        assert metadata is not None, "Guaranteed by context manager that returns this object."
        table = database._convertTableSpec(name, spec, metadata)
        self._foreignKeys.extend(
            (table, database._convertForeignKeySpec(name, foreignKeySpec, metadata))
            for foreignKeySpec in spec.foreignKeys
        )
        return table

    def addTableTuple(self, specs: tuple[ddl.TableSpec, ...]) -> tuple[sqlalchemy.schema.Table, ...]:
        """Declare every table of a named tuple of specifications and return
        the SQLAlchemy tables in a named tuple of the same type.

        Actual creation may be postponed until the context opened by
        `Database.declareStaticTables` ends, which is what allows tables to
        be declared in any order even when foreign keys link them.

        Notes
        -----
        ``specs`` *must* be an instance of a type produced by
        `collections.namedtuple` (a plain tuple is not enough), and the
        result is guaranteed to be of that same type. Since
        `~collections.namedtuple` is a factory for `type` objects rather
        than a type, this cannot be expressed with type annotations.
        """
        tables = (
            self.addTable(name, spec) for name, spec in zip(specs._fields, specs, strict=True)  # type: ignore
        )
        return specs._make(tables)  # type: ignore

    def addInitializer(self, initializer: Callable[[Database], None]) -> None:
        """Register a callable performing one-time database initialization.

        Initialization is anything that changes the state of the database
        and must happen exactly once after the schema has been created, for
        instance populating schema attributes.

        Parameters
        ----------
        initializer : callable
            Callable taking a single `Database` argument.
        """
        self._initializers += [initializer]
167class Database(ABC):
168 """An abstract interface that represents a particular database engine's
169 representation of a single schema/namespace/database.
171 Parameters
172 ----------
173 origin : `int`
174 An integer ID that should be used as the default for any datasets,
175 quanta, or other entities that use a (autoincrement, origin) compound
176 primary key.
177 engine : `sqlalchemy.engine.Engine`
178 The SQLAlchemy engine for this `Database`.
179 namespace : `str`, optional
180 Name of the schema or namespace this instance is associated with.
181 This is passed as the ``schema`` argument when constructing a
182 `sqlalchemy.schema.MetaData` instance. We use ``namespace`` instead to
183 avoid confusion between "schema means namespace" and "schema means
184 table definitions".
186 Notes
187 -----
188 `Database` requires all write operations to go through its special named
189 methods. Our write patterns are sufficiently simple that we don't really
190 need the full flexibility of SQL insert/update/delete syntax, and we need
191 non-standard (but common) functionality in these operations sufficiently
192 often that it seems worthwhile to provide our own generic API.
194 In contrast, `Database.query` allows arbitrary ``SELECT`` queries (via
195 their SQLAlchemy representation) to be run, as we expect these to require
196 significantly more sophistication while still being limited to standard
197 SQL.
199 `Database` itself has several underscore-prefixed attributes:
201 - ``_engine``: SQLAlchemy object representing its engine.
202 - ``_connection``: method returning a context manager for
203 `sqlalchemy.engine.Connection` object.
204 - ``_metadata``: the `sqlalchemy.schema.MetaData` object representing
205 the tables and other schema entities.
207 These are considered protected (derived classes may access them, but other
208 code should not), and read-only, aside from executing SQL via
209 ``_connection``.
210 """
212 def __init__(self, *, origin: int, engine: sqlalchemy.engine.Engine, namespace: str | None = None):
213 self.origin = origin
214 self.namespace = namespace
215 self._engine = engine
216 self._session_connection: sqlalchemy.engine.Connection | None = None
217 self._metadata: sqlalchemy.schema.MetaData | None = None
218 self._temp_tables: set[str] = set()
220 def __repr__(self) -> str:
221 # Rather than try to reproduce all the parameters used to create
222 # the object, instead report the more useful information of the
223 # connection URL.
224 if self._engine.url.password is not None:
225 uri = str(self._engine.url.set(password="***"))
226 else:
227 uri = str(self._engine.url)
228 if self.namespace:
229 uri += f"#{self.namespace}"
230 return f'{type(self).__name__}("{uri}")'
232 @classmethod
233 def makeDefaultUri(cls, root: str) -> str | None:
234 """Create a default connection URI appropriate for the given root
235 directory, or `None` if there can be no such default.
236 """
237 return None
@classmethod
def fromUri(
    cls,
    uri: str | sqlalchemy.engine.URL,
    *,
    origin: int,
    namespace: str | None = None,
    writeable: bool = True,
) -> Database:
    """Build a `Database` from a SQLAlchemy connection URI.

    Parameters
    ----------
    uri : `str` or `sqlalchemy.engine.URL`
        SQLAlchemy connection string.
    origin : `int`
        Integer ID used as the default for datasets, quanta, or other
        entities with an (autoincrement, origin) compound primary key.
    namespace : `str`, optional
        Database namespace (i.e. schema) to associate the new instance
        with. If `None` (default), the namespace (if any) is inferred from
        the URI.
    writeable : `bool`, optional
        If `True`, write operations, including ``CREATE TABLE``, are
        allowed.

    Returns
    -------
    db : `Database`
        The newly constructed `Database`.
    """
    # Two steps: first the engine, then the database wrapped around it.
    engine = cls.makeEngine(uri, writeable=writeable)
    return cls.fromEngine(engine, origin=origin, namespace=namespace, writeable=writeable)
@classmethod
@abstractmethod
def makeEngine(
    cls, uri: str | sqlalchemy.engine.URL, *, writeable: bool = True
) -> sqlalchemy.engine.Engine:
    """Build a `sqlalchemy.engine.Engine` for a SQLAlchemy URI.

    Parameters
    ----------
    uri : `str` or `sqlalchemy.engine.URL`
        SQLAlchemy connection string.
    writeable : `bool`, optional
        If `True`, write operations, including ``CREATE TABLE``, are
        allowed.

    Returns
    -------
    engine : `sqlalchemy.engine.Engine`
        The database engine.

    Notes
    -----
    Subclasses supporting other ways of connecting to a database are
    encouraged to add optional arguments to their implementation, provided
    it stays compatible with the base class call signature.
    """
    raise NotImplementedError()
@classmethod
@abstractmethod
def fromEngine(
    cls,
    engine: sqlalchemy.engine.Engine,
    *,
    origin: int,
    namespace: str | None = None,
    writeable: bool = True,
) -> Database:
    """Build a `Database` around an existing `sqlalchemy.engine.Engine`.

    Parameters
    ----------
    engine : `sqlalchemy.engine.Engine`
        Engine for the database. It may be shared between `Database`
        instances.
    origin : `int`
        Integer ID used as the default for datasets, quanta, or other
        entities with an (autoincrement, origin) compound primary key.
    namespace : `str`, optional
        A different database namespace (i.e. schema) to associate the new
        instance with. If `None` (default), the namespace (if any) is
        inferred from the connection.
    writeable : `bool`, optional
        If `True`, write operations, including ``CREATE TABLE``, are
        allowed.

    Returns
    -------
    db : `Database`
        The newly constructed `Database`.

    Notes
    -----
    Sharing one engine between several `Database` instances is desirable
    when they represent different namespaces that can be queried together.
    """
    raise NotImplementedError()
@final
@contextmanager
def session(self) -> Iterator[None]:
    """Open a session, i.e. a persistent connection to the database.

    Returns
    -------
    context : `AbstractContextManager` [ `None` ]
        Context manager that yields no value when entered.

    Notes
    -----
    Use this when several read-only SQL operations are issued in rapid
    succession *without* needing mutually consistent results in the
    presence of concurrent writes (or, more rarely, when conflicting
    concurrent writes are rare/impossible and the session stays open long
    enough that a transaction is inadvisable).
    """
    # The protected implementation yields the connection; it is
    # deliberately not exposed to callers of this public method.
    with self._session() as _:
        yield None
@final
@contextmanager
def transaction(
    self,
    *,
    interrupting: bool = False,
    savepoint: bool = False,
    lock: Iterable[sqlalchemy.schema.Table] = (),
    for_temp_tables: bool = False,
) -> Iterator[None]:
    """Open a transaction.

    Parameters
    ----------
    interrupting : `bool`, optional
        If `True` (`False` is default), this transaction block may not be
        nested without an outer one, and attempting to do so is a logic
        (i.e. assertion) error.
    savepoint : `bool`, optional
        If `True` (`False` is default), create a `SAVEPOINT`, so that
        exceptions raised by the database (e.g. constraint violations)
        inside this context can be caught outside it without rolling back
        the operations of an outer transaction block. If `False`,
        transactions may still be nested, but a rollback generated at any
        level affects all levels, and commits are deferred until the
        outermost block completes. Once an outer block was created with
        ``savepoint=True``, all inner blocks are too, whatever value they
        pass. Has no effect on the outermost transaction.
    lock : `~collections.abc.Iterable` [ `sqlalchemy.schema.Table` ], optional
        Tables to lock for the duration of the transaction. The locks are
        guaranteed to prevent concurrent writes and to let this
        transaction (only) acquire the same locks (others should block);
        concurrent reads are prevented only if the database engine needs
        that to block concurrent writes.
    for_temp_tables : `bool`, optional
        If `True`, this transaction may involve creating temporary tables.

    Returns
    -------
    context : `AbstractContextManager` [ `None` ]
        Context manager that commits the transaction when exited without
        error and rolls back the transaction when exited via an exception.

    Notes
    -----
    Every transaction on a connection managed by one or more `Database`
    instances _must_ go through this method, otherwise transaction state
    is not managed correctly.
    """
    options = {
        "interrupting": interrupting,
        "savepoint": savepoint,
        "lock": lock,
        "for_temp_tables": for_temp_tables,
    }
    # The protected implementation yields (is_new, connection); neither is
    # exposed to callers of this public method.
    with self._transaction(**options):
        yield None
@contextmanager
def temporary_table(
    self, spec: ddl.TableSpec, name: str | None = None
) -> Iterator[sqlalchemy.schema.Table]:
    """Create a temporary table for the duration of a context, dropping it
    on exit.

    Parameters
    ----------
    spec : `ddl.TableSpec`
        Specification of the columns. Unique and foreign key constraints
        may be ignored.
    name : `str`, optional
        Name of the SQL construct. When omitted, an opaque but unique
        identifier is generated.

    Returns
    -------
    context : `AbstractContextManager` [ `sqlalchemy.schema.Table` ]
        Context manager that yields the SQLAlchemy representation of the
        temporary table when entered.

    Notes
    -----
    As far as the Python-level protections of the `Database` classes are
    concerned, temporary tables may be created, dropped, and written to
    even in read-only databases. Server permissions may disagree, in which
    case they probably need to be changed to support the full range of
    expected read-only butler behavior.
    """
    with self._session() as session_connection:
        temp_table = self._make_temporary_table(session_connection, spec=spec, name=name)
        # Record the table so that write checks can recognise it.
        self._temp_tables.add(temp_table.key)
        try:
            yield temp_table
        finally:
            # Drop inside a transaction, then forget the table.
            with self._transaction():
                temp_table.drop(session_connection)
            self._temp_tables.remove(temp_table.key)
465 @contextmanager
466 def _session(self) -> Iterator[sqlalchemy.engine.Connection]:
467 """Protected implementation for `session` that actually returns the
468 connection.
470 This method is for internal `Database` calls that need the actual
471 SQLAlchemy connection object. It should be overridden by subclasses
472 instead of `session` itself.
474 Returns
475 -------
476 context : `AbstractContextManager` [ `sqlalchemy.engine.Connection` ]
477 A context manager that returns a SQLALchemy connection when
478 entered.
480 """
481 if self._session_connection is not None:
482 # session already started, just reuse that
483 yield self._session_connection
484 else:
485 try:
486 # open new connection and close it when done
487 self._session_connection = self._engine.connect()
488 yield self._session_connection
489 finally:
490 if self._session_connection is not None:
491 self._session_connection.close()
492 self._session_connection = None
493 # Temporary tables only live within session
494 self._temp_tables = set()
496 @contextmanager
497 def _transaction(
498 self,
499 *,
500 interrupting: bool = False,
501 savepoint: bool = False,
502 lock: Iterable[sqlalchemy.schema.Table] = (),
503 for_temp_tables: bool = False,
504 ) -> Iterator[tuple[bool, sqlalchemy.engine.Connection]]:
505 """Protected implementation for `transaction` that actually returns the
506 connection and whether this is a new outermost transaction.
508 This method is for internal `Database` calls that need the actual
509 SQLAlchemy connection object. It should be overridden by subclasses
510 instead of `transaction` itself.
512 Parameters
513 ----------
514 interrupting : `bool`, optional
515 If `True` (`False` is default), this transaction block may not be
516 nested without an outer one, and attempting to do so is a logic
517 (i.e. assertion) error.
518 savepoint : `bool`, optional
519 If `True` (`False` is default), create a `SAVEPOINT`, allowing
520 exceptions raised by the database (e.g. due to constraint
521 violations) during this transaction's context to be caught outside
522 it without also rolling back all operations in an outer transaction
523 block. If `False`, transactions may still be nested, but a
524 rollback may be generated at any level and affects all levels, and
525 commits are deferred until the outermost block completes. If any
526 outer transaction block was created with ``savepoint=True``, all
527 inner blocks will be as well (regardless of the actual value
528 passed). This has no effect if this is the outermost transaction.
529 lock : `~collections.abc.Iterable` [ `sqlalchemy.schema.Table` ], \
530 optional
531 A list of tables to lock for the duration of this transaction.
532 These locks are guaranteed to prevent concurrent writes and allow
533 this transaction (only) to acquire the same locks (others should
534 block), but only prevent concurrent reads if the database engine
535 requires that in order to block concurrent writes.
536 for_temp_tables : `bool`, optional
537 If `True`, this transaction may involve creating temporary tables.
539 Returns
540 -------
541 context : `AbstractContextManager` [ `tuple` [ `bool`,
542 `sqlalchemy.engine.Connection` ] ]
543 A context manager that commits the transaction when it is exited
544 without error and rolls back the transactoin when it is exited via
545 an exception. When entered, it returns a tuple of:
547 - ``is_new`` (`bool`): whether this is a new (outermost)
548 transaction;
549 - ``connection`` (`sqlalchemy.engine.Connection`): the connection.
550 """
551 with self._session() as connection:
552 already_in_transaction = connection.in_transaction()
553 assert not (interrupting and already_in_transaction), (
554 "Logic error in transaction nesting: an operation that would "
555 "interrupt the active transaction context has been requested."
556 )
557 savepoint = savepoint or connection.in_nested_transaction()
558 trans: sqlalchemy.engine.Transaction | None
559 if already_in_transaction:
560 if savepoint:
561 trans = connection.begin_nested()
562 else:
563 # Nested non-savepoint transactions don't do anything.
564 trans = None
565 else:
566 # Use a regular (non-savepoint) transaction always for the
567 # outermost context.
568 trans = connection.begin()
569 self._lockTables(connection, lock)
570 try:
571 yield not already_in_transaction, connection
572 if trans is not None:
573 trans.commit()
574 except BaseException:
575 if trans is not None:
576 trans.rollback()
577 raise
579 @abstractmethod
580 def _lockTables(
581 self, connection: sqlalchemy.engine.Connection, tables: Iterable[sqlalchemy.schema.Table] = ()
582 ) -> None:
583 """Acquire locks on the given tables.
585 This is an implementation hook for subclasses, called by `transaction`.
586 It should not be called directly by other code.
588 Parameters
589 ----------
590 connection : `sqlalchemy.engine.Connection`
591 Database connection object. It is guaranteed that transaction is
592 already in a progress for this connection.
593 tables : `~collections.abc.Iterable` [ `sqlalchemy.schema.Table` ], \
594 optional
595 A list of tables to lock for the duration of this transaction.
596 These locks are guaranteed to prevent concurrent writes and allow
597 this transaction (only) to acquire the same locks (others should
598 block), but only prevent concurrent reads if the database engine
599 requires that in order to block concurrent writes.
600 """
601 raise NotImplementedError()
def isTableWriteable(self, table: sqlalchemy.schema.Table) -> bool:
    """Tell whether a table can be written to, which is the case when the
    database connection is read-write or when the table is temporary.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy table object to check.

    Returns
    -------
    writeable : `bool`
        Whether this table is writeable.
    """
    database_writeable = self.isWriteable()
    if database_writeable:
        return database_writeable
    # Read-only database: only registered temporary tables accept writes.
    return table.key in self._temp_tables
def assertTableWriteable(self, table: sqlalchemy.schema.Table, msg: str) -> None:
    """Raise unless the given table is writeable, i.e. unless the database
    connection is read-write or the table is a temporary table.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy table object to check.
    msg : `str`
        Message of the `ReadOnlyDatabaseError` raised when the table is
        not writeable.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isTableWriteable` reports the table as not writeable.
    """
    if self.isTableWriteable(table):
        return
    raise ReadOnlyDatabaseError(msg)
@contextmanager
def declareStaticTables(self, *, create: bool) -> Iterator[StaticTablesContext]:
    """Open a context in which the static DDL schema of the database is
    declared.

    Parameters
    ----------
    create : `bool`
        If `True`, attempt to create all tables when the context ends.
        If `False`, the tables are assumed to exist already.

    Returns
    -------
    schema : `StaticTablesContext`
        Helper object through which new tables are added.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if ``create`` is `True` while `Database.isWriteable` is
        `False`.
    SchemaAlreadyDefinedError
        Raised if ``create`` is `True` and the namespace already contains
        tables.

    Examples
    --------
    Given a `Database` instance ``db``::

        with db.declareStaticTables(create=True) as schema:
            schema.addTable("table1", TableSpec(...))
            schema.addTable("table2", TableSpec(...))

    Notes
    -----
    The static DDL schema must be declared before any dynamic tables are
    managed through `ensureTableExists` or `getExistingTable`. Tables may
    be added inside the context block in any order; they are automatically
    sorted and created in an order consistent with their foreign key
    relationships.
    """
    if create and not self.isWriteable():
        raise ReadOnlyDatabaseError(f"Cannot create tables in read-only database {self}.")
    self._metadata = sqlalchemy.MetaData(schema=self.namespace)
    try:
        with self._transaction() as (_, connection):
            schema = StaticTablesContext(self, connection)
            if create and schema._tableNames:
                # The database looks initialized already; refuse to touch
                # it rather than risk modifying or destroying a valid
                # schema.
                raise SchemaAlreadyDefinedError(f"Cannot create tables in non-empty database {self}.")
            yield schema
            # Every table is declared by now, so foreign keys can be
            # attached regardless of declaration order.
            for owner, constraint in schema._foreignKeys:
                owner.append_constraint(constraint)
            if create:
                namespace_missing = (
                    self.namespace is not None
                    and self.namespace not in schema._inspector.get_schema_names()
                )
                if namespace_missing:
                    connection.execute(sqlalchemy.schema.CreateSchema(self.namespace))
                # Some columns use sqlalchemy Sequence objects, and a
                # sqlalchemy bug makes building the repr of a Sequence
                # emit a deprecation warning; silence those warnings
                # while the tables are created.
                with warnings.catch_warnings():
                    warnings.simplefilter("ignore", category=sqlalchemy.exc.SADeprecationWarning)
                    self._metadata.create_all(connection)
                # Run the registered initializers one after another.
                for initializer in schema._initializers:
                    initializer(self)
    except BaseException:
        # Declaration failed: forget the partially built metadata.
        self._metadata = None
        raise
@abstractmethod
def isWriteable(self) -> bool:
    """Tell whether this client is able to modify the database (`True`
    if it is).
    """
    raise NotImplementedError()
714 @abstractmethod
715 def __str__(self) -> str:
716 """Return a human-readable identifier for this `Database`, including
717 any namespace or schema that identifies its names within a `Registry`.
718 """
719 raise NotImplementedError()
721 @property
722 def dialect(self) -> sqlalchemy.engine.Dialect:
723 """The SQLAlchemy dialect for this database engine
724 (`sqlalchemy.engine.Dialect`).
725 """
726 return self._engine.dialect
def shrinkDatabaseEntityName(self, original: str) -> str:
    """Make a name fit the length limits this database engine imposes on
    table, constraint, index, and sequence names.

    Simple truncation should not be assumed safe by implementations, since
    long names frequently share a common prefix.

    This default implementation hands the name back untouched.

    Parameters
    ----------
    original : `str`
        The original name.

    Returns
    -------
    shrunk : `str`
        The new, possibly shortened name.
    """
    shrunk = original
    return shrunk
def expandDatabaseEntityName(self, shrunk: str) -> str:
    """Recover the original name of a database entity whose name was too
    long to fit within the database engine's limits.

    This default implementation hands the name back untouched.

    Parameters
    ----------
    shrunk : `str`
        The possibly shortened name.

    Returns
    -------
    original : `str`
        The original name.
    """
    original = shrunk
    return original
766 def _mangleTableName(self, name: str) -> str:
767 """Map a logical, user-visible table name to the true table name used
768 in the database.
770 The default implementation returns the given name unchanged.
772 Parameters
773 ----------
774 name : `str`
775 Input table name. Should not include a namespace (i.e. schema)
776 prefix.
778 Returns
779 -------
780 mangled : `str`
781 Mangled version of the table name (still with no namespace prefix).
783 Notes
784 -----
785 Reimplementations of this method must be idempotent - mangling an
786 already-mangled name must have no effect.
787 """
788 return name
790 def _makeColumnConstraints(self, table: str, spec: ddl.FieldSpec) -> list[sqlalchemy.CheckConstraint]:
791 """Create constraints based on this spec.
793 Parameters
794 ----------
795 table : `str`
796 Name of the table this column is being added to.
797 spec : `FieldSpec`
798 Specification for the field to be added.
800 Returns
801 -------
802 constraint : `list` of `sqlalchemy.CheckConstraint`
803 Constraint added for this column.
804 """
805 # By default we return no additional constraints
806 return []
def _convertFieldSpec(
    self, table: str, spec: ddl.FieldSpec, metadata: sqlalchemy.MetaData, **kwargs: Any
) -> sqlalchemy.schema.Column:
    """Turn a `FieldSpec` into a `sqlalchemy.schema.Column`.

    Parameters
    ----------
    table : `str`
        Name of the table the column is being added to.
    spec : `FieldSpec`
        Specification of the field being added.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema the field's table is
        being added to.
    **kwargs
        Extra keyword arguments forwarded to the
        `sqlalchemy.schema.Column` constructor, so that derived classes
        can delegate to ``super()`` while changing only details.

    Returns
    -------
    column : `sqlalchemy.schema.Column`
        SQLAlchemy representation of the field.
    """
    extras = []
    if spec.autoincrement:
        # Databases without native autoincrement need a sequence;
        # sqlalchemy ignores it for databases that support it natively.
        sequence_name = self.shrinkDatabaseEntityName(f"{table}_seq_{spec.name}")
        extras.append(sqlalchemy.Sequence(sequence_name, metadata=metadata))
    assert spec.doc is None or isinstance(spec.doc, str), f"Bad doc for {table}.{spec.name}."
    column = sqlalchemy.schema.Column(
        spec.name,
        spec.getSizedColumnType(),
        *extras,
        nullable=spec.nullable,
        primary_key=spec.primaryKey,
        comment=spec.doc,
        server_default=spec.default,
        **kwargs,
    )
    return column
def _convertForeignKeySpec(
    self, table: str, spec: ddl.ForeignKeySpec, metadata: sqlalchemy.MetaData, **kwargs: Any
) -> sqlalchemy.schema.ForeignKeyConstraint:
    """Convert a `ForeignKeySpec` to a
    `sqlalchemy.schema.ForeignKeyConstraint`.

    Parameters
    ----------
    table : `str`
        Name of the table this foreign key is being added to.
    spec : `ForeignKeySpec`
        Specification for the foreign key to be added.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema this constraint is
        being added to.
    **kwargs
        Additional keyword arguments to forward to the
        `sqlalchemy.schema.ForeignKeyConstraint` constructor. This is
        provided to make it easier for derived classes to delegate to
        ``super()`` while making only minor changes.

    Returns
    -------
    constraint : `sqlalchemy.schema.ForeignKeyConstraint`
        SQLAlchemy representation of the constraint.
    """
    # Mangle the referenced table name once; it is used both in the
    # constraint name and in every target column reference.
    targetTable = self._mangleTableName(spec.table)
    name = self.shrinkDatabaseEntityName(
        "_".join(["fkey", table, targetTable, *spec.target, *spec.source])
    )
    return sqlalchemy.schema.ForeignKeyConstraint(
        spec.source,
        [f"{targetTable}.{col}" for col in spec.target],
        name=name,
        ondelete=spec.onDelete,
        # Forward caller-supplied options, as the signature and docstring
        # promise; they used to be silently discarded.
        **kwargs,
    )
def _convertExclusionConstraintSpec(
    self,
    table: str,
    spec: tuple[str | type[TimespanDatabaseRepresentation], ...],
    metadata: sqlalchemy.MetaData,
) -> sqlalchemy.schema.Constraint:
    """Translate one entry of `ddl.TableSpec.exclusion` into a SQLAlchemy
    constraint object.

    Parameters
    ----------
    table : `str`
        Name of the table the constraint belongs to.
    spec : `tuple` [ `str` or `type` ]
        Column names (`str`) together with the `type` returned by
        `getTimespanRepresentation` (which must appear exactly once), in
        the order the columns should take in the index backing the
        constraint.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema the constraint is
        being added to.

    Returns
    -------
    constraint : `sqlalchemy.schema.Constraint`
        SQLAlchemy representation of the constraint.

    Raises
    ------
    NotImplementedError
        Raised if this database does not support exclusion constraints;
        this base implementation always raises.
    """
    message = f"Database {self} does not support exclusion constraints."
    raise NotImplementedError(message)
def _convertTableSpec(
    self, name: str, spec: ddl.TableSpec, metadata: sqlalchemy.MetaData, **kwargs: Any
) -> sqlalchemy.schema.Table:
    """Convert a `TableSpec` to a `sqlalchemy.schema.Table`.

    Parameters
    ----------
    name : `str`
        Name of the table; it is passed through `_mangleTableName` before
        being used.
    spec : `TableSpec`
        Specification for the table to be converted.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema this table is being
        added to.
    **kwargs
        Additional keyword arguments to forward to the
        `sqlalchemy.schema.Table` constructor.  This is provided to make it
        easier for derived classes to delegate to ``super()`` while making
        only minor changes.

    Returns
    -------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy representation of the table.

    Notes
    -----
    Foreign key *constraints* from ``spec.foreignKeys`` are deliberately not
    created here, to avoid circular dependencies; higher-level logic in
    `ensureTableExists`, `getExistingTable`, and `declareStaticTables` adds
    them.  Indexes on foreign key source columns are created here.
    """
    name = self._mangleTableName(name)

    # Columns come first, followed by any per-column constraints.
    items: list[sqlalchemy.schema.SchemaItem] = []
    for field in spec.fields:
        items.append(self._convertFieldSpec(name, field, metadata))
    for field in spec.fields:
        items.extend(self._makeColumnConstraints(name, field))

    # Column tuples already indexed through the primary key or a unique
    # constraint; used to avoid adding redundant explicit or foreign key
    # indexes.
    primary_key = tuple(field.name for field in spec.fields if field.primaryKey)
    covered = {primary_key}

    for columns in spec.unique:
        unique_name = self.shrinkDatabaseEntityName("_".join([name, "unq", *columns]))
        items.append(sqlalchemy.schema.UniqueConstraint(*columns, name=unique_name))
    covered.update(spec.unique)

    # Explicit indexes are only checked against the primary key and unique
    # constraints; they are registered as covered after all were processed.
    for index in spec.indexes:
        if index.columns in covered:
            continue
        index_name = self.shrinkDatabaseEntityName("_".join([name, "idx", *index.columns]))
        items.append(
            sqlalchemy.schema.Index(
                index_name,
                *index.columns,
                unique=(index.columns in spec.unique),
                **index.kwargs,
            )
        )
    covered.update(index.columns for index in spec.indexes)

    for fk in spec.foreignKeys:
        if not fk.addIndex or fk.source in covered:
            continue
        fk_index_name = self.shrinkDatabaseEntityName("_".join((name, "fkidx") + fk.source))
        items.append(sqlalchemy.schema.Index(fk_index_name, *fk.source))

    for exclusion in spec.exclusion:
        items.append(self._convertExclusionConstraintSpec(name, exclusion, metadata))

    assert spec.doc is None or isinstance(spec.doc, str), f"Bad doc for {name}."
    return sqlalchemy.schema.Table(name, metadata, *items, comment=spec.doc, info={"spec": spec}, **kwargs)
def ensureTableExists(self, name: str, spec: ddl.TableSpec) -> sqlalchemy.schema.Table:
    """Return the table with the given name and specification, creating it
    first if it does not exist yet.

    Parameters
    ----------
    name : `str`
        Name of the table (not including namespace qualifiers).
    spec : `TableSpec`
        Specification for the table.  It is used when creating the table,
        and *may* be used to check an existing table for consistency, but
        no such check is guaranteed.

    Returns
    -------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy representation of the table.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False`, and the table does not
        already exist.
    DatabaseConflictError
        Raised if the table exists but ``spec`` is inconsistent with its
        definition.

    Notes
    -----
    This method may not be called within transactions.  It may be called on
    read-only databases if and only if the table does in fact already
    exist.

    Subclasses may override this method, but usually should not need to.
    """
    # TODO: if _engine is used to make a table then it uses separate
    # connection and should not interfere with current transaction
    session = self._session_connection
    assert session is None or not session.in_transaction(), "Table creation interrupts transactions."
    assert self._metadata is not None, "Static tables must be declared before dynamic tables."

    existing = self.getExistingTable(name, spec)
    if existing is not None:
        return existing
    if not self.isWriteable():
        raise ReadOnlyDatabaseError(
            f"Table {name} does not exist, and cannot be created because database {self} is read-only."
        )

    created = self._convertTableSpec(name, spec, self._metadata)
    for fk_spec in spec.foreignKeys:
        created.append_constraint(self._convertForeignKeySpec(name, fk_spec, self._metadata))
    try:
        with self._transaction() as (_, connection):
            created.create(connection)
    except sqlalchemy.exc.DatabaseError:
        # Another process may have created the table in the meantime, which
        # usually shows up as OperationalError or ProgrammingError.  An
        # IF NOT EXISTS clause cannot be used because of a server-side
        # PostgreSQL race condition that causes IntegrityError.  All of these
        # derive from DatabaseError, so catch that and look for the table
        # again.
        # NOTE(review): getExistingTable consults self._metadata first, and
        # the table object built above was given that same metadata; this
        # re-check may therefore succeed without the table being present in
        # the database - confirm.
        recovered = self.getExistingTable(name, spec)
        if recovered is None:
            raise
        return recovered
    return created
def getExistingTable(self, name: str, spec: ddl.TableSpec) -> sqlalchemy.schema.Table | None:
    """Look up an existing table with the given name and specification.

    Parameters
    ----------
    name : `str`
        Name of the table (not including namespace qualifiers).
    spec : `TableSpec`
        Specification for the table.  It is used to build the SQLAlchemy
        representation of the table, and to check that the actual table in
        the database is consistent.

    Returns
    -------
    table : `sqlalchemy.schema.Table` or `None`
        SQLAlchemy representation of the table, or `None` if it does not
        exist.

    Raises
    ------
    DatabaseConflictError
        Raised if the table exists but ``spec`` is inconsistent with its
        definition.

    Notes
    -----
    This method can be called within transactions and never modifies the
    database.

    Subclasses may override this method, but usually should not need to.
    """
    assert self._metadata is not None, "Static tables must be declared before dynamic tables."
    name = self._mangleTableName(name)
    metadata_key = name if self.namespace is None else f"{self.namespace}.{name}"
    known = self._metadata.tables.get(metadata_key)
    if known is not None:
        # Already present in our metadata: only the column names are compared.
        if spec.fields.names != set(known.columns.keys()):
            raise DatabaseConflictError(
                f"Table '{name}' has already been defined differently; the new "
                f"specification has columns {list(spec.fields.names)}, while "
                f"the previous definition has {list(known.columns.keys())}."
            )
        return known

    # Not in metadata; inspect the database through the session connection
    # when there is one, and through the engine otherwise.
    bind = self._engine if self._session_connection is None else self._session_connection
    inspector = sqlalchemy.inspect(bind, raiseerr=True)
    if name not in inspector.get_table_names(schema=self.namespace):
        return None
    _checkExistingTableDefinition(name, spec, inspector.get_columns(name, schema=self.namespace))
    # NOTE(review): ``name`` has already been mangled above and is mangled
    # again inside _convertTableSpec; presumably harmless when mangling is
    # idempotent - confirm for subclasses overriding _mangleTableName.
    reflected = self._convertTableSpec(name, spec, self._metadata)
    for fk_spec in spec.foreignKeys:
        reflected.append_constraint(self._convertForeignKeySpec(name, fk_spec, self._metadata))
    return reflected
def _make_temporary_table(
    self,
    connection: sqlalchemy.engine.Connection,
    spec: ddl.TableSpec,
    name: str | None = None,
    **kwargs: Any,
) -> sqlalchemy.schema.Table:
    """Create a temporary table.

    Parameters
    ----------
    connection : `sqlalchemy.engine.Connection`
        Connection to use when creating the table.
    spec : `TableSpec`
        Specification for the table.
    name : `str`, optional
        A unique (within this session/connection) name for the table.
        Subclasses may override to modify the actual name used.  If not
        provided, a unique name will be generated.
    **kwargs
        Additional keyword arguments to forward to the
        `sqlalchemy.schema.Table` constructor.  This is provided to make it
        easier for derived classes to delegate to ``super()`` while making
        only minor changes.

    Returns
    -------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy representation of the table.

    Raises
    ------
    RuntimeError
        Raised if the static schema has not been defined yet.
    ValueError
        Raised if the name was transformed into a key that is already
        registered as a temporary table.
    """
    name = f"tmp_{uuid.uuid4().hex}" if name is None else name
    metadata = self._metadata
    if metadata is None:
        raise RuntimeError("Cannot create temporary table before static schema is defined.")
    temp_table = self._convertTableSpec(
        name, spec, metadata, prefixes=["TEMPORARY"], schema=sqlalchemy.schema.BLANK_SCHEMA, **kwargs
    )
    # Only a key that differs from the requested name is reported as a clash.
    already_registered = temp_table.key in self._temp_tables
    if already_registered and temp_table.key != name:
        raise ValueError(
            f"A temporary table with name {name} (transformed to {temp_table.key} by "
            "Database) already exists."
        )
    for fk_spec in spec.foreignKeys:
        temp_table.append_constraint(self._convertForeignKeySpec(name, fk_spec, metadata))
    with self._transaction():
        temp_table.create(connection)
    return temp_table
@classmethod
def getTimespanRepresentation(cls) -> type[TimespanDatabaseRepresentation]:
    """Return the `type` describing how `Timespan` objects are stored in
    this database.

    `Database` itself does not use the returned type anywhere else; calling
    code is responsible for keeping DDL and queries consistent with it.

    Returns
    -------
    TimespanReprClass : `type` (`TimespanDatabaseRepresentation` subclass)
        A type that encapsulates the way `Timespan` objects should be
        stored in this database.

    Notes
    -----
    Timespan-mangling logic is kept outside the `Database` implementations,
    even though the choice of representation ultimately belongs to them,
    for two main reasons:

    - Timespans appear in relatively few tables and queries in typical
      usage, and the code that operates on them already knows it is
      working with timespans.  A representation-aware implementation of,
      say, `insert` would instead need extra logic to detect when
      timespan-mangling is required, which would usually be useless
      overhead.

    - SQLAlchemy's SELECT query expression system has no way to wrap
      multiple columns in a single expression object (the ORM does, but
      the ORM is not used here), so _much_ more of that code would have to
      be wrapped in our own interfaces to encapsulate timespan
      representations there.
    """
    representation = TimespanDatabaseRepresentation.Compound
    return representation
def sync(
    self,
    table: sqlalchemy.schema.Table,
    *,
    keys: dict[str, Any],
    compared: dict[str, Any] | None = None,
    extra: dict[str, Any] | None = None,
    returning: Sequence[str] | None = None,
    update: bool = False,
) -> tuple[dict[str, Any] | None, bool | dict[str, Any]]:
    """Insert into a table as necessary to ensure the database contains
    values equivalent to the given ones.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table to be queried and possibly inserted into.
    keys : `dict`
        Column name-value pairs used to search for an existing row; must
        be a combination that can be used to select a single row if one
        exists.  If no such row exists, these values are used in the
        insert.
    compared : `dict`, optional
        Column name-value pairs that are compared to those in any existing
        row.  If no such row exists, these values are used in the insert.
    extra : `dict`, optional
        Column name-value pairs that are ignored if a matching row exists,
        but used in an insert if one is necessary.
    returning : `~collections.abc.Sequence` of `str`, optional
        The names of columns whose values should be returned.
    update : `bool`, optional
        If `True` (`False` is default), update the existing row with the
        values in ``compared`` instead of raising `DatabaseConflictError`.

    Returns
    -------
    row : `dict`, optional
        The value of the fields indicated by ``returning``, or `None` if
        ``returning`` is `None`.
    inserted_or_updated : `bool` or `dict`
        If `True`, a new row was inserted; if `False`, a matching row
        already existed.  If a `dict` (only possible if ``update=True``),
        then an existing row was updated, and the dict maps the names of
        the updated columns to their *old* values (new values can be
        obtained from ``compared``).

    Raises
    ------
    DatabaseConflictError
        Raised if the values in ``compared`` do not match the values in the
        database.
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False`, and no matching record
        already exists.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    It may be called on read-only databases if and only if the matching row
    does in fact already exist.
    """

    def check() -> tuple[int, dict[str, Any] | None, list | None]:
        """Query for a row that matches the ``keys`` argument, and compare
        it to what was given by the caller.

        Returns
        -------
        n : `int`
            Number of matching rows.  ``n != 1`` is always an error, but
            what kind of error depends on where `check` is called from.
        bad : `dict` or `None`
            The subset of the keys of ``compared`` for which the existing
            value did not match the given one, mapped to the existing
            values in the database.  A non-empty ``bad`` is likewise always
            an error whose kind depends on context.  `None` if ``n != 1``.
        result : `list` or `None`
            Values in the database that correspond to the columns given
            in ``returning``, or `None` if ``returning is None``.
        """

        def differs(found: Any, expected: Any) -> bool:
            # astropy times are compared through TimeConverter.times_equal
            # rather than with ``!=``.
            if isinstance(found, astropy.time.Time):
                return not time_utils.TimeConverter().times_equal(found, expected)
            return found != expected

        wanted: set[str] = set()
        if compared is not None:
            wanted.update(compared.keys())
        if returning is not None:
            wanted.update(returning)
        if not wanted:
            # Some column has to be selected even when only the number of
            # matching rows is of interest.
            wanted.add(next(iter(keys.keys())))
        labeled = [table.columns[k].label(k) for k in wanted]
        key_match = sqlalchemy.sql.and_(*[table.columns[k] == v for k, v in keys.items()])
        query = sqlalchemy.sql.select(*labeled).select_from(table).where(key_match)
        with self._transaction() as (_, connection):
            matches = list(connection.execute(query).mappings())
        n_matches = len(matches)
        if n_matches != 1:
            return n_matches, None, None
        existing = matches[0]
        mismatched: dict[str, Any] = {}
        if compared is not None:
            for column, expected in compared.items():
                if differs(existing[column], expected):
                    mismatched[column] = existing[column]
        selected: list | None = None
        if returning is not None:
            selected = [existing[k] for k in returning]
        return 1, mismatched, selected

    def format_bad(inconsistencies: dict[str, Any]) -> str:
        """Format the 'bad' dictionary of existing values returned by
        ``check`` into a string suitable for an error message.
        """
        assert compared is not None, "Should not be able to get inconsistencies without comparing."
        return ", ".join(f"{k}: {v!r} != {compared[k]!r}" for k, v in inconsistencies.items())

    inserted_or_updated: bool | dict[str, Any]
    if not self.isTableWriteable(table):
        # Table is not writeable; just see if the row exists.
        n, bad, result = check()
        if n < 1:
            raise ReadOnlyDatabaseError("sync needs to insert, but database is read-only.")
        if n > 1:
            raise RuntimeError("Keys passed to sync do not comprise a unique constraint.")
        if bad:
            if update:
                raise ReadOnlyDatabaseError("sync needs to update, but database is read-only.")
            raise DatabaseConflictError(
                f"Conflict in sync for table {table.name} on column(s) {format_bad(bad)}."
            )
        inserted_or_updated = False
    else:
        # Try an insert first, but allow it to fail (in only specific ways).
        row = keys.copy()
        if compared is not None:
            row.update(compared)
        if extra is not None:
            row.update(extra)
        with self.transaction():
            inserted = bool(self.ensure(table, row))
            # check() has to run inside the transaction for this branch, so
            # that an insert that did not do what was expected is rolled
            # back.  That limits how much duplication with the read-only
            # branch can be removed.
            n, bad, result = check()
            if n < 1:
                raise ConflictingDefinitionError(
                    f"Attempted to ensure {row} exists by inserting it with ON CONFLICT IGNORE, "
                    f"but a post-insert query on {keys} returned no results. "
                    f"Insert was {'' if inserted else 'not '}reported as successful. "
                    "This can occur if the insert violated a database constraint other than the "
                    "unique constraint or primary key used to identify the row in this call."
                )
            if n > 1:
                raise RuntimeError(
                    f"Keys passed to sync {keys.keys()} do not comprise a "
                    f"unique constraint for table {table.name}."
                )
            if not bad:
                inserted_or_updated = inserted
            else:
                assert (
                    compared is not None
                ), "Should not be able to get inconsistencies without comparing."
                if inserted:
                    raise RuntimeError(
                        f"Conflict ({bad}) in sync after successful insert; this is "
                        "possible if the same table is being updated by a concurrent "
                        "process that isn't using sync, but it may also be a bug in "
                        "daf_butler."
                    )
                if not update:
                    raise DatabaseConflictError(
                        f"Conflict in sync for table {table.name} on column(s) {format_bad(bad)}."
                    )
                # Overwrite only the columns that differed; the old values
                # are reported back to the caller.
                with self._transaction() as (_, connection):
                    connection.execute(
                        table.update()
                        .where(sqlalchemy.sql.and_(*[table.columns[k] == v for k, v in keys.items()]))
                        .values(**{k: compared[k] for k in bad})
                    )
                inserted_or_updated = bad

    if returning is None:
        return None, inserted_or_updated
    assert result is not None
    return dict(zip(returning, result, strict=True)), inserted_or_updated
def insert(
    self,
    table: sqlalchemy.schema.Table,
    *rows: dict,
    returnIds: bool = False,
    select: sqlalchemy.sql.expression.SelectBase | None = None,
    names: Iterable[str] | None = None,
) -> list[int] | None:
    """Insert one or more rows into a table, optionally returning
    autoincrement primary key values.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    returnIds : `bool`
        If `True` (`False` is default), return the values of the table's
        autoincrement primary key field (which must exist).
    select : `sqlalchemy.sql.SelectBase`, optional
        A SELECT query expression to insert rows from.  Cannot be provided
        with either ``rows`` or ``returnIds=True``.
    names : `~collections.abc.Iterable` [ `str` ], optional
        Names of columns in ``table`` to be populated, ordered to match the
        columns returned by ``select``.  Ignored if ``select`` is `None`.
        If not provided, the columns returned by ``select`` must be named
        to match the desired columns of ``table``.
    *rows
        Positional arguments are the rows to be inserted, as dictionaries
        mapping column name to value.  The keys in all dictionaries must
        be the same.

    Returns
    -------
    ids : `None`, or `list` of `int`
        If ``returnIds`` is `True`, a `list` containing the inserted
        values for the table's autoincrement primary key.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.
    TypeError
        Raised if ``select`` is combined with ``rows`` or
        ``returnIds=True``.

    Notes
    -----
    The default implementation uses bulk insert syntax when ``returnIds``
    is `False`, and a loop over single-row insert operations when it is
    `True`.

    Derived classes should reimplement when they can provide a more
    efficient implementation (especially for the latter case).

    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.
    """
    self.assertTableWriteable(table, f"Cannot insert into read-only table {table}.")
    if select is not None and (rows or returnIds):
        raise TypeError("'select' is incompatible with passing value rows or returnIds=True.")
    if select is None and not rows:
        # Nothing to insert at all.
        return [] if returnIds else None
    with self._transaction() as (_, connection):
        if returnIds:
            # One statement per row so that each generated key can be read.
            statement = table.insert()
            return [connection.execute(statement, row).inserted_primary_key[0] for row in rows]
        if select is None:
            connection.execute(table.insert(), rows)
            return None
        if names is None:
            # columns() is deprecated since 1.4, but
            # selected_columns() method did not exist in 1.3.
            if hasattr(select, "selected_columns"):
                names = select.selected_columns.keys()
            else:
                names = select.columns.keys()
        connection.execute(table.insert().from_select(list(names), select))
        return None
@abstractmethod
def replace(self, table: sqlalchemy.schema.Table, *rows: dict) -> None:
    """Insert one or more rows into a table, replacing any existing rows
    whose primary key would otherwise be violated by the new row.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    *rows
        Positional arguments are the rows to be inserted, as dictionaries
        mapping column name to value.  The keys in all dictionaries must
        be the same.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    Implementations should raise a `sqlalchemy.exc.IntegrityError`
    exception when a constraint other than the primary key would be
    violated.

    Implementations are not required to support `replace` on tables
    with autoincrement keys.
    """
    # Abstract: concrete databases must supply the implementation.
    raise NotImplementedError
@abstractmethod
def ensure(self, table: sqlalchemy.schema.Table, *rows: dict, primary_key_only: bool = False) -> int:
    """Insert one or more rows into a table, skipping any rows whose
    insertion would violate a unique constraint.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    *rows
        Positional arguments are the rows to be inserted, as dictionaries
        mapping column name to value.  The keys in all dictionaries must
        be the same.
    primary_key_only : `bool`, optional
        If `True` (`False` is default), only skip rows that violate the
        primary key constraint, and raise an exception (and rollback
        transactions) for other constraint violations.

    Returns
    -------
    count : `int`
        The number of rows actually inserted.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.
        This is raised even if the operation would do nothing even on a
        writeable database.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    Implementations are not required to support `ensure` on tables
    with autoincrement keys.
    """
    # Abstract: concrete databases must supply the implementation.
    raise NotImplementedError
def delete(self, table: sqlalchemy.schema.Table, columns: Iterable[str], *rows: dict) -> int:
    """Delete one or more rows from a table.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table that rows should be deleted from.
    columns : `~collections.abc.Iterable` of `str`
        The names of columns that will be used to constrain the rows to
        be deleted; these will be combined via ``AND`` to form the
        ``WHERE`` clause of the delete query.  May be any iterable,
        including a one-shot iterator.
    *rows
        Positional arguments are the keys of rows to be deleted, as
        dictionaries mapping column name to value.  The keys in all
        dictionaries must be exactly the names in ``columns``.

    Returns
    -------
    count : `int`
        Number of rows deleted.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    The default implementation should be sufficient for most derived
    classes.
    """
    self.assertTableWriteable(table, f"Cannot delete from read-only table {table}.")
    # Materialize before testing for emptiness: an iterator or generator is
    # always truthy, so testing it directly would treat "no columns" (delete
    # everything) as a constrained delete and wrongly short-circuit.
    columns = list(columns)
    if columns and not rows:
        # If there are no columns, this operation is supposed to delete
        # everything (so we proceed as usual).  But if there are columns,
        # but no rows, it was a constrained bulk operation where the
        # constraint is that no rows match, and we should short-circuit
        # while reporting that no rows were affected.
        return 0
    sql = table.delete()

    # More efficient to use IN operator if there is only one
    # variable changing across all rows.
    content: dict[str, set] = defaultdict(set)
    if len(columns) == 1:
        # Nothing to calculate since we can always use IN
        column = columns[0]
        changing_columns = [column]
        content[column] = {row[column] for row in rows}
    else:
        for row in rows:
            for k, v in row.items():
                content[k].add(v)
        changing_columns = [col for col, values in content.items() if len(values) > 1]

    if len(changing_columns) != 1:
        # More than one column changes each time (or none does) so do
        # explicit bind parameters and have each row processed separately.
        whereTerms = [table.columns[name] == sqlalchemy.sql.bindparam(name) for name in columns]
        if whereTerms:
            sql = sql.where(sqlalchemy.sql.and_(*whereTerms))
        with self._transaction() as (_, connection):
            return connection.execute(sql, rows).rowcount

    # One of the columns has changing values but any others are fixed.  In
    # this case we can use an IN operator and be more efficient.
    name = changing_columns[0]

    # Simple equality clauses for the unchanging columns; each of their
    # value sets holds exactly one element.
    clauses = [table.columns[k] == next(iter(values)) for k, values in content.items() if k != name]

    # The IN operator will not work for "infinite" numbers of rows so the
    # values are batched into distinct calls.
    in_content = list(content[name])
    n_per_loop = 1_000  # Controls how many items to put in IN clause
    rowcount = 0
    with self._transaction() as (_, connection):
        for start in range(0, len(in_content), n_per_loop):
            in_clause = table.columns[name].in_(in_content[start : start + n_per_loop])
            newsql = sql.where(sqlalchemy.sql.and_(*clauses, in_clause))
            rowcount += connection.execute(newsql).rowcount
    return rowcount
def deleteWhere(self, table: sqlalchemy.schema.Table, where: sqlalchemy.sql.ColumnElement) -> int:
    """Delete rows from a table using a pre-constructed WHERE clause.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table that rows should be deleted from.
    where : `sqlalchemy.sql.ColumnElement`
        Expression applied as the ``WHERE`` clause of the delete query.

    Returns
    -------
    count : `int`
        Number of rows deleted.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    The default implementation should be sufficient for most derived
    classes.
    """
    self.assertTableWriteable(table, f"Cannot delete from read-only table {table}.")
    statement = table.delete().where(where)
    with self._transaction() as (_, connection):
        outcome = connection.execute(statement)
        return outcome.rowcount
def update(self, table: sqlalchemy.schema.Table, where: dict[str, str], *rows: dict) -> int:
    """Update one or more rows in a table.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table containing the rows to be updated.
    where : `dict` [`str`, `str`]
        A mapping from the names of columns that will be used to search for
        existing rows to the keys that will hold these values in the
        ``rows`` dictionaries.  Note that these may not be the same due to
        SQLAlchemy limitations.
    *rows
        Positional arguments are the rows to be updated.  The keys in all
        dictionaries must be the same, and may correspond to either a
        value in the ``where`` dictionary or the name of a column to be
        updated.

    Returns
    -------
    count : `int`
        Number of rows matched (regardless of whether the update actually
        modified them).

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    The default implementation should be sufficient for most derived
    classes.
    """
    self.assertTableWriteable(table, f"Cannot update read-only table {table}.")
    if not rows:
        return 0
    base = table.update()
    # Each search column is matched against a bind parameter named after the
    # corresponding key in the row dictionaries.
    conditions = [
        table.columns[column] == sqlalchemy.sql.bindparam(row_key) for column, row_key in where.items()
    ]
    statement = base.where(sqlalchemy.sql.and_(*conditions))
    with self._transaction() as (_, connection):
        outcome = connection.execute(statement, rows)
        return outcome.rowcount
@contextmanager
def query(
    self,
    sql: sqlalchemy.sql.expression.Executable | sqlalchemy.sql.expression.SelectBase,
    *args: Any,
    **kwargs: Any,
) -> Iterator[sqlalchemy.engine.CursorResult]:
    """Run a SELECT query against the database.

    Parameters
    ----------
    sql : `sqlalchemy.sql.expression.SelectBase`
        A SQLAlchemy representation of a ``SELECT`` query.
    *args
        Additional positional arguments are forwarded to
        `sqlalchemy.engine.Connection.execute`.
    **kwargs
        Additional keyword arguments are forwarded to
        `sqlalchemy.engine.Connection.execute`.

    Returns
    -------
    result_context : `sqlalchemy.engine.CursorResult`
        Context manager that returns the query result object when entered.
        These results are invalidated when the context is exited.

    Notes
    -----
    The session connection is used when there is one; otherwise a new
    connection is obtained from the engine and closed when the context
    exits, including when executing the query itself fails.
    """
    if self._session_connection is None:
        connection = self._engine.connect()
    else:
        connection = self._session_connection
    try:
        # Execute inside the ``try`` so that a connection opened above is
        # still closed if the statement raises.
        # TODO: SelectBase is not good for execute(), but it used everywhere,
        # e.g. in daf_relation. We should switch to Executable at some point.
        result = connection.execute(cast(sqlalchemy.sql.expression.Executable, sql), *args, **kwargs)
        yield result
    finally:
        # Never close the session connection; it is not owned by this call.
        if connection is not self._session_connection:
            connection.close()
@abstractmethod
def constant_rows(
    self,
    fields: NamedValueAbstractSet[ddl.FieldSpec],
    *rows: dict,
    name: str | None = None,
) -> sqlalchemy.sql.FromClause:
    """Build a SQLAlchemy construct holding a handful of literal rows.

    Parameters
    ----------
    fields : `NamedValueAbstractSet` [ `ddl.FieldSpec` ]
        Specifications for the columns of the rows. Unique and foreign key
        constraints are ignored.
    *rows : `dict`
        Row values, one dictionary per row, keyed by field name.
    name : `str`, optional
        Name to give the SQL construct. When omitted, an opaque but unique
        identifier is generated.

    Returns
    -------
    from_clause : `sqlalchemy.sql.FromClause`
        SQLAlchemy object representing the given rows. It can be joined
        directly into the ``FROM`` clause of a ``SELECT`` query and does not
        involve a temporary table that would need cleaning up later.

    Notes
    -----
    This default implementation relies on the SQL-standard ``VALUES``
    construct. Support for that construct varies enough across popular
    RDBMSs that the method remains abstract, so subclasses must opt in
    explicitly by delegating to `super`.
    """
    # Fall back to a random identifier when the caller supplies no name.
    construct_name = f"tmp_{uuid.uuid4().hex}" if name is None else name
    columns = [sqlalchemy.Column(spec.name, spec.getSizedColumnType()) for spec in fields]
    # Order each row's values to match the declared field order.
    data = [tuple(row[key] for key in fields.names) for row in rows]
    values_clause = sqlalchemy.sql.values(*columns, name=construct_name)
    return values_clause.data(data)
def get_constant_rows_max(self) -> int:
    """Report how many rows callers should pass to `constant_rows` at most.

    Returns
    -------
    max : `int`
        Maximum number of rows.

    Notes
    -----
    The value is meant to reflect typical performance profiles (or a guess
    at these) for this backend, not merely hard database engine limits.
    """
    row_limit = 100
    return row_limit
# Annotation only: no value is assigned at this point in the class, so the
# attribute is presumably set per instance elsewhere (not visible in this
# part of the file) -- TODO confirm.
origin: int
"""An integer ID that should be used as the default for any datasets,
quanta, or other entities that use a (autoincrement, origin) compound
primary key (`int`).
"""
# Annotation only: no value is assigned here. The type allows `None`, so
# code reading this attribute has to handle a missing namespace.
namespace: str | None
"""The schema or namespace this database instance is associated with
(`str` or `None`).
"""