Coverage for python/lsst/daf/butler/registry/interfaces/_database.py: 22%
412 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-10-25 15:14 +0000
« prev ^ index » next coverage.py v7.3.2, created at 2023-10-25 15:14 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = [
24 "Database",
25 "ReadOnlyDatabaseError",
26 "DatabaseConflictError",
27 "DatabaseInsertMode",
28 "SchemaAlreadyDefinedError",
29 "StaticTablesContext",
30]
32import enum
33import uuid
34import warnings
35from abc import ABC, abstractmethod
36from collections import defaultdict
37from collections.abc import Callable, Iterable, Iterator, Sequence
38from contextlib import contextmanager
39from typing import Any, cast, final
41import astropy.time
42import sqlalchemy
44from ...core import TimespanDatabaseRepresentation, ddl, time_utils
45from ...core.named import NamedValueAbstractSet
46from .._exceptions import ConflictingDefinitionError
class DatabaseInsertMode(enum.Enum):
    """Strategies for handling rows that may already be present when
    inserting database records.
    """

    INSERT = enum.auto()
    """Plain insert; a record that already exists is an error."""

    REPLACE = enum.auto()
    """Insert, overwriting any record that already exists."""

    ENSURE = enum.auto()
    """Insert, skipping over any record that already exists."""
62# TODO: method is called with list[ReflectedColumn] in SA 2, and
63# ReflectedColumn does not exist in 1.4.
64def _checkExistingTableDefinition(name: str, spec: ddl.TableSpec, inspection: list) -> None:
65 """Test that the definition of a table in a `ddl.TableSpec` and from
66 database introspection are consistent.
68 Parameters
69 ----------
70 name : `str`
71 Name of the table (only used in error messages).
72 spec : `ddl.TableSpec`
73 Specification of the table.
74 inspection : `dict`
75 Dictionary returned by
76 `sqlalchemy.engine.reflection.Inspector.get_columns`.
78 Raises
79 ------
80 DatabaseConflictError
81 Raised if the definitions are inconsistent.
82 """
83 columnNames = [c["name"] for c in inspection]
84 if spec.fields.names != set(columnNames):
85 raise DatabaseConflictError(
86 f"Table '{name}' exists but is defined differently in the database; "
87 f"specification has columns {list(spec.fields.names)}, while the "
88 f"table in the database has {columnNames}."
89 )
class ReadOnlyDatabaseError(RuntimeError):
    """Error signalling that a write operation was attempted through a
    `Database` that is read-only.
    """
class DatabaseConflictError(ConflictingDefinitionError):
    """Error signalling that what is stored in the database (row values or
    schema entities) disagrees with what this client expects.
    """
class SchemaAlreadyDefinedError(RuntimeError):
    """Error signalling an attempt to initialize a database schema in a
    database where some tables already exist.
    """
110class StaticTablesContext:
111 """Helper class used to declare the static schema for a registry layer
112 in a database.
114 An instance of this class is returned by `Database.declareStaticTables`,
115 which should be the only way it should be constructed.
116 """
118 def __init__(self, db: Database, connection: sqlalchemy.engine.Connection):
119 self._db = db
120 self._foreignKeys: list[tuple[sqlalchemy.schema.Table, sqlalchemy.schema.ForeignKeyConstraint]] = []
121 self._inspector = sqlalchemy.inspect(connection)
122 self._tableNames = frozenset(self._inspector.get_table_names(schema=self._db.namespace))
123 self._initializers: list[Callable[[Database], None]] = []
125 def addTable(self, name: str, spec: ddl.TableSpec) -> sqlalchemy.schema.Table:
126 """Add a new table to the schema, returning its sqlalchemy
127 representation.
129 The new table may not actually be created until the end of the
130 context created by `Database.declareStaticTables`, allowing tables
131 to be declared in any order even in the presence of foreign key
132 relationships.
133 """
134 name = self._db._mangleTableName(name)
135 if name in self._tableNames:
136 _checkExistingTableDefinition(
137 name, spec, self._inspector.get_columns(name, schema=self._db.namespace)
138 )
139 metadata = self._db._metadata
140 assert metadata is not None, "Guaranteed by context manager that returns this object."
141 table = self._db._convertTableSpec(name, spec, metadata)
142 for foreignKeySpec in spec.foreignKeys:
143 self._foreignKeys.append((table, self._db._convertForeignKeySpec(name, foreignKeySpec, metadata)))
144 return table
146 def addTableTuple(self, specs: tuple[ddl.TableSpec, ...]) -> tuple[sqlalchemy.schema.Table, ...]:
147 """Add a named tuple of tables to the schema, returning their
148 SQLAlchemy representations in a named tuple of the same type.
150 The new tables may not actually be created until the end of the
151 context created by `Database.declareStaticTables`, allowing tables
152 to be declared in any order even in the presence of foreign key
153 relationships.
155 Notes
156 -----
157 ``specs`` *must* be an instance of a type created by
158 `collections.namedtuple`, not just regular tuple, and the returned
159 object is guaranteed to be the same. Because `~collections.namedtuple`
160 is just a factory for `type` objects, not an actual type itself,
161 we cannot represent this with type annotations.
162 """
163 return specs._make( # type: ignore
164 self.addTable(name, spec) for name, spec in zip(specs._fields, specs, strict=True) # type: ignore
165 )
167 def addInitializer(self, initializer: Callable[[Database], None]) -> None:
168 """Add a method that does one-time initialization of a database.
170 Initialization can mean anything that changes state of a database
171 and needs to be done exactly once after database schema was created.
172 An example for that could be population of schema attributes.
174 Parameters
175 ----------
176 initializer : callable
177 Method of a single argument which is a `Database` instance.
178 """
179 self._initializers.append(initializer)
182class Database(ABC):
183 """An abstract interface that represents a particular database engine's
184 representation of a single schema/namespace/database.
186 Parameters
187 ----------
188 origin : `int`
189 An integer ID that should be used as the default for any datasets,
190 quanta, or other entities that use a (autoincrement, origin) compound
191 primary key.
192 engine : `sqlalchemy.engine.Engine`
193 The SQLAlchemy engine for this `Database`.
194 namespace : `str`, optional
195 Name of the schema or namespace this instance is associated with.
196 This is passed as the ``schema`` argument when constructing a
197 `sqlalchemy.schema.MetaData` instance. We use ``namespace`` instead to
198 avoid confusion between "schema means namespace" and "schema means
199 table definitions".
201 Notes
202 -----
203 `Database` requires all write operations to go through its special named
204 methods. Our write patterns are sufficiently simple that we don't really
205 need the full flexibility of SQL insert/update/delete syntax, and we need
206 non-standard (but common) functionality in these operations sufficiently
207 often that it seems worthwhile to provide our own generic API.
209 In contrast, `Database.query` allows arbitrary ``SELECT`` queries (via
210 their SQLAlchemy representation) to be run, as we expect these to require
211 significantly more sophistication while still being limited to standard
212 SQL.
214 `Database` itself has several underscore-prefixed attributes:
216 - ``_engine``: SQLAlchemy object representing its engine.
217 - ``_connection``: method returning a context manager for
218 `sqlalchemy.engine.Connection` object.
219 - ``_metadata``: the `sqlalchemy.schema.MetaData` object representing
220 the tables and other schema entities.
222 These are considered protected (derived classes may access them, but other
223 code should not), and read-only, aside from executing SQL via
224 ``_connection``.
225 """
227 def __init__(self, *, origin: int, engine: sqlalchemy.engine.Engine, namespace: str | None = None):
228 self.origin = origin
229 self.namespace = namespace
230 self._engine = engine
231 self._session_connection: sqlalchemy.engine.Connection | None = None
232 self._metadata: sqlalchemy.schema.MetaData | None = None
233 self._temp_tables: set[str] = set()
235 def __repr__(self) -> str:
236 # Rather than try to reproduce all the parameters used to create
237 # the object, instead report the more useful information of the
238 # connection URL.
239 if self._engine.url.password is not None:
240 uri = str(self._engine.url.set(password="***"))
241 else:
242 uri = str(self._engine.url)
243 if self.namespace:
244 uri += f"#{self.namespace}"
245 return f'{type(self).__name__}("{uri}")'
247 @classmethod
248 def makeDefaultUri(cls, root: str) -> str | None:
249 """Create a default connection URI appropriate for the given root
250 directory, or `None` if there can be no such default.
251 """
252 return None
254 @classmethod
255 def fromUri(
256 cls,
257 uri: str | sqlalchemy.engine.URL,
258 *,
259 origin: int,
260 namespace: str | None = None,
261 writeable: bool = True,
262 ) -> Database:
263 """Construct a database from a SQLAlchemy URI.
265 Parameters
266 ----------
267 uri : `str` or `sqlalchemy.engine.URL`
268 A SQLAlchemy URI connection string.
269 origin : `int`
270 An integer ID that should be used as the default for any datasets,
271 quanta, or other entities that use a (autoincrement, origin)
272 compound primary key.
273 namespace : `str`, optional
274 A database namespace (i.e. schema) the new instance should be
275 associated with. If `None` (default), the namespace (if any) is
276 inferred from the URI.
277 writeable : `bool`, optional
278 If `True`, allow write operations on the database, including
279 ``CREATE TABLE``.
281 Returns
282 -------
283 db : `Database`
284 A new `Database` instance.
285 """
286 return cls.fromEngine(
287 cls.makeEngine(uri, writeable=writeable), origin=origin, namespace=namespace, writeable=writeable
288 )
290 @classmethod
291 @abstractmethod
292 def makeEngine(
293 cls, uri: str | sqlalchemy.engine.URL, *, writeable: bool = True
294 ) -> sqlalchemy.engine.Engine:
295 """Create a `sqlalchemy.engine.Engine` from a SQLAlchemy URI.
297 Parameters
298 ----------
299 uri : `str` or `sqlalchemy.engine.URL`
300 A SQLAlchemy URI connection string.
301 writeable : `bool`, optional
302 If `True`, allow write operations on the database, including
303 ``CREATE TABLE``.
305 Returns
306 -------
307 engine : `sqlalchemy.engine.Engine`
308 A database engine.
310 Notes
311 -----
312 Subclasses that support other ways to connect to a database are
313 encouraged to add optional arguments to their implementation of this
314 method, as long as they maintain compatibility with the base class
315 call signature.
316 """
317 raise NotImplementedError()
319 @classmethod
320 @abstractmethod
321 def fromEngine(
322 cls,
323 engine: sqlalchemy.engine.Engine,
324 *,
325 origin: int,
326 namespace: str | None = None,
327 writeable: bool = True,
328 ) -> Database:
329 """Create a new `Database` from an existing `sqlalchemy.engine.Engine`.
331 Parameters
332 ----------
333 engine : `sqlalchemy.engine.Engine`
334 The engine for the database. May be shared between `Database`
335 instances.
336 origin : `int`
337 An integer ID that should be used as the default for any datasets,
338 quanta, or other entities that use a (autoincrement, origin)
339 compound primary key.
340 namespace : `str`, optional
341 A different database namespace (i.e. schema) the new instance
342 should be associated with. If `None` (default), the namespace
343 (if any) is inferred from the connection.
344 writeable : `bool`, optional
345 If `True`, allow write operations on the database, including
346 ``CREATE TABLE``.
348 Returns
349 -------
350 db : `Database`
351 A new `Database` instance.
353 Notes
354 -----
355 This method allows different `Database` instances to share the same
356 engine, which is desirable when they represent different namespaces
357 can be queried together.
358 """
359 raise NotImplementedError()
@final
@contextmanager
def session(self) -> Iterator[None]:
    """Open a session, i.e. a persistent connection to the database, for
    the duration of a ``with`` block.

    Returns
    -------
    context : `AbstractContextManager` [ `None` ]
        A context manager that does not return a value when entered.

    Notes
    -----
    Use this when a series of read-only SQL operations is performed in
    rapid succession *without* needing consistent results in the presence
    of concurrent writes (or, more rarely, when conflicting concurrent
    writes are rare/impossible and the session stays open long enough that
    a transaction is inadvisable).
    """
    # Delegate to the protected implementation, but keep the connection it
    # provides out of the caller's hands.
    with self._session():
        yield None
@final
@contextmanager
def transaction(
    self,
    *,
    interrupting: bool = False,
    savepoint: bool = False,
    lock: Iterable[sqlalchemy.schema.Table] = (),
    for_temp_tables: bool = False,
) -> Iterator[None]:
    """Run the body of a ``with`` block inside a database transaction.

    Parameters
    ----------
    interrupting : `bool`, optional
        If `True` (`False` is default), this transaction block may not be
        nested within an outer one, and attempting to do so is a logic
        (i.e. assertion) error.
    savepoint : `bool`, optional
        If `True` (`False` is default), create a `SAVEPOINT`, so that
        exceptions raised by the database (e.g. due to constraint
        violations) inside this block can be caught outside it without
        rolling back everything done in an outer transaction block.  If
        `False`, transactions may still be nested, but a rollback may be
        generated at any level and affects all levels, and commits are
        deferred until the outermost block completes.  If any outer
        transaction block was created with ``savepoint=True``, all inner
        blocks will be as well (regardless of the value passed).  This
        has no effect if this is the outermost transaction.
    lock : `~collections.abc.Iterable` [ `sqlalchemy.schema.Table` ], \
            optional
        Tables to lock for the duration of this transaction.  These locks
        are guaranteed to prevent concurrent writes and allow this
        transaction (only) to acquire the same locks (others should
        block), but only prevent concurrent reads if the database engine
        requires that in order to block concurrent writes.
    for_temp_tables : `bool`, optional
        If `True`, this transaction may involve creating temporary tables.

    Returns
    -------
    context : `AbstractContextManager` [ `None` ]
        A context manager that commits the transaction when it is exited
        without error and rolls back the transaction when it is exited via
        an exception.

    Notes
    -----
    All transactions on a connection managed by one or more `Database`
    instances _must_ go through this method, or transaction state will not
    be correctly managed.
    """
    # Delegate to the protected implementation, discarding the
    # (is_new, connection) pair it provides.
    context = self._transaction(
        interrupting=interrupting, savepoint=savepoint, lock=lock, for_temp_tables=for_temp_tables
    )
    with context:
        yield None
@contextmanager
def temporary_table(
    self, spec: ddl.TableSpec, name: str | None = None
) -> Iterator[sqlalchemy.schema.Table]:
    """Return a context manager that creates and then drops a temporary
    table.

    Parameters
    ----------
    spec : `ddl.TableSpec`
        Specification for the columns.  Unique and foreign key constraints
        may be ignored.
    name : `str`, optional
        If provided, the name of the SQL construct.  If not provided, an
        opaque but unique identifier is generated.

    Returns
    -------
    context : `AbstractContextManager` [ `sqlalchemy.schema.Table` ]
        A context manager that returns a SQLAlchemy representation of the
        temporary table when entered.

    Notes
    -----
    Temporary tables may be created, dropped, and written to even in
    read-only databases - at least according to the Python-level
    protections in the `Database` classes.  Server permissions may say
    otherwise, but in that case they probably need to be modified to
    support the full range of expected read-only butler behavior.

    The table's key is kept in ``self._temp_tables`` while the context is
    active; `isTableWriteable` consults that set.
    """
    # The session stays open for the whole lifetime of the table.
    with self._session() as connection:
        # ``_make_temporary_table`` is defined outside this excerpt;
        # presumably it creates the table on ``connection`` - confirm.
        table = self._make_temporary_table(connection, spec=spec, name=name)
        self._temp_tables.add(table.key)
        try:
            yield table
        finally:
            # Drop the table whether or not the caller's block raised.
            with self._transaction():
                table.drop(connection)
            self._temp_tables.remove(table.key)
@contextmanager
def _session(self) -> Iterator[sqlalchemy.engine.Connection]:
    """Protected implementation for `session` that actually returns the
    connection.

    This method is for internal `Database` calls that need the actual
    SQLAlchemy connection object.  It should be overridden by subclasses
    instead of `session` itself.

    Returns
    -------
    context : `AbstractContextManager` [ `sqlalchemy.engine.Connection` ]
        A context manager that returns a SQLAlchemy connection when
        entered.

    Notes
    -----
    Calls may be nested: if a session connection is already stored on this
    object it is reused and left open on exit.  Only the call that opened
    the connection closes it.
    """
    if self._session_connection is not None:
        # session already started, just reuse that
        yield self._session_connection
    else:
        try:
            # open new connection and close it when done
            self._session_connection = self._engine.connect()
            yield self._session_connection
        finally:
            # The attribute is still `None` here if ``connect()`` itself
            # raised, in which case there is nothing to close.
            if self._session_connection is not None:
                self._session_connection.close()
                self._session_connection = None
            # Temporary tables only live within session
            self._temp_tables = set()
@contextmanager
def _transaction(
    self,
    *,
    interrupting: bool = False,
    savepoint: bool = False,
    lock: Iterable[sqlalchemy.schema.Table] = (),
    for_temp_tables: bool = False,
) -> Iterator[tuple[bool, sqlalchemy.engine.Connection]]:
    """Protected implementation for `transaction` that actually returns the
    connection and whether this is a new outermost transaction.

    This method is for internal `Database` calls that need the actual
    SQLAlchemy connection object.  It should be overridden by subclasses
    instead of `transaction` itself.

    Parameters
    ----------
    interrupting : `bool`, optional
        If `True` (`False` is default), this transaction block may not be
        nested within an outer one, and attempting to do so is a logic
        (i.e. assertion) error.
    savepoint : `bool`, optional
        If `True` (`False` is default), create a `SAVEPOINT`, allowing
        exceptions raised by the database (e.g. due to constraint
        violations) during this transaction's context to be caught outside
        it without also rolling back all operations in an outer transaction
        block.  If `False`, transactions may still be nested, but a
        rollback may be generated at any level and affects all levels, and
        commits are deferred until the outermost block completes.  If any
        outer transaction block was created with ``savepoint=True``, all
        inner blocks will be as well (regardless of the actual value
        passed).  This has no effect if this is the outermost transaction.
    lock : `~collections.abc.Iterable` [ `sqlalchemy.schema.Table` ], \
            optional
        A list of tables to lock for the duration of this transaction.
        These locks are guaranteed to prevent concurrent writes and allow
        this transaction (only) to acquire the same locks (others should
        block), but only prevent concurrent reads if the database engine
        requires that in order to block concurrent writes.
    for_temp_tables : `bool`, optional
        If `True`, this transaction may involve creating temporary tables.
        This base implementation does not use the value.

    Returns
    -------
    context : `AbstractContextManager` [ `tuple` [ `bool`,
            `sqlalchemy.engine.Connection` ] ]
        A context manager that commits the transaction when it is exited
        without error and rolls back the transaction when it is exited via
        an exception.  When entered, it returns a tuple of:

        - ``is_new`` (`bool`): whether this is a new (outermost)
          transaction;
        - ``connection`` (`sqlalchemy.engine.Connection`): the connection.
    """
    with self._session() as connection:
        already_in_transaction = connection.in_transaction()
        assert not (interrupting and already_in_transaction), (
            "Logic error in transaction nesting: an operation that would "
            "interrupt the active transaction context has been requested."
        )
        # Once any enclosing block uses a savepoint, inner ones do too.
        savepoint = savepoint or connection.in_nested_transaction()
        trans: sqlalchemy.engine.Transaction | None
        if already_in_transaction:
            if savepoint:
                trans = connection.begin_nested()
            else:
                # Nested non-savepoint transactions don't do anything.
                trans = None
        else:
            # Use a regular (non-savepoint) transaction always for the
            # outermost context.
            trans = connection.begin()
        # Locks are requested only after a transaction is active.
        self._lockTables(connection, lock)
        try:
            yield not already_in_transaction, connection
            if trans is not None:
                trans.commit()
        except BaseException:
            # Roll back what this level started (if anything) and let the
            # exception propagate to any enclosing level.
            if trans is not None:
                trans.rollback()
            raise
594 @abstractmethod
595 def _lockTables(
596 self, connection: sqlalchemy.engine.Connection, tables: Iterable[sqlalchemy.schema.Table] = ()
597 ) -> None:
598 """Acquire locks on the given tables.
600 This is an implementation hook for subclasses, called by `transaction`.
601 It should not be called directly by other code.
603 Parameters
604 ----------
605 connection : `sqlalchemy.engine.Connection`
606 Database connection object. It is guaranteed that transaction is
607 already in a progress for this connection.
608 tables : `~collections.abc.Iterable` [ `sqlalchemy.schema.Table` ], \
609 optional
610 A list of tables to lock for the duration of this transaction.
611 These locks are guaranteed to prevent concurrent writes and allow
612 this transaction (only) to acquire the same locks (others should
613 block), but only prevent concurrent reads if the database engine
614 requires that in order to block concurrent writes.
615 """
616 raise NotImplementedError()
def isTableWriteable(self, table: sqlalchemy.schema.Table) -> bool:
    """Report whether a table may be written to.

    A table is writeable when the database connection itself is read-write
    or when the table is one of this object's temporary tables.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy table object to check.

    Returns
    -------
    writeable : `bool`
        Whether this table is writeable.
    """
    database_writeable = self.isWriteable()
    if database_writeable:
        return database_writeable
    # Read-only database: only tracked temporary tables can be written.
    return table.key in self._temp_tables
def assertTableWriteable(self, table: sqlalchemy.schema.Table, msg: str) -> None:
    """Raise unless the given table is writeable.

    Writeability is decided by `isTableWriteable`.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy table object to check.
    msg : `str`
        Message for the exception raised when the table is not writeable.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if the table is not writeable.
    """
    if self.isTableWriteable(table):
        return
    raise ReadOnlyDatabaseError(msg)
@contextmanager
def declareStaticTables(self, *, create: bool) -> Iterator[StaticTablesContext]:
    """Return a context manager in which the database's static DDL schema
    can be declared.

    Parameters
    ----------
    create : `bool`
        If `True`, attempt to create all tables at the end of the context.
        If `False`, they will be assumed to already exist.

    Returns
    -------
    schema : `StaticTablesContext`
        A helper object that is used to add new tables.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if ``create`` is `True` and `Database.isWriteable` is
        `False`.
    SchemaAlreadyDefinedError
        Raised if ``create`` is `True` and tables already exist in this
        database's namespace.

    Examples
    --------
    Given a `Database` instance ``db``::

        with db.declareStaticTables(create=True) as schema:
            schema.addTable("table1", TableSpec(...))
            schema.addTable("table2", TableSpec(...))

    Notes
    -----
    A database's static DDL schema must be declared before any dynamic
    tables are managed via calls to `ensureTableExists` or
    `getExistingTable`.  The order in which static schema tables are added
    inside the context block is unimportant; they will automatically be
    sorted and added in an order consistent with their foreign key
    relationships.

    If anything raises, ``self._metadata`` is reset to `None` before the
    exception propagates.
    """
    if create and not self.isWriteable():
        raise ReadOnlyDatabaseError(f"Cannot create tables in read-only database {self}.")
    self._metadata = sqlalchemy.MetaData(schema=self.namespace)
    try:
        with self._transaction() as (_, connection):
            context = StaticTablesContext(self, connection)
            if create and context._tableNames:
                # Looks like database is already initialized, to avoid
                # danger of modifying/destroying valid schema we refuse to
                # do anything in this case
                raise SchemaAlreadyDefinedError(f"Cannot create tables in non-empty database {self}.")
            yield context
            # Foreign keys are attached only now, after the caller has
            # declared every table.
            for table, foreignKey in context._foreignKeys:
                table.append_constraint(foreignKey)
            if create:
                if (
                    self.namespace is not None
                    and self.namespace not in context._inspector.get_schema_names()
                ):
                    connection.execute(sqlalchemy.schema.CreateSchema(self.namespace))
                # In our tables we have columns that make use of sqlalchemy
                # Sequence objects. There is currently a bug in sqlalchemy
                # that causes a deprecation warning to be thrown on a
                # property of the Sequence object when the repr for the
                # sequence is created. Here a filter is used to catch these
                # deprecation warnings when tables are created.
                with warnings.catch_warnings():
                    warnings.simplefilter("ignore", category=sqlalchemy.exc.SADeprecationWarning)
                    self._metadata.create_all(connection)
                # call all initializer methods sequentially
                for init in context._initializers:
                    init(self)
    except BaseException:
        # Forget the partially-declared schema on any failure.
        self._metadata = None
        raise
@abstractmethod
def isWriteable(self) -> bool:
    """Report whether this client is able to modify the database.

    Returns
    -------
    writeable : `bool`
        `True` if this database can be modified by this client.
    """
    raise NotImplementedError()
729 @abstractmethod
730 def __str__(self) -> str:
731 """Return a human-readable identifier for this `Database`, including
732 any namespace or schema that identifies its names within a `Registry`.
733 """
734 raise NotImplementedError()
736 @property
737 def dialect(self) -> sqlalchemy.engine.Dialect:
738 """The SQLAlchemy dialect for this database engine
739 (`sqlalchemy.engine.Dialect`).
740 """
741 return self._engine.dialect
def shrinkDatabaseEntityName(self, original: str) -> str:
    """Shorten a name so that it fits this database engine's length limits
    for table, constraint, index, and sequence names.

    Implementations must not rely on plain truncation, since many long
    names share a common prefix.

    This default implementation returns the name unchanged.

    Parameters
    ----------
    original : `str`
        The original name.

    Returns
    -------
    shrunk : `str`
        The new, possibly shortened name.
    """
    shrunk = original
    return shrunk
def expandDatabaseEntityName(self, shrunk: str) -> str:
    """Retrieve the original name for a database entity that was too long
    to fit within the database engine's limits.

    This is the inverse of `shrinkDatabaseEntityName`.  The default
    implementation returns the given name unchanged.

    Parameters
    ----------
    shrunk : `str`
        The possibly shortened name, as returned by
        `shrinkDatabaseEntityName`.

    Returns
    -------
    original : `str`
        The original name.
    """
    return shrunk
781 def _mangleTableName(self, name: str) -> str:
782 """Map a logical, user-visible table name to the true table name used
783 in the database.
785 The default implementation returns the given name unchanged.
787 Parameters
788 ----------
789 name : `str`
790 Input table name. Should not include a namespace (i.e. schema)
791 prefix.
793 Returns
794 -------
795 mangled : `str`
796 Mangled version of the table name (still with no namespace prefix).
798 Notes
799 -----
800 Reimplementations of this method must be idempotent - mangling an
801 already-mangled name must have no effect.
802 """
803 return name
805 def _makeColumnConstraints(self, table: str, spec: ddl.FieldSpec) -> list[sqlalchemy.CheckConstraint]:
806 """Create constraints based on this spec.
808 Parameters
809 ----------
810 table : `str`
811 Name of the table this column is being added to.
812 spec : `FieldSpec`
813 Specification for the field to be added.
815 Returns
816 -------
817 constraint : `list` of `sqlalchemy.CheckConstraint`
818 Constraint added for this column.
819 """
820 # By default we return no additional constraints
821 return []
def _convertFieldSpec(
    self, table: str, spec: ddl.FieldSpec, metadata: sqlalchemy.MetaData, **kwargs: Any
) -> sqlalchemy.schema.Column:
    """Build the `sqlalchemy.schema.Column` described by a `FieldSpec`.

    Parameters
    ----------
    table : `str`
        Name of the table this column is being added to.
    spec : `FieldSpec`
        Specification for the field to be added.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema this field's table is
        being added to.
    **kwargs
        Additional keyword arguments to forward to the
        `sqlalchemy.schema.Column` constructor, so that derived classes
        can delegate to ``super()`` while making only minor changes.

    Returns
    -------
    column : `sqlalchemy.schema.Column`
        SQLAlchemy representation of the field.
    """
    positional = []
    if spec.autoincrement:
        # Databases without native auto-increment need an explicit
        # sequence; sqlalchemy ignores it for those that have one.
        sequence_name = self.shrinkDatabaseEntityName(f"{table}_seq_{spec.name}")
        positional.append(sqlalchemy.Sequence(sequence_name, metadata=metadata))
    assert spec.doc is None or isinstance(spec.doc, str), f"Bad doc for {table}.{spec.name}."
    column_type = spec.getSizedColumnType()
    return sqlalchemy.schema.Column(
        spec.name,
        column_type,
        *positional,
        nullable=spec.nullable,
        primary_key=spec.primaryKey,
        comment=spec.doc,
        server_default=spec.default,
        **kwargs,
    )
def _convertForeignKeySpec(
    self, table: str, spec: ddl.ForeignKeySpec, metadata: sqlalchemy.MetaData, **kwargs: Any
) -> sqlalchemy.schema.ForeignKeyConstraint:
    """Convert a `ForeignKeySpec` to a
    `sqlalchemy.schema.ForeignKeyConstraint`.

    Parameters
    ----------
    table : `str`
        Name of the table this foreign key is being added to.
    spec : `ForeignKeySpec`
        Specification for the foreign key to be added.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema this constraint is
        being added to.  Not used by this base implementation.
    **kwargs
        Additional keyword arguments to forward to the
        `sqlalchemy.schema.ForeignKeyConstraint` constructor.  This is
        provided to make it easier for derived classes to delegate to
        ``super()`` while making only minor changes.

    Returns
    -------
    constraint : `sqlalchemy.schema.ForeignKeyConstraint`
        SQLAlchemy representation of the constraint.
    """
    # Mangle the referenced table name once; it is needed both for the
    # constraint name and for every target column reference.
    target_table = self._mangleTableName(spec.table)
    name = self.shrinkDatabaseEntityName(
        "_".join(["fkey", table, target_table, *spec.target, *spec.source])
    )
    return sqlalchemy.schema.ForeignKeyConstraint(
        spec.source,
        [f"{target_table}.{col}" for col in spec.target],
        name=name,
        ondelete=spec.onDelete,
        # Forward caller-supplied options as documented; they were
        # previously accepted but silently discarded.
        **kwargs,
    )
def _convertExclusionConstraintSpec(
    self,
    table: str,
    spec: tuple[str | type[TimespanDatabaseRepresentation], ...],
    metadata: sqlalchemy.MetaData,
) -> sqlalchemy.schema.Constraint:
    """Build the SQLAlchemy constraint for one entry of
    `ddl.TableSpec.exclusion`.

    This base implementation always raises; it only defines the interface.

    Parameters
    ----------
    table : `str`
        Name of the table the constraint belongs to.
    spec : `tuple` [ `str` or `type` ]
        Column names (`str`) together with the `type` object returned by
        `getTimespanRepresentation` (which must appear exactly once), in
        the order the columns should have in the index backing the
        constraint.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema the constraint is
        being added to.

    Returns
    -------
    constraint : `sqlalchemy.schema.Constraint`
        SQLAlchemy representation of the constraint.

    Raises
    ------
    NotImplementedError
        Raised if this database does not support exclusion constraints.
    """
    message = f"Database {self} does not support exclusion constraints."
    raise NotImplementedError(message)
def _convertTableSpec(
    self, name: str, spec: ddl.TableSpec, metadata: sqlalchemy.MetaData, **kwargs: Any
) -> sqlalchemy.schema.Table:
    """Convert a `TableSpec` to a `sqlalchemy.schema.Table`.

    Parameters
    ----------
    name : `str`
        Name of the table; it is passed through `_mangleTableName` before
        being used for the table and for derived constraint/index names.
    spec : `TableSpec`
        Specification for the table to be created.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema this table is being
        added to.
    **kwargs
        Additional keyword arguments to forward to the
        `sqlalchemy.schema.Table` constructor.  This is provided to make it
        easier for derived classes to delegate to ``super()`` while making
        only minor changes.

    Returns
    -------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy representation of the table.

    Notes
    -----
    This method does not handle ``spec.foreignKeys`` at all, in order to
    avoid circular dependencies.  These are added by higher-level logic in
    `ensureTableExists`, `getExistingTable`, and `declareStaticTables`.
    (Indexes backing foreign keys with ``addIndex`` set *are* created
    here; only the constraints themselves are left out.)
    """
    name = self._mangleTableName(name)
    args: list[sqlalchemy.schema.SchemaItem] = [
        self._convertFieldSpec(name, fieldSpec, metadata) for fieldSpec in spec.fields
    ]

    # Add any column constraints
    for fieldSpec in spec.fields:
        args.extend(self._makeColumnConstraints(name, fieldSpec))

    # Track indexes added for primary key and unique constraints, to make
    # sure we don't add duplicate explicit or foreign key indexes for
    # those.
    allIndexes = {tuple(fieldSpec.name for fieldSpec in spec.fields if fieldSpec.primaryKey)}
    args.extend(
        sqlalchemy.schema.UniqueConstraint(
            *columns, name=self.shrinkDatabaseEntityName("_".join([name, "unq"] + list(columns)))
        )
        for columns in spec.unique
    )
    allIndexes.update(spec.unique)
    # The generator below is consumed immediately by ``extend``, so it sees
    # ``allIndexes`` as populated so far (primary key + unique columns).
    # NOTE(review): because columns in ``spec.unique`` are already filtered
    # out by the ``not in allIndexes`` condition, ``unique=`` looks like it
    # can only ever be `False` here -- confirm.
    args.extend(
        sqlalchemy.schema.Index(
            self.shrinkDatabaseEntityName("_".join([name, "idx"] + list(index.columns))),
            *index.columns,
            unique=(index.columns in spec.unique),
            **index.kwargs,
        )
        for index in spec.indexes
        if index.columns not in allIndexes
    )
    allIndexes.update(index.columns for index in spec.indexes)
    # Add an index for each foreign key that asks for one, unless an
    # index on the same columns was already accounted for above.
    # ``fk.source`` is concatenated with a tuple, so it must be a tuple.
    args.extend(
        sqlalchemy.schema.Index(
            self.shrinkDatabaseEntityName("_".join((name, "fkidx") + fk.source)),
            *fk.source,
        )
        for fk in spec.foreignKeys
        if fk.addIndex and fk.source not in allIndexes
    )

    args.extend(self._convertExclusionConstraintSpec(name, excl, metadata) for excl in spec.exclusion)

    assert spec.doc is None or isinstance(spec.doc, str), f"Bad doc for {name}."
    return sqlalchemy.schema.Table(name, metadata, *args, comment=spec.doc, info={"spec": spec}, **kwargs)
def ensureTableExists(self, name: str, spec: ddl.TableSpec) -> sqlalchemy.schema.Table:
    """Ensure that a table with the given name and specification exists,
    creating it if necessary.

    Parameters
    ----------
    name : `str`
        Name of the table (not including namespace qualifiers).
    spec : `TableSpec`
        Specification for the table.  This will be used when creating the
        table, and *may* be used when obtaining an existing table to check
        for consistency, but no such check is guaranteed.

    Returns
    -------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy representation of the table.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False`, and the table does not
        already exist.
    DatabaseConflictError
        Raised if the table exists but ``spec`` is inconsistent with its
        definition.
    sqlalchemy.exc.DatabaseError
        Re-raised if table creation failed and the table still does not
        exist afterwards.

    Notes
    -----
    This method may not be called within transactions.  It may be called on
    read-only databases if and only if the table does in fact already
    exist.

    Subclasses may override this method, but usually should not need to.
    """
    # TODO: if _engine is used to make a table then it uses separate
    # connection and should not interfere with current transaction
    assert (
        self._session_connection is None or not self._session_connection.in_transaction()
    ), "Table creation interrupts transactions."
    assert self._metadata is not None, "Static tables must be declared before dynamic tables."
    table = self.getExistingTable(name, spec)
    if table is not None:
        return table
    if not self.isWriteable():
        raise ReadOnlyDatabaseError(
            f"Table {name} does not exist, and cannot be created because database {self} is read-only."
        )
    table = self._convertTableSpec(name, spec, self._metadata)
    for foreignKeySpec in spec.foreignKeys:
        table.append_constraint(self._convertForeignKeySpec(name, foreignKeySpec, self._metadata))
    try:
        with self._transaction() as (_, connection):
            table.create(connection)
    except sqlalchemy.exc.DatabaseError:
        # Some other process could have created the table meanwhile, which
        # usually causes OperationalError or ProgrammingError.  We cannot
        # use IF NOT EXISTS clause in this case due to PostgreSQL race
        # condition on server side which causes IntegrityError.  Instead we
        # catch these exceptions (they all inherit DatabaseError) and
        # re-check whether table is now there.
        #
        # The Table object built above is already registered in the
        # metadata, so it must be dropped from there first; otherwise
        # getExistingTable would find that in-memory definition, never
        # look at the actual database, and the error would be swallowed
        # even when the table was not created by anybody.
        self._metadata.remove(table)
        table = self.getExistingTable(name, spec)
        if table is None:
            raise
    return table
def getExistingTable(self, name: str, spec: ddl.TableSpec) -> sqlalchemy.schema.Table | None:
    """Look up a table that should already exist, without creating it.

    Parameters
    ----------
    name : `str`
        Name of the table (not including namespace qualifiers).
    spec : `TableSpec`
        Specification for the table.  It is used to build the SQLAlchemy
        representation of the table and to verify that the definition
        already known (in memory or in the database) is consistent.

    Returns
    -------
    table : `sqlalchemy.schema.Table` or `None`
        SQLAlchemy representation of the table, or `None` if it does not
        exist.

    Raises
    ------
    DatabaseConflictError
        Raised if the table exists but ``spec`` is inconsistent with its
        definition.

    Notes
    -----
    This method can be called within transactions and never modifies the
    database.

    Subclasses may override this method, but usually should not need to.
    """
    assert self._metadata is not None, "Static tables must be declared before dynamic tables."
    name = self._mangleTableName(name)
    if self.namespace is None:
        key = name
    else:
        key = f"{self.namespace}.{name}"
    table = self._metadata.tables.get(key)

    # Case 1: the table is already registered in the in-memory metadata;
    # only the set of column names is compared.
    if table is not None:
        if spec.fields.names != set(table.columns.keys()):
            raise DatabaseConflictError(
                f"Table '{name}' has already been defined differently; the new "
                f"specification has columns {list(spec.fields.names)}, while "
                f"the previous definition has {list(table.columns.keys())}."
            )
        return table

    # Case 2: not known in memory; ask the database, using the session
    # connection when there is one and the engine otherwise.
    bind = self._session_connection
    if bind is None:
        bind = self._engine
    inspector = sqlalchemy.inspect(bind, raiseerr=True)
    if name not in inspector.get_table_names(schema=self.namespace):
        return None

    _checkExistingTableDefinition(name, spec, inspector.get_columns(name, schema=self.namespace))
    table = self._convertTableSpec(name, spec, self._metadata)
    for foreignKeySpec in spec.foreignKeys:
        constraint = self._convertForeignKeySpec(name, foreignKeySpec, self._metadata)
        table.append_constraint(constraint)
    return table
def _make_temporary_table(
    self,
    connection: sqlalchemy.engine.Connection,
    spec: ddl.TableSpec,
    name: str | None = None,
    **kwargs: Any,
) -> sqlalchemy.schema.Table:
    """Create a temporary table on the given connection.

    Parameters
    ----------
    connection : `sqlalchemy.engine.Connection`
        Connection to use when creating the table.
    spec : `TableSpec`
        Specification for the table.
    name : `str`, optional
        A unique (within this session/connection) name for the table.
        Subclasses may override to modify the actual name used.  If not
        provided, a unique name will be generated.
    **kwargs
        Additional keyword arguments to forward to the
        `sqlalchemy.schema.Table` constructor.  This is provided to make it
        easier for derived classes to delegate to ``super()`` while making
        only minor changes.

    Returns
    -------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy representation of the table.

    Raises
    ------
    RuntimeError
        Raised if the static schema has not been defined yet.
    ValueError
        Raised if the (possibly transformed) table key is already
        registered as a temporary table.
    """
    metadata = self._metadata
    if metadata is None:
        raise RuntimeError("Cannot create temporary table before static schema is defined.")
    if name is None:
        # No name given: generate a random one.
        name = f"tmp_{uuid.uuid4().hex}"
    table = self._convertTableSpec(
        name, spec, metadata, prefixes=["TEMPORARY"], schema=sqlalchemy.schema.BLANK_SCHEMA, **kwargs
    )
    already_known = table.key in self._temp_tables
    if already_known and table.key != name:
        raise ValueError(
            f"A temporary table with name {name} (transformed to {table.key} by "
            "Database) already exists."
        )
    for foreignKeySpec in spec.foreignKeys:
        constraint = self._convertForeignKeySpec(name, foreignKeySpec, metadata)
        table.append_constraint(constraint)
    with self._transaction():
        table.create(connection)
    return table
@classmethod
def getTimespanRepresentation(cls) -> type[TimespanDatabaseRepresentation]:
    """Return the `type` describing how `Timespan` objects are stored in
    this database.

    `Database` itself does not use the returned type anywhere else; it is
    up to calling code to keep DDL and queries consistent with it.

    Returns
    -------
    TimespanReprClass : `type` (`TimespanDatabaseRepresentation` subclass)
        A type that encapsulates the way `Timespan` objects should be
        stored in this database.

    Notes
    -----
    Timespan-mangling logic is deliberately kept outside the `Database`
    implementations, even though the choice of representation ultimately
    belongs to a `Database` implementation, for two reasons:

    - Timespans appear in relatively few tables and queries in typical
      usage, and the code operating on them already knows it is dealing
      with timespans.  A timespan-representation-aware implementation
      of, say, `insert`, would instead need extra logic to detect when
      timespan-mangling is required, which would usually be useless
      overhead.

    - SQLAlchemy's rich SELECT query expression system has no way to wrap
      multiple columns in a single expression object (the ORM does, but
      the ORM is not used here).  Encapsulating timespan representations
      there would mean wrapping _much_ more of that code in our own
      interfaces.
    """
    representation = TimespanDatabaseRepresentation.Compound
    return representation
def sync(
    self,
    table: sqlalchemy.schema.Table,
    *,
    keys: dict[str, Any],
    compared: dict[str, Any] | None = None,
    extra: dict[str, Any] | None = None,
    returning: Sequence[str] | None = None,
    update: bool = False,
) -> tuple[dict[str, Any] | None, bool | dict[str, Any]]:
    """Insert into a table as necessary to ensure database contains
    values equivalent to the given ones.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table to be queried and possibly inserted into.
    keys : `dict`
        Column name-value pairs used to search for an existing row; must
        be a combination that can be used to select a single row if one
        exists.  If such a row does not exist, these values are used in
        the insert.
    compared : `dict`, optional
        Column name-value pairs that are compared to those in any existing
        row.  If such a row does not exist, these values are used in the
        insert.
    extra : `dict`, optional
        Column name-value pairs that are ignored if a matching row exists,
        but used in an insert if one is necessary.
    returning : `~collections.abc.Sequence` of `str`, optional
        The names of columns whose values should be returned.
    update : `bool`, optional
        If `True` (`False` is default), update the existing row with the
        values in ``compared`` instead of raising `DatabaseConflictError`.

    Returns
    -------
    row : `dict`, optional
        The value of the fields indicated by ``returning``, or `None` if
        ``returning`` is `None`.
    inserted_or_updated : `bool` or `dict`
        If `True`, a new row was inserted; if `False`, a matching row
        already existed.  If a `dict` (only possible if ``update=True``),
        then an existing row was updated, and the dict maps the names of
        the updated columns to their *old* values (new values can be
        obtained from ``compared``).

    Raises
    ------
    DatabaseConflictError
        Raised if the values in ``compared`` do not match the values in the
        database.
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False`, and no matching record
        already exists.
    ConflictingDefinitionError
        Raised if, on a writeable table, no row matching ``keys`` is found
        after the insert attempt.
    RuntimeError
        Raised if more than one row matches ``keys``, or if a conflict is
        detected right after a successful insert.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    It may be called on read-only databases if and only if the matching row
    does in fact already exist.
    """

    def check() -> tuple[int, dict[str, Any] | None, list | None]:
        """Query for a row that matches the ``keys`` argument, and compare
        to what was given by the caller.

        Returns
        -------
        n : `int`
            Number of matching rows.  ``n != 1`` is always an error, but
            it's a different kind of error depending on where `check` is
            being called.
        bad : `dict` or `None`
            The subset of the keys of ``compared`` for which the existing
            values did not match the given one, mapped to the existing
            values in the database.  A non-empty ``bad`` is a conflict
            that the caller handles differently depending on context.
            `None` if ``n != 1``.
        result : `list` or `None`
            Results in the database that correspond to the columns given
            in ``returning``, or `None` if ``returning is None`` (or if
            ``n != 1``).
        """
        toSelect: set[str] = set()
        if compared is not None:
            toSelect.update(compared.keys())
        if returning is not None:
            toSelect.update(returning)
        if not toSelect:
            # Need to select some column, even if we just want to see
            # how many rows we get back.
            toSelect.add(next(iter(keys.keys())))
        selectSql = (
            sqlalchemy.sql.select(*[table.columns[k].label(k) for k in toSelect])
            .select_from(table)
            .where(sqlalchemy.sql.and_(*[table.columns[k] == v for k, v in keys.items()]))
        )
        with self._transaction() as (_, connection):
            fetched = list(connection.execute(selectSql).mappings())
        if len(fetched) != 1:
            return len(fetched), None, None
        existing = fetched[0]
        if compared is not None:

            def safeNotEqual(a: Any, b: Any) -> bool:
                # astropy times are compared through TimeConverter rather
                # than with ``!=``.
                if isinstance(a, astropy.time.Time):
                    return not time_utils.TimeConverter().times_equal(a, b)
                return a != b

            inconsistencies = {
                k: existing[k] for k, v in compared.items() if safeNotEqual(existing[k], v)
            }
        else:
            inconsistencies = {}
        if returning is not None:
            toReturn: list | None = [existing[k] for k in returning]
        else:
            toReturn = None
        return 1, inconsistencies, toReturn

    def format_bad(inconsistencies: dict[str, Any]) -> str:
        """Format the 'bad' dictionary of existing values returned by
        ``check`` into a string suitable for an error message.
        """
        assert compared is not None, "Should not be able to get inconsistencies without comparing."
        return ", ".join(f"{k}: {v!r} != {compared[k]!r}" for k, v in inconsistencies.items())

    if self.isTableWriteable(table):
        # Try an insert first, but allow it to fail (in only specific
        # ways).
        row = keys.copy()
        if compared is not None:
            row.update(compared)
        if extra is not None:
            row.update(extra)
        with self.transaction():
            inserted = bool(self.ensure(table, row))
            inserted_or_updated: bool | dict[str, Any]
            # Need to perform check() for this branch inside the
            # transaction, so we roll back an insert that didn't do
            # what we expected.  That limits the extent to which we
            # can reduce duplication between this block and the other
            # ones that perform similar logic.
            n, bad, result = check()
            if n < 1:
                raise ConflictingDefinitionError(
                    f"Attempted to ensure {row} exists by inserting it with ON CONFLICT IGNORE, "
                    f"but a post-insert query on {keys} returned no results. "
                    f"Insert was {'' if inserted else 'not '}reported as successful. "
                    "This can occur if the insert violated a database constraint other than the "
                    "unique constraint or primary key used to identify the row in this call."
                )
            elif n > 1:
                raise RuntimeError(
                    f"Keys passed to sync {keys.keys()} do not comprise a "
                    f"unique constraint for table {table.name}."
                )
            elif bad:
                assert (
                    compared is not None
                ), "Should not be able to get inconsistencies without comparing."
                if inserted:
                    raise RuntimeError(
                        f"Conflict ({bad}) in sync after successful insert; this is "
                        "possible if the same table is being updated by a concurrent "
                        "process that isn't using sync, but it may also be a bug in "
                        "daf_butler."
                    )
                elif update:
                    # Overwrite only the columns that differ; ``bad`` keeps
                    # the old values and is what gets reported back.
                    with self._transaction() as (_, connection):
                        connection.execute(
                            table.update()
                            .where(sqlalchemy.sql.and_(*[table.columns[k] == v for k, v in keys.items()]))
                            .values(**{k: compared[k] for k in bad})
                        )
                    inserted_or_updated = bad
                else:
                    raise DatabaseConflictError(
                        f"Conflict in sync for table {table.name} on column(s) {format_bad(bad)}."
                    )
            else:
                inserted_or_updated = inserted
    else:
        # Database is not writeable; just see if the row exists.
        n, bad, result = check()
        if n < 1:
            raise ReadOnlyDatabaseError("sync needs to insert, but database is read-only.")
        elif n > 1:
            raise RuntimeError("Keys passed to sync do not comprise a unique constraint.")
        elif bad:
            if update:
                raise ReadOnlyDatabaseError("sync needs to update, but database is read-only.")
            else:
                raise DatabaseConflictError(
                    f"Conflict in sync for table {table.name} on column(s) {format_bad(bad)}."
                )
        inserted_or_updated = False
    if returning is None:
        return None, inserted_or_updated
    else:
        assert result is not None
        return dict(zip(returning, result, strict=True)), inserted_or_updated
def insert(
    self,
    table: sqlalchemy.schema.Table,
    *rows: dict,
    returnIds: bool = False,
    select: sqlalchemy.sql.expression.SelectBase | None = None,
    names: Iterable[str] | None = None,
) -> list[int] | None:
    """Insert rows into a table, either from explicit values or from a
    SELECT, optionally returning autoincrement primary key values.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    returnIds : `bool`
        If `True` (`False` is default), return the values of the table's
        autoincrement primary key field (which must exist).
    select : `sqlalchemy.sql.SelectBase`, optional
        A SELECT query expression to insert rows from.  Cannot be provided
        with either ``rows`` or ``returnIds=True``.
    names : `~collections.abc.Iterable` [ `str` ], optional
        Names of columns in ``table`` to be populated, ordered to match the
        columns returned by ``select``.  Ignored if ``select`` is `None`.
        If not provided, the columns returned by ``select`` must be named
        to match the desired columns of ``table``.
    *rows
        Positional arguments are the rows to be inserted, as dictionaries
        mapping column name to value.  The keys in all dictionaries must
        be the same.

    Returns
    -------
    ids : `None`, or `list` of `int`
        If ``returnIds`` is `True`, a `list` containing the inserted
        values for the table's autoincrement primary key.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.
    TypeError
        Raised if ``select`` is combined with ``rows`` or
        ``returnIds=True``.

    Notes
    -----
    The default implementation uses bulk insert syntax when ``returnIds``
    is `False`, and a loop over single-row insert operations when it is
    `True`.

    Derived classes should reimplement when they can provide a more
    efficient implementation (especially for the latter case).

    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.
    """
    self.assertTableWriteable(table, f"Cannot insert into read-only table {table}.")
    if select is not None and (rows or returnIds):
        raise TypeError("'select' is incompatible with passing value rows or returnIds=True.")
    if select is None and not rows:
        # Nothing to insert at all.
        return [] if returnIds else None

    with self._transaction() as (_, connection):
        if returnIds:
            # One statement per row so each generated key can be read back.
            statement = table.insert()
            ids = []
            for row in rows:
                outcome = connection.execute(statement, row)
                ids.append(outcome.inserted_primary_key[0])
            return ids

        if select is None:
            # Plain bulk insert of the given rows.
            connection.execute(table.insert(), rows)
            return None

        if names is None:
            # columns() is deprecated since 1.4, but
            # selected_columns() method did not exist in 1.3.
            if hasattr(select, "selected_columns"):
                names = select.selected_columns.keys()
            else:
                names = select.columns.keys()
        connection.execute(table.insert().from_select(list(names), select))
        return None
@abstractmethod
def replace(self, table: sqlalchemy.schema.Table, *rows: dict) -> None:
    """Insert rows into a table, overwriting any existing row whose
    primary key would otherwise conflict with a new one.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    *rows
        Positional arguments are the rows to be inserted, as dictionaries
        mapping column name to value.  The keys in all dictionaries must
        be the same.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    Implementations should raise a `sqlalchemy.exc.IntegrityError`
    exception when a constraint other than the primary key would be
    violated.

    Implementations are not required to support `replace` on tables
    with autoincrement keys.
    """
    # Abstract: concrete databases must provide the implementation.
    raise NotImplementedError
@abstractmethod
def ensure(self, table: sqlalchemy.schema.Table, *rows: dict, primary_key_only: bool = False) -> int:
    """Insert rows into a table, silently skipping those whose insertion
    would violate a unique constraint.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    *rows
        Positional arguments are the rows to be inserted, as dictionaries
        mapping column name to value.  The keys in all dictionaries must
        be the same.
    primary_key_only : `bool`, optional
        If `True` (`False` is default), only skip rows that violate the
        primary key constraint, and raise an exception (and rollback
        transactions) for other constraint violations.

    Returns
    -------
    count : `int`
        The number of rows actually inserted.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.
        This is raised even if the operation would do nothing even on a
        writeable database.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    Implementations are not required to support `ensure` on tables
    with autoincrement keys.
    """
    # Abstract: concrete databases must provide the implementation.
    raise NotImplementedError
def delete(self, table: sqlalchemy.schema.Table, columns: Iterable[str], *rows: dict) -> int:
    """Delete one or more rows from a table.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table that rows should be deleted from.
    columns : `~collections.abc.Iterable` of `str`
        The names of columns that will be used to constrain the rows to
        be deleted; these will be combined via ``AND`` to form the
        ``WHERE`` clause of the delete query.  May be any iterable,
        including a one-shot iterator.
    *rows
        Positional arguments are the keys of rows to be deleted, as
        dictionaries mapping column name to value.  The keys in all
        dictionaries must be exactly the names in ``columns``.

    Returns
    -------
    count : `int`
        Number of rows deleted.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    The default implementation should be sufficient for most derived
    classes.
    """
    self.assertTableWriteable(table, f"Cannot delete from read-only table {table}.")
    # Force iterators to list *before* testing for emptiness: an iterator
    # object is always truthy, so an empty one would otherwise be mistaken
    # for "there are constraining columns" and short-circuit below.
    columns = list(columns)
    if columns and not rows:
        # If there are no columns, this operation is supposed to delete
        # everything (so we proceed as usual).  But if there are columns,
        # but no rows, it was a constrained bulk operation where the
        # constraint is that no rows match, and we should short-circuit
        # while reporting that no rows were affected.
        return 0
    sql = table.delete()

    # More efficient to use IN operator if there is only one
    # variable changing across all rows.
    content: dict[str, set] = defaultdict(set)
    if len(columns) == 1:
        # Nothing to calculate since we can always use IN
        only_column = columns[0]
        changing_columns = [only_column]
        content[only_column] = {row[only_column] for row in rows}
    else:
        for row in rows:
            for k, v in row.items():
                content[k].add(v)
        changing_columns = [col for col, values in content.items() if len(values) > 1]

    if len(changing_columns) != 1:
        # More than one column changes each time so do explicit bind
        # parameters and have each row processed separately.
        whereTerms = [table.columns[name] == sqlalchemy.sql.bindparam(name) for name in columns]
        if whereTerms:
            sql = sql.where(sqlalchemy.sql.and_(*whereTerms))
        with self._transaction() as (_, connection):
            return connection.execute(sql, rows).rowcount

    # One of the columns has changing values but any others are
    # fixed.  In this case we can use an IN operator and be more
    # efficient.
    name = changing_columns[0]

    # Simple where clause for the unchanging columns; each of their value
    # sets holds exactly one element.
    clauses = [table.columns[k] == next(iter(v)) for k, v in content.items() if k != name]

    # The IN operator will not work for "infinite" numbers of
    # rows so must batch it up into distinct calls.
    in_content = list(content[name])
    n_per_loop = 1_000  # Controls how many items to put in IN clause
    rowcount = 0
    with self._transaction() as (_, connection):
        for iposn in range(0, len(in_content), n_per_loop):
            in_clause = table.columns[name].in_(in_content[iposn : iposn + n_per_loop])
            newsql = sql.where(sqlalchemy.sql.and_(*clauses, in_clause))
            rowcount += connection.execute(newsql).rowcount
    return rowcount
def deleteWhere(self, table: sqlalchemy.schema.Table, where: sqlalchemy.sql.ColumnElement) -> int:
    """Delete the rows of a table selected by a pre-built WHERE clause.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table that rows should be deleted from.
    where : `sqlalchemy.sql.ColumnElement`
        Expression used as-is as the ``WHERE`` clause of the delete
        query; rows for which it holds are deleted.

    Returns
    -------
    count : `int`
        Number of rows deleted.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    The default implementation should be sufficient for most derived
    classes.
    """
    self.assertTableWriteable(table, f"Cannot delete from read-only table {table}.")

    statement = table.delete().where(where)
    with self._transaction() as (_, connection):
        outcome = connection.execute(statement)
        return outcome.rowcount
def update(self, table: sqlalchemy.schema.Table, where: dict[str, str], *rows: dict) -> int:
    """Update one or more rows in a table.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table containing the rows to be updated.
    where : `dict` [`str`, `str`]
        Maps each column name used to find existing rows to the key under
        which the corresponding value is stored in the ``rows``
        dictionaries.  Note that these may not be the same due to
        SQLAlchemy limitations.
    *rows
        Positional arguments are the rows to be updated.  The keys in all
        dictionaries must be the same, and may correspond to either a
        value in the ``where`` dictionary or the name of a column to be
        updated.

    Returns
    -------
    count : `int`
        Number of rows matched (regardless of whether the update actually
        modified them).

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    The default implementation should be sufficient for most derived
    classes.
    """
    self.assertTableWriteable(table, f"Cannot update read-only table {table}.")
    if not rows:
        return 0
    statement = table.update()
    # One equality term per search column, bound to the per-row key named
    # by the ``where`` mapping.
    conditions = []
    for column_name, row_key in where.items():
        conditions.append(table.columns[column_name] == sqlalchemy.sql.bindparam(row_key))
    sql = statement.where(sqlalchemy.sql.and_(*conditions))
    with self._transaction() as (_, connection):
        outcome = connection.execute(sql, rows)
        return outcome.rowcount
@contextmanager
def query(
    self,
    sql: sqlalchemy.sql.expression.Executable | sqlalchemy.sql.expression.SelectBase,
    *args: Any,
    **kwargs: Any,
) -> Iterator[sqlalchemy.engine.CursorResult]:
    """Run a SELECT query against the database.

    Parameters
    ----------
    sql : `sqlalchemy.sql.expression.SelectBase`
        A SQLAlchemy representation of a ``SELECT`` query.
    *args
        Additional positional arguments are forwarded to
        `sqlalchemy.engine.Connection.execute`.
    **kwargs
        Additional keyword arguments are forwarded to
        `sqlalchemy.engine.Connection.execute`.

    Returns
    -------
    result_context : `sqlalchemy.engine.CursorResults`
        Context manager that returns the query result object when entered.
        These results are invalidated when the context is exited.

    Notes
    -----
    When there is no session connection, a new connection is opened for
    the query and closed when the context exits, including when executing
    the query itself raises.  An existing session connection is reused and
    left open.
    """
    if self._session_connection is None:
        connection = self._engine.connect()
    else:
        connection = self._session_connection
    try:
        # Execute inside the ``try`` so that a connection opened just for
        # this query is closed even if execution fails.
        #
        # TODO: SelectBase is not good for execute(), but it used everywhere,
        # e.g. in daf_relation.  We should switch to Executable at some point.
        result = connection.execute(cast("sqlalchemy.sql.expression.Executable", sql), *args, **kwargs)
        yield result
    finally:
        if connection is not self._session_connection:
            connection.close()
@abstractmethod
def constant_rows(
    self,
    fields: NamedValueAbstractSet[ddl.FieldSpec],
    *rows: dict,
    name: str | None = None,
) -> sqlalchemy.sql.FromClause:
    """Build a SQLAlchemy object standing in for a handful of literal
    rows.

    Parameters
    ----------
    fields : `NamedValueAbstractSet` [ `ddl.FieldSpec` ]
        The columns of the rows. Unique and foreign key constraints are
        ignored.
    *rows : `dict`
        Values for the rows.
    name : `str`, optional
        If provided, the name of the SQL construct. If not provided, an
        opaque but unique identifier is generated.

    Returns
    -------
    from_clause : `sqlalchemy.sql.FromClause`
        SQLAlchemy object representing the given rows. This is guaranteed
        to be something that can be directly joined into a ``SELECT``
        query's ``FROM`` clause, and will not involve a temporary table
        that needs to be cleaned up later.

    Notes
    -----
    The default implementation uses the SQL-standard ``VALUES`` construct,
    but support for that construct is varied enough across popular RDBMSs
    that the method is still marked abstract to force explicit opt-in via
    delegation to `super`.
    """
    if name is None:
        # No caller-supplied name: derive one from a random UUID.
        name = f"tmp_{uuid.uuid4().hex}"
    columns = [sqlalchemy.Column(field.name, field.getSizedColumnType()) for field in fields]
    values_clause = sqlalchemy.sql.values(*columns, name=name)
    # Each row dict becomes a tuple ordered by ``fields.names``.
    records = [tuple(row[field_name] for field_name in fields.names) for row in rows]
    return values_clause.data(records)
def get_constant_rows_max(self) -> int:
    """Report how many rows, at most, should be handed to `constant_rows`
    for this backend.

    Returns
    -------
    max : `int`
        Maximum number of rows.

    Notes
    -----
    This should reflect typical performance profiles (or a guess at these),
    not just hard database engine limits.
    """
    # Fixed limit returned by this implementation.
    row_limit = 100
    return row_limit
# Annotation-only declaration: no value is assigned at this point, so the
# attribute must be set elsewhere before it is read.
origin: int
"""An integer ID that should be used as the default for any datasets,
quanta, or other entities that use a (autoincrement, origin) compound
primary key (`int`).
"""

# Annotation-only declaration: no value is assigned at this point; the
# annotation allows `None` as well as a string.
namespace: str | None
"""The schema or namespace this database instance is associated with
(`str` or `None`).
"""