Coverage for python/lsst/daf/butler/registry/interfaces/_database.py: 22%
412 statements
« prev ^ index » next coverage.py v7.3.1, created at 2023-10-02 08:00 +0000
« prev ^ index » next coverage.py v7.3.1, created at 2023-10-02 08:00 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
27from __future__ import annotations
29__all__ = [
30 "Database",
31 "ReadOnlyDatabaseError",
32 "DatabaseConflictError",
33 "DatabaseInsertMode",
34 "SchemaAlreadyDefinedError",
35 "StaticTablesContext",
36]
38import enum
39import uuid
40import warnings
41from abc import ABC, abstractmethod
42from collections import defaultdict
43from collections.abc import Callable, Iterable, Iterator, Sequence
44from contextlib import contextmanager
45from typing import Any, cast, final
47import astropy.time
48import sqlalchemy
50from ...core import TimespanDatabaseRepresentation, ddl, time_utils
51from ...core.named import NamedValueAbstractSet
52from .._exceptions import ConflictingDefinitionError
55class DatabaseInsertMode(enum.Enum):
56 """Mode options available for inserting database records."""
58 INSERT = enum.auto()
59 """Insert records, failing if they already exist."""
61 REPLACE = enum.auto()
62 """Replace records, overwriting existing."""
64 ENSURE = enum.auto()
65 """Insert records, skipping any that already exist."""
68# TODO: method is called with list[ReflectedColumn] in SA 2, and
69# ReflectedColumn does not exist in 1.4.
70def _checkExistingTableDefinition(name: str, spec: ddl.TableSpec, inspection: list) -> None:
71 """Test that the definition of a table in a `ddl.TableSpec` and from
72 database introspection are consistent.
74 Parameters
75 ----------
76 name : `str`
77 Name of the table (only used in error messages).
78 spec : `ddl.TableSpec`
79 Specification of the table.
80 inspection : `dict`
81 Dictionary returned by
82 `sqlalchemy.engine.reflection.Inspector.get_columns`.
84 Raises
85 ------
86 DatabaseConflictError
87 Raised if the definitions are inconsistent.
88 """
89 columnNames = [c["name"] for c in inspection]
90 if spec.fields.names != set(columnNames):
91 raise DatabaseConflictError(
92 f"Table '{name}' exists but is defined differently in the database; "
93 f"specification has columns {list(spec.fields.names)}, while the "
94 f"table in the database has {columnNames}."
95 )
98class ReadOnlyDatabaseError(RuntimeError):
99 """Exception raised when a write operation is called on a read-only
100 `Database`.
101 """
104class DatabaseConflictError(ConflictingDefinitionError):
105 """Exception raised when database content (row values or schema entities)
106 are inconsistent with what this client expects.
107 """
110class SchemaAlreadyDefinedError(RuntimeError):
111 """Exception raised when trying to initialize database schema when some
112 tables already exist.
113 """
116class StaticTablesContext:
117 """Helper class used to declare the static schema for a registry layer
118 in a database.
120 An instance of this class is returned by `Database.declareStaticTables`,
121 which should be the only way it should be constructed.
122 """
124 def __init__(self, db: Database, connection: sqlalchemy.engine.Connection):
125 self._db = db
126 self._foreignKeys: list[tuple[sqlalchemy.schema.Table, sqlalchemy.schema.ForeignKeyConstraint]] = []
127 self._inspector = sqlalchemy.inspect(connection)
128 self._tableNames = frozenset(self._inspector.get_table_names(schema=self._db.namespace))
129 self._initializers: list[Callable[[Database], None]] = []
131 def addTable(self, name: str, spec: ddl.TableSpec) -> sqlalchemy.schema.Table:
132 """Add a new table to the schema, returning its sqlalchemy
133 representation.
135 The new table may not actually be created until the end of the
136 context created by `Database.declareStaticTables`, allowing tables
137 to be declared in any order even in the presence of foreign key
138 relationships.
139 """
140 name = self._db._mangleTableName(name)
141 if name in self._tableNames:
142 _checkExistingTableDefinition(
143 name, spec, self._inspector.get_columns(name, schema=self._db.namespace)
144 )
145 metadata = self._db._metadata
146 assert metadata is not None, "Guaranteed by context manager that returns this object."
147 table = self._db._convertTableSpec(name, spec, metadata)
148 for foreignKeySpec in spec.foreignKeys:
149 self._foreignKeys.append((table, self._db._convertForeignKeySpec(name, foreignKeySpec, metadata)))
150 return table
152 def addTableTuple(self, specs: tuple[ddl.TableSpec, ...]) -> tuple[sqlalchemy.schema.Table, ...]:
153 """Add a named tuple of tables to the schema, returning their
154 SQLAlchemy representations in a named tuple of the same type.
156 The new tables may not actually be created until the end of the
157 context created by `Database.declareStaticTables`, allowing tables
158 to be declared in any order even in the presence of foreign key
159 relationships.
161 Notes
162 -----
163 ``specs`` *must* be an instance of a type created by
164 `collections.namedtuple`, not just regular tuple, and the returned
165 object is guaranteed to be the same. Because `~collections.namedtuple`
166 is just a factory for `type` objects, not an actual type itself,
167 we cannot represent this with type annotations.
168 """
169 return specs._make( # type: ignore
170 self.addTable(name, spec) for name, spec in zip(specs._fields, specs, strict=True) # type: ignore
171 )
173 def addInitializer(self, initializer: Callable[[Database], None]) -> None:
174 """Add a method that does one-time initialization of a database.
176 Initialization can mean anything that changes state of a database
177 and needs to be done exactly once after database schema was created.
178 An example for that could be population of schema attributes.
180 Parameters
181 ----------
182 initializer : callable
183 Method of a single argument which is a `Database` instance.
184 """
185 self._initializers.append(initializer)
188class Database(ABC):
189 """An abstract interface that represents a particular database engine's
190 representation of a single schema/namespace/database.
192 Parameters
193 ----------
194 origin : `int`
195 An integer ID that should be used as the default for any datasets,
196 quanta, or other entities that use a (autoincrement, origin) compound
197 primary key.
198 engine : `sqlalchemy.engine.Engine`
199 The SQLAlchemy engine for this `Database`.
200 namespace : `str`, optional
201 Name of the schema or namespace this instance is associated with.
202 This is passed as the ``schema`` argument when constructing a
203 `sqlalchemy.schema.MetaData` instance. We use ``namespace`` instead to
204 avoid confusion between "schema means namespace" and "schema means
205 table definitions".
207 Notes
208 -----
209 `Database` requires all write operations to go through its special named
210 methods. Our write patterns are sufficiently simple that we don't really
211 need the full flexibility of SQL insert/update/delete syntax, and we need
212 non-standard (but common) functionality in these operations sufficiently
213 often that it seems worthwhile to provide our own generic API.
215 In contrast, `Database.query` allows arbitrary ``SELECT`` queries (via
216 their SQLAlchemy representation) to be run, as we expect these to require
217 significantly more sophistication while still being limited to standard
218 SQL.
220 `Database` itself has several underscore-prefixed attributes:
222 - ``_engine``: SQLAlchemy object representing its engine.
223 - ``_connection``: method returning a context manager for
224 `sqlalchemy.engine.Connection` object.
225 - ``_metadata``: the `sqlalchemy.schema.MetaData` object representing
226 the tables and other schema entities.
228 These are considered protected (derived classes may access them, but other
229 code should not), and read-only, aside from executing SQL via
230 ``_connection``.
231 """
233 def __init__(self, *, origin: int, engine: sqlalchemy.engine.Engine, namespace: str | None = None):
234 self.origin = origin
235 self.namespace = namespace
236 self._engine = engine
237 self._session_connection: sqlalchemy.engine.Connection | None = None
238 self._metadata: sqlalchemy.schema.MetaData | None = None
239 self._temp_tables: set[str] = set()
241 def __repr__(self) -> str:
242 # Rather than try to reproduce all the parameters used to create
243 # the object, instead report the more useful information of the
244 # connection URL.
245 if self._engine.url.password is not None:
246 uri = str(self._engine.url.set(password="***"))
247 else:
248 uri = str(self._engine.url)
249 if self.namespace:
250 uri += f"#{self.namespace}"
251 return f'{type(self).__name__}("{uri}")'
253 @classmethod
254 def makeDefaultUri(cls, root: str) -> str | None:
255 """Create a default connection URI appropriate for the given root
256 directory, or `None` if there can be no such default.
257 """
258 return None
260 @classmethod
261 def fromUri(
262 cls,
263 uri: str | sqlalchemy.engine.URL,
264 *,
265 origin: int,
266 namespace: str | None = None,
267 writeable: bool = True,
268 ) -> Database:
269 """Construct a database from a SQLAlchemy URI.
271 Parameters
272 ----------
273 uri : `str` or `sqlalchemy.engine.URL`
274 A SQLAlchemy URI connection string.
275 origin : `int`
276 An integer ID that should be used as the default for any datasets,
277 quanta, or other entities that use a (autoincrement, origin)
278 compound primary key.
279 namespace : `str`, optional
280 A database namespace (i.e. schema) the new instance should be
281 associated with. If `None` (default), the namespace (if any) is
282 inferred from the URI.
283 writeable : `bool`, optional
284 If `True`, allow write operations on the database, including
285 ``CREATE TABLE``.
287 Returns
288 -------
289 db : `Database`
290 A new `Database` instance.
291 """
292 return cls.fromEngine(
293 cls.makeEngine(uri, writeable=writeable), origin=origin, namespace=namespace, writeable=writeable
294 )
296 @classmethod
297 @abstractmethod
298 def makeEngine(
299 cls, uri: str | sqlalchemy.engine.URL, *, writeable: bool = True
300 ) -> sqlalchemy.engine.Engine:
301 """Create a `sqlalchemy.engine.Engine` from a SQLAlchemy URI.
303 Parameters
304 ----------
305 uri : `str` or `sqlalchemy.engine.URL`
306 A SQLAlchemy URI connection string.
307 writeable : `bool`, optional
308 If `True`, allow write operations on the database, including
309 ``CREATE TABLE``.
311 Returns
312 -------
313 engine : `sqlalchemy.engine.Engine`
314 A database engine.
316 Notes
317 -----
318 Subclasses that support other ways to connect to a database are
319 encouraged to add optional arguments to their implementation of this
320 method, as long as they maintain compatibility with the base class
321 call signature.
322 """
323 raise NotImplementedError()
325 @classmethod
326 @abstractmethod
327 def fromEngine(
328 cls,
329 engine: sqlalchemy.engine.Engine,
330 *,
331 origin: int,
332 namespace: str | None = None,
333 writeable: bool = True,
334 ) -> Database:
335 """Create a new `Database` from an existing `sqlalchemy.engine.Engine`.
337 Parameters
338 ----------
339 engine : `sqlalchemy.engine.Engine`
340 The engine for the database. May be shared between `Database`
341 instances.
342 origin : `int`
343 An integer ID that should be used as the default for any datasets,
344 quanta, or other entities that use a (autoincrement, origin)
345 compound primary key.
346 namespace : `str`, optional
347 A different database namespace (i.e. schema) the new instance
348 should be associated with. If `None` (default), the namespace
349 (if any) is inferred from the connection.
350 writeable : `bool`, optional
351 If `True`, allow write operations on the database, including
352 ``CREATE TABLE``.
354 Returns
355 -------
356 db : `Database`
357 A new `Database` instance.
359 Notes
360 -----
361 This method allows different `Database` instances to share the same
362 engine, which is desirable when they represent different namespaces
363 can be queried together.
364 """
365 raise NotImplementedError()
367 @final
368 @contextmanager
369 def session(self) -> Iterator[None]:
370 """Return a context manager that represents a session (persistent
371 connection to a database).
373 Returns
374 -------
375 context : `AbstractContextManager` [ `None` ]
376 A context manager that does not return a value when entered.
378 Notes
379 -----
380 This method should be used when a sequence of read-only SQL operations
381 will be performed in rapid succession *without* a requirement that they
382 yield consistent results in the presence of concurrent writes (or, more
383 rarely, when conflicting concurrent writes are rare/impossible and the
384 session will be open long enough that a transaction is inadvisable).
385 """
386 with self._session():
387 yield
389 @final
390 @contextmanager
391 def transaction(
392 self,
393 *,
394 interrupting: bool = False,
395 savepoint: bool = False,
396 lock: Iterable[sqlalchemy.schema.Table] = (),
397 for_temp_tables: bool = False,
398 ) -> Iterator[None]:
399 """Return a context manager that represents a transaction.
401 Parameters
402 ----------
403 interrupting : `bool`, optional
404 If `True` (`False` is default), this transaction block may not be
405 nested without an outer one, and attempting to do so is a logic
406 (i.e. assertion) error.
407 savepoint : `bool`, optional
408 If `True` (`False` is default), create a `SAVEPOINT`, allowing
409 exceptions raised by the database (e.g. due to constraint
410 violations) during this transaction's context to be caught outside
411 it without also rolling back all operations in an outer transaction
412 block. If `False`, transactions may still be nested, but a
413 rollback may be generated at any level and affects all levels, and
414 commits are deferred until the outermost block completes. If any
415 outer transaction block was created with ``savepoint=True``, all
416 inner blocks will be as well (regardless of the actual value
417 passed). This has no effect if this is the outermost transaction.
418 lock : `~collections.abc.Iterable` [ `sqlalchemy.schema.Table` ], \
419 optional
420 A list of tables to lock for the duration of this transaction.
421 These locks are guaranteed to prevent concurrent writes and allow
422 this transaction (only) to acquire the same locks (others should
423 block), but only prevent concurrent reads if the database engine
424 requires that in order to block concurrent writes.
425 for_temp_tables : `bool`, optional
426 If `True`, this transaction may involve creating temporary tables.
428 Returns
429 -------
430 context : `AbstractContextManager` [ `None` ]
431 A context manager that commits the transaction when it is exited
432 without error and rolls back the transactoin when it is exited via
433 an exception.
435 Notes
436 -----
437 All transactions on a connection managed by one or more `Database`
438 instances _must_ go through this method, or transaction state will not
439 be correctly managed.
440 """
441 with self._transaction(
442 interrupting=interrupting, savepoint=savepoint, lock=lock, for_temp_tables=for_temp_tables
443 ):
444 yield
446 @contextmanager
447 def temporary_table(
448 self, spec: ddl.TableSpec, name: str | None = None
449 ) -> Iterator[sqlalchemy.schema.Table]:
450 """Return a context manager that creates and then drops a temporary
451 table.
453 Parameters
454 ----------
455 spec : `ddl.TableSpec`
456 Specification for the columns. Unique and foreign key constraints
457 may be ignored.
458 name : `str`, optional
459 If provided, the name of the SQL construct. If not provided, an
460 opaque but unique identifier is generated.
462 Returns
463 -------
464 context : `AbstractContextManager` [ `sqlalchemy.schema.Table` ]
465 A context manager that returns a SQLAlchemy representation of the
466 temporary table when entered.
468 Notes
469 -----
470 Temporary tables may be created, dropped, and written to even in
471 read-only databases - at least according to the Python-level
472 protections in the `Database` classes. Server permissions may say
473 otherwise, but in that case they probably need to be modified to
474 support the full range of expected read-only butler behavior.
475 """
476 with self._session() as connection:
477 table = self._make_temporary_table(connection, spec=spec, name=name)
478 self._temp_tables.add(table.key)
479 try:
480 yield table
481 finally:
482 with self._transaction():
483 table.drop(connection)
484 self._temp_tables.remove(table.key)
486 @contextmanager
487 def _session(self) -> Iterator[sqlalchemy.engine.Connection]:
488 """Protected implementation for `session` that actually returns the
489 connection.
491 This method is for internal `Database` calls that need the actual
492 SQLAlchemy connection object. It should be overridden by subclasses
493 instead of `session` itself.
495 Returns
496 -------
497 context : `AbstractContextManager` [ `sqlalchemy.engine.Connection` ]
498 A context manager that returns a SQLALchemy connection when
499 entered.
501 """
502 if self._session_connection is not None:
503 # session already started, just reuse that
504 yield self._session_connection
505 else:
506 try:
507 # open new connection and close it when done
508 self._session_connection = self._engine.connect()
509 yield self._session_connection
510 finally:
511 if self._session_connection is not None:
512 self._session_connection.close()
513 self._session_connection = None
514 # Temporary tables only live within session
515 self._temp_tables = set()
517 @contextmanager
518 def _transaction(
519 self,
520 *,
521 interrupting: bool = False,
522 savepoint: bool = False,
523 lock: Iterable[sqlalchemy.schema.Table] = (),
524 for_temp_tables: bool = False,
525 ) -> Iterator[tuple[bool, sqlalchemy.engine.Connection]]:
526 """Protected implementation for `transaction` that actually returns the
527 connection and whether this is a new outermost transaction.
529 This method is for internal `Database` calls that need the actual
530 SQLAlchemy connection object. It should be overridden by subclasses
531 instead of `transaction` itself.
533 Parameters
534 ----------
535 interrupting : `bool`, optional
536 If `True` (`False` is default), this transaction block may not be
537 nested without an outer one, and attempting to do so is a logic
538 (i.e. assertion) error.
539 savepoint : `bool`, optional
540 If `True` (`False` is default), create a `SAVEPOINT`, allowing
541 exceptions raised by the database (e.g. due to constraint
542 violations) during this transaction's context to be caught outside
543 it without also rolling back all operations in an outer transaction
544 block. If `False`, transactions may still be nested, but a
545 rollback may be generated at any level and affects all levels, and
546 commits are deferred until the outermost block completes. If any
547 outer transaction block was created with ``savepoint=True``, all
548 inner blocks will be as well (regardless of the actual value
549 passed). This has no effect if this is the outermost transaction.
550 lock : `~collections.abc.Iterable` [ `sqlalchemy.schema.Table` ], \
551 optional
552 A list of tables to lock for the duration of this transaction.
553 These locks are guaranteed to prevent concurrent writes and allow
554 this transaction (only) to acquire the same locks (others should
555 block), but only prevent concurrent reads if the database engine
556 requires that in order to block concurrent writes.
557 for_temp_tables : `bool`, optional
558 If `True`, this transaction may involve creating temporary tables.
560 Returns
561 -------
562 context : `AbstractContextManager` [ `tuple` [ `bool`,
563 `sqlalchemy.engine.Connection` ] ]
564 A context manager that commits the transaction when it is exited
565 without error and rolls back the transactoin when it is exited via
566 an exception. When entered, it returns a tuple of:
568 - ``is_new`` (`bool`): whether this is a new (outermost)
569 transaction;
570 - ``connection`` (`sqlalchemy.engine.Connection`): the connection.
571 """
572 with self._session() as connection:
573 already_in_transaction = connection.in_transaction()
574 assert not (interrupting and already_in_transaction), (
575 "Logic error in transaction nesting: an operation that would "
576 "interrupt the active transaction context has been requested."
577 )
578 savepoint = savepoint or connection.in_nested_transaction()
579 trans: sqlalchemy.engine.Transaction | None
580 if already_in_transaction:
581 if savepoint:
582 trans = connection.begin_nested()
583 else:
584 # Nested non-savepoint transactions don't do anything.
585 trans = None
586 else:
587 # Use a regular (non-savepoint) transaction always for the
588 # outermost context.
589 trans = connection.begin()
590 self._lockTables(connection, lock)
591 try:
592 yield not already_in_transaction, connection
593 if trans is not None:
594 trans.commit()
595 except BaseException:
596 if trans is not None:
597 trans.rollback()
598 raise
600 @abstractmethod
601 def _lockTables(
602 self, connection: sqlalchemy.engine.Connection, tables: Iterable[sqlalchemy.schema.Table] = ()
603 ) -> None:
604 """Acquire locks on the given tables.
606 This is an implementation hook for subclasses, called by `transaction`.
607 It should not be called directly by other code.
609 Parameters
610 ----------
611 connection : `sqlalchemy.engine.Connection`
612 Database connection object. It is guaranteed that transaction is
613 already in a progress for this connection.
614 tables : `~collections.abc.Iterable` [ `sqlalchemy.schema.Table` ], \
615 optional
616 A list of tables to lock for the duration of this transaction.
617 These locks are guaranteed to prevent concurrent writes and allow
618 this transaction (only) to acquire the same locks (others should
619 block), but only prevent concurrent reads if the database engine
620 requires that in order to block concurrent writes.
621 """
622 raise NotImplementedError()
624 def isTableWriteable(self, table: sqlalchemy.schema.Table) -> bool:
625 """Check whether a table is writeable, either because the database
626 connection is read-write or the table is a temporary table.
628 Parameters
629 ----------
630 table : `sqlalchemy.schema.Table`
631 SQLAlchemy table object to check.
633 Returns
634 -------
635 writeable : `bool`
636 Whether this table is writeable.
637 """
638 return self.isWriteable() or table.key in self._temp_tables
640 def assertTableWriteable(self, table: sqlalchemy.schema.Table, msg: str) -> None:
641 """Raise if the given table is not writeable, either because the
642 database connection is read-write or the table is a temporary table.
644 Parameters
645 ----------
646 table : `sqlalchemy.schema.Table`
647 SQLAlchemy table object to check.
648 msg : `str`, optional
649 If provided, raise `ReadOnlyDatabaseError` instead of returning
650 `False`, with this message.
651 """
652 if not self.isTableWriteable(table):
653 raise ReadOnlyDatabaseError(msg)
655 @contextmanager
656 def declareStaticTables(self, *, create: bool) -> Iterator[StaticTablesContext]:
657 """Return a context manager in which the database's static DDL schema
658 can be declared.
660 Parameters
661 ----------
662 create : `bool`
663 If `True`, attempt to create all tables at the end of the context.
664 If `False`, they will be assumed to already exist.
666 Returns
667 -------
668 schema : `StaticTablesContext`
669 A helper object that is used to add new tables.
671 Raises
672 ------
673 ReadOnlyDatabaseError
674 Raised if ``create`` is `True`, `Database.isWriteable` is `False`,
675 and one or more declared tables do not already exist.
677 Examples
678 --------
679 Given a `Database` instance ``db``::
681 with db.declareStaticTables(create=True) as schema:
682 schema.addTable("table1", TableSpec(...))
683 schema.addTable("table2", TableSpec(...))
685 Notes
686 -----
687 A database's static DDL schema must be declared before any dynamic
688 tables are managed via calls to `ensureTableExists` or
689 `getExistingTable`. The order in which static schema tables are added
690 inside the context block is unimportant; they will automatically be
691 sorted and added in an order consistent with their foreign key
692 relationships.
693 """
694 if create and not self.isWriteable():
695 raise ReadOnlyDatabaseError(f"Cannot create tables in read-only database {self}.")
696 self._metadata = sqlalchemy.MetaData(schema=self.namespace)
697 try:
698 with self._transaction() as (_, connection):
699 context = StaticTablesContext(self, connection)
700 if create and context._tableNames:
701 # Looks like database is already initalized, to avoid
702 # danger of modifying/destroying valid schema we refuse to
703 # do anything in this case
704 raise SchemaAlreadyDefinedError(f"Cannot create tables in non-empty database {self}.")
705 yield context
706 for table, foreignKey in context._foreignKeys:
707 table.append_constraint(foreignKey)
708 if create:
709 if (
710 self.namespace is not None
711 and self.namespace not in context._inspector.get_schema_names()
712 ):
713 connection.execute(sqlalchemy.schema.CreateSchema(self.namespace))
714 # In our tables we have columns that make use of sqlalchemy
715 # Sequence objects. There is currently a bug in sqlalchemy
716 # that causes a deprecation warning to be thrown on a
717 # property of the Sequence object when the repr for the
718 # sequence is created. Here a filter is used to catch these
719 # deprecation warnings when tables are created.
720 with warnings.catch_warnings():
721 warnings.simplefilter("ignore", category=sqlalchemy.exc.SADeprecationWarning)
722 self._metadata.create_all(connection)
723 # call all initializer methods sequentially
724 for init in context._initializers:
725 init(self)
726 except BaseException:
727 self._metadata = None
728 raise
730 @abstractmethod
731 def isWriteable(self) -> bool:
732 """Return `True` if this database can be modified by this client."""
733 raise NotImplementedError()
735 @abstractmethod
736 def __str__(self) -> str:
737 """Return a human-readable identifier for this `Database`, including
738 any namespace or schema that identifies its names within a `Registry`.
739 """
740 raise NotImplementedError()
742 @property
743 def dialect(self) -> sqlalchemy.engine.Dialect:
744 """The SQLAlchemy dialect for this database engine
745 (`sqlalchemy.engine.Dialect`).
746 """
747 return self._engine.dialect
749 def shrinkDatabaseEntityName(self, original: str) -> str:
750 """Return a version of the given name that fits within this database
751 engine's length limits for table, constraint, indexes, and sequence
752 names.
754 Implementations should not assume that simple truncation is safe,
755 because multiple long names often begin with the same prefix.
757 The default implementation simply returns the given name.
759 Parameters
760 ----------
761 original : `str`
762 The original name.
764 Returns
765 -------
766 shrunk : `str`
767 The new, possibly shortened name.
768 """
769 return original
771 def expandDatabaseEntityName(self, shrunk: str) -> str:
772 """Retrieve the original name for a database entity that was too long
773 to fit within the database engine's limits.
775 Parameters
776 ----------
777 original : `str`
778 The original name.
780 Returns
781 -------
782 shrunk : `str`
783 The new, possibly shortened name.
784 """
785 return shrunk
787 def _mangleTableName(self, name: str) -> str:
788 """Map a logical, user-visible table name to the true table name used
789 in the database.
791 The default implementation returns the given name unchanged.
793 Parameters
794 ----------
795 name : `str`
796 Input table name. Should not include a namespace (i.e. schema)
797 prefix.
799 Returns
800 -------
801 mangled : `str`
802 Mangled version of the table name (still with no namespace prefix).
804 Notes
805 -----
806 Reimplementations of this method must be idempotent - mangling an
807 already-mangled name must have no effect.
808 """
809 return name
811 def _makeColumnConstraints(self, table: str, spec: ddl.FieldSpec) -> list[sqlalchemy.CheckConstraint]:
812 """Create constraints based on this spec.
814 Parameters
815 ----------
816 table : `str`
817 Name of the table this column is being added to.
818 spec : `FieldSpec`
819 Specification for the field to be added.
821 Returns
822 -------
823 constraint : `list` of `sqlalchemy.CheckConstraint`
824 Constraint added for this column.
825 """
826 # By default we return no additional constraints
827 return []
829 def _convertFieldSpec(
830 self, table: str, spec: ddl.FieldSpec, metadata: sqlalchemy.MetaData, **kwargs: Any
831 ) -> sqlalchemy.schema.Column:
832 """Convert a `FieldSpec` to a `sqlalchemy.schema.Column`.
834 Parameters
835 ----------
836 table : `str`
837 Name of the table this column is being added to.
838 spec : `FieldSpec`
839 Specification for the field to be added.
840 metadata : `sqlalchemy.MetaData`
841 SQLAlchemy representation of the DDL schema this field's table is
842 being added to.
843 **kwargs
844 Additional keyword arguments to forward to the
845 `sqlalchemy.schema.Column` constructor. This is provided to make
846 it easier for derived classes to delegate to ``super()`` while
847 making only minor changes.
849 Returns
850 -------
851 column : `sqlalchemy.schema.Column`
852 SQLAlchemy representation of the field.
853 """
854 args = []
855 if spec.autoincrement:
856 # Generate a sequence to use for auto incrementing for databases
857 # that do not support it natively. This will be ignored by
858 # sqlalchemy for databases that do support it.
859 args.append(
860 sqlalchemy.Sequence(
861 self.shrinkDatabaseEntityName(f"{table}_seq_{spec.name}"), metadata=metadata
862 )
863 )
864 assert spec.doc is None or isinstance(spec.doc, str), f"Bad doc for {table}.{spec.name}."
865 return sqlalchemy.schema.Column(
866 spec.name,
867 spec.getSizedColumnType(),
868 *args,
869 nullable=spec.nullable,
870 primary_key=spec.primaryKey,
871 comment=spec.doc,
872 server_default=spec.default,
873 **kwargs,
874 )
876 def _convertForeignKeySpec(
877 self, table: str, spec: ddl.ForeignKeySpec, metadata: sqlalchemy.MetaData, **kwargs: Any
878 ) -> sqlalchemy.schema.ForeignKeyConstraint:
879 """Convert a `ForeignKeySpec` to a
880 `sqlalchemy.schema.ForeignKeyConstraint`.
882 Parameters
883 ----------
884 table : `str`
885 Name of the table this foreign key is being added to.
886 spec : `ForeignKeySpec`
887 Specification for the foreign key to be added.
888 metadata : `sqlalchemy.MetaData`
889 SQLAlchemy representation of the DDL schema this constraint is
890 being added to.
891 **kwargs
892 Additional keyword arguments to forward to the
893 `sqlalchemy.schema.ForeignKeyConstraint` constructor. This is
894 provided to make it easier for derived classes to delegate to
895 ``super()`` while making only minor changes.
897 Returns
898 -------
899 constraint : `sqlalchemy.schema.ForeignKeyConstraint`
900 SQLAlchemy representation of the constraint.
901 """
902 name = self.shrinkDatabaseEntityName(
903 "_".join(
904 ["fkey", table, self._mangleTableName(spec.table)] + list(spec.target) + list(spec.source)
905 )
906 )
907 return sqlalchemy.schema.ForeignKeyConstraint(
908 spec.source,
909 [f"{self._mangleTableName(spec.table)}.{col}" for col in spec.target],
910 name=name,
911 ondelete=spec.onDelete,
912 )
914 def _convertExclusionConstraintSpec(
915 self,
916 table: str,
917 spec: tuple[str | type[TimespanDatabaseRepresentation], ...],
918 metadata: sqlalchemy.MetaData,
919 ) -> sqlalchemy.schema.Constraint:
920 """Convert a `tuple` from `ddl.TableSpec.exclusion` into a SQLAlchemy
921 constraint representation.
923 Parameters
924 ----------
925 table : `str`
926 Name of the table this constraint is being added to.
927 spec : `tuple` [ `str` or `type` ]
928 A tuple of `str` column names and the `type` object returned by
929 `getTimespanRepresentation` (which must appear exactly once),
930 indicating the order of the columns in the index used to back the
931 constraint.
932 metadata : `sqlalchemy.MetaData`
933 SQLAlchemy representation of the DDL schema this constraint is
934 being added to.
936 Returns
937 -------
938 constraint : `sqlalchemy.schema.Constraint`
939 SQLAlchemy representation of the constraint.
941 Raises
942 ------
943 NotImplementedError
944 Raised if this database does not support exclusion constraints.
945 """
946 raise NotImplementedError(f"Database {self} does not support exclusion constraints.")
948 def _convertTableSpec(
949 self, name: str, spec: ddl.TableSpec, metadata: sqlalchemy.MetaData, **kwargs: Any
950 ) -> sqlalchemy.schema.Table:
951 """Convert a `TableSpec` to a `sqlalchemy.schema.Table`.
953 Parameters
954 ----------
955 spec : `TableSpec`
956 Specification for the foreign key to be added.
957 metadata : `sqlalchemy.MetaData`
958 SQLAlchemy representation of the DDL schema this table is being
959 added to.
960 **kwargs
961 Additional keyword arguments to forward to the
962 `sqlalchemy.schema.Table` constructor. This is provided to make it
963 easier for derived classes to delegate to ``super()`` while making
964 only minor changes.
966 Returns
967 -------
968 table : `sqlalchemy.schema.Table`
969 SQLAlchemy representation of the table.
971 Notes
972 -----
973 This method does not handle ``spec.foreignKeys`` at all, in order to
974 avoid circular dependencies. These are added by higher-level logic in
975 `ensureTableExists`, `getExistingTable`, and `declareStaticTables`.
976 """
977 name = self._mangleTableName(name)
978 args: list[sqlalchemy.schema.SchemaItem] = [
979 self._convertFieldSpec(name, fieldSpec, metadata) for fieldSpec in spec.fields
980 ]
982 # Add any column constraints
983 for fieldSpec in spec.fields:
984 args.extend(self._makeColumnConstraints(name, fieldSpec))
986 # Track indexes added for primary key and unique constraints, to make
987 # sure we don't add duplicate explicit or foreign key indexes for
988 # those.
989 allIndexes = {tuple(fieldSpec.name for fieldSpec in spec.fields if fieldSpec.primaryKey)}
990 args.extend(
991 sqlalchemy.schema.UniqueConstraint(
992 *columns, name=self.shrinkDatabaseEntityName("_".join([name, "unq"] + list(columns)))
993 )
994 for columns in spec.unique
995 )
996 allIndexes.update(spec.unique)
997 args.extend(
998 sqlalchemy.schema.Index(
999 self.shrinkDatabaseEntityName("_".join([name, "idx"] + list(index.columns))),
1000 *index.columns,
1001 unique=(index.columns in spec.unique),
1002 **index.kwargs,
1003 )
1004 for index in spec.indexes
1005 if index.columns not in allIndexes
1006 )
1007 allIndexes.update(index.columns for index in spec.indexes)
1008 args.extend(
1009 sqlalchemy.schema.Index(
1010 self.shrinkDatabaseEntityName("_".join((name, "fkidx") + fk.source)),
1011 *fk.source,
1012 )
1013 for fk in spec.foreignKeys
1014 if fk.addIndex and fk.source not in allIndexes
1015 )
1017 args.extend(self._convertExclusionConstraintSpec(name, excl, metadata) for excl in spec.exclusion)
1019 assert spec.doc is None or isinstance(spec.doc, str), f"Bad doc for {name}."
1020 return sqlalchemy.schema.Table(name, metadata, *args, comment=spec.doc, info={"spec": spec}, **kwargs)
1022 def ensureTableExists(self, name: str, spec: ddl.TableSpec) -> sqlalchemy.schema.Table:
1023 """Ensure that a table with the given name and specification exists,
1024 creating it if necessary.
1026 Parameters
1027 ----------
1028 name : `str`
1029 Name of the table (not including namespace qualifiers).
1030 spec : `TableSpec`
1031 Specification for the table. This will be used when creating the
1032 table, and *may* be used when obtaining an existing table to check
1033 for consistency, but no such check is guaranteed.
1035 Returns
1036 -------
1037 table : `sqlalchemy.schema.Table`
1038 SQLAlchemy representation of the table.
1040 Raises
1041 ------
1042 ReadOnlyDatabaseError
1043 Raised if `isWriteable` returns `False`, and the table does not
1044 already exist.
1045 DatabaseConflictError
1046 Raised if the table exists but ``spec`` is inconsistent with its
1047 definition.
1049 Notes
1050 -----
1051 This method may not be called within transactions. It may be called on
1052 read-only databases if and only if the table does in fact already
1053 exist.
1055 Subclasses may override this method, but usually should not need to.
1056 """
1057 # TODO: if _engine is used to make a table then it uses separate
1058 # connection and should not interfere with current transaction
1059 assert (
1060 self._session_connection is None or not self._session_connection.in_transaction()
1061 ), "Table creation interrupts transactions."
1062 assert self._metadata is not None, "Static tables must be declared before dynamic tables."
1063 table = self.getExistingTable(name, spec)
1064 if table is not None:
1065 return table
1066 if not self.isWriteable():
1067 raise ReadOnlyDatabaseError(
1068 f"Table {name} does not exist, and cannot be created because database {self} is read-only."
1069 )
1070 table = self._convertTableSpec(name, spec, self._metadata)
1071 for foreignKeySpec in spec.foreignKeys:
1072 table.append_constraint(self._convertForeignKeySpec(name, foreignKeySpec, self._metadata))
1073 try:
1074 with self._transaction() as (_, connection):
1075 table.create(connection)
1076 except sqlalchemy.exc.DatabaseError:
1077 # Some other process could have created the table meanwhile, which
1078 # usually causes OperationalError or ProgrammingError. We cannot
1079 # use IF NOT EXISTS clause in this case due to PostgreSQL race
1080 # condition on server side which causes IntegrityError. Instead we
1081 # catch these exceptions (they all inherit DatabaseError) and
1082 # re-check whether table is now there.
1083 table = self.getExistingTable(name, spec)
1084 if table is None:
1085 raise
1086 return table
1088 def getExistingTable(self, name: str, spec: ddl.TableSpec) -> sqlalchemy.schema.Table | None:
1089 """Obtain an existing table with the given name and specification.
1091 Parameters
1092 ----------
1093 name : `str`
1094 Name of the table (not including namespace qualifiers).
1095 spec : `TableSpec`
1096 Specification for the table. This will be used when creating the
1097 SQLAlchemy representation of the table, and it is used to
1098 check that the actual table in the database is consistent.
1100 Returns
1101 -------
1102 table : `sqlalchemy.schema.Table` or `None`
1103 SQLAlchemy representation of the table, or `None` if it does not
1104 exist.
1106 Raises
1107 ------
1108 DatabaseConflictError
1109 Raised if the table exists but ``spec`` is inconsistent with its
1110 definition.
1112 Notes
1113 -----
1114 This method can be called within transactions and never modifies the
1115 database.
1117 Subclasses may override this method, but usually should not need to.
1118 """
1119 assert self._metadata is not None, "Static tables must be declared before dynamic tables."
1120 name = self._mangleTableName(name)
1121 table = self._metadata.tables.get(name if self.namespace is None else f"{self.namespace}.{name}")
1122 if table is not None:
1123 if spec.fields.names != set(table.columns.keys()):
1124 raise DatabaseConflictError(
1125 f"Table '{name}' has already been defined differently; the new "
1126 f"specification has columns {list(spec.fields.names)}, while "
1127 f"the previous definition has {list(table.columns.keys())}."
1128 )
1129 else:
1130 inspector = sqlalchemy.inspect(
1131 self._engine if self._session_connection is None else self._session_connection, raiseerr=True
1132 )
1133 if name in inspector.get_table_names(schema=self.namespace):
1134 _checkExistingTableDefinition(name, spec, inspector.get_columns(name, schema=self.namespace))
1135 table = self._convertTableSpec(name, spec, self._metadata)
1136 for foreignKeySpec in spec.foreignKeys:
1137 table.append_constraint(self._convertForeignKeySpec(name, foreignKeySpec, self._metadata))
1138 return table
1139 return table
1141 def _make_temporary_table(
1142 self,
1143 connection: sqlalchemy.engine.Connection,
1144 spec: ddl.TableSpec,
1145 name: str | None = None,
1146 **kwargs: Any,
1147 ) -> sqlalchemy.schema.Table:
1148 """Create a temporary table.
1150 Parameters
1151 ----------
1152 connection : `sqlalchemy.engine.Connection`
1153 Connection to use when creating the table.
1154 spec : `TableSpec`
1155 Specification for the table.
1156 name : `str`, optional
1157 A unique (within this session/connetion) name for the table.
1158 Subclasses may override to modify the actual name used. If not
1159 provided, a unique name will be generated.
1160 **kwargs
1161 Additional keyword arguments to forward to the
1162 `sqlalchemy.schema.Table` constructor. This is provided to make it
1163 easier for derived classes to delegate to ``super()`` while making
1164 only minor changes.
1166 Returns
1167 -------
1168 table : `sqlalchemy.schema.Table`
1169 SQLAlchemy representation of the table.
1170 """
1171 if name is None:
1172 name = f"tmp_{uuid.uuid4().hex}"
1173 metadata = self._metadata
1174 if metadata is None:
1175 raise RuntimeError("Cannot create temporary table before static schema is defined.")
1176 table = self._convertTableSpec(
1177 name, spec, metadata, prefixes=["TEMPORARY"], schema=sqlalchemy.schema.BLANK_SCHEMA, **kwargs
1178 )
1179 if table.key in self._temp_tables and table.key != name:
1180 raise ValueError(
1181 f"A temporary table with name {name} (transformed to {table.key} by "
1182 "Database) already exists."
1183 )
1184 for foreignKeySpec in spec.foreignKeys:
1185 table.append_constraint(self._convertForeignKeySpec(name, foreignKeySpec, metadata))
1186 with self._transaction():
1187 table.create(connection)
1188 return table
1190 @classmethod
1191 def getTimespanRepresentation(cls) -> type[TimespanDatabaseRepresentation]:
1192 """Return a `type` that encapsulates the way `Timespan` objects are
1193 stored in this database.
1195 `Database` does not automatically use the return type of this method
1196 anywhere else; calling code is responsible for making sure that DDL
1197 and queries are consistent with it.
1199 Returns
1200 -------
1201 TimespanReprClass : `type` (`TimespanDatabaseRepresention` subclass)
1202 A type that encapsulates the way `Timespan` objects should be
1203 stored in this database.
1205 Notes
1206 -----
1207 There are two big reasons we've decided to keep timespan-mangling logic
1208 outside the `Database` implementations, even though the choice of
1209 representation is ultimately up to a `Database` implementation:
1211 - Timespans appear in relatively few tables and queries in our
1212 typical usage, and the code that operates on them is already aware
1213 that it is working with timespans. In contrast, a
1214 timespan-representation-aware implementation of, say, `insert`,
1215 would need to have extra logic to identify when timespan-mangling
1216 needed to occur, which would usually be useless overhead.
1218 - SQLAlchemy's rich SELECT query expression system has no way to wrap
1219 multiple columns in a single expression object (the ORM does, but
1220 we are not using the ORM). So we would have to wrap _much_ more of
1221 that code in our own interfaces to encapsulate timespan
1222 representations there.
1223 """
1224 return TimespanDatabaseRepresentation.Compound
1226 def sync(
1227 self,
1228 table: sqlalchemy.schema.Table,
1229 *,
1230 keys: dict[str, Any],
1231 compared: dict[str, Any] | None = None,
1232 extra: dict[str, Any] | None = None,
1233 returning: Sequence[str] | None = None,
1234 update: bool = False,
1235 ) -> tuple[dict[str, Any] | None, bool | dict[str, Any]]:
1236 """Insert into a table as necessary to ensure database contains
1237 values equivalent to the given ones.
1239 Parameters
1240 ----------
1241 table : `sqlalchemy.schema.Table`
1242 Table to be queried and possibly inserted into.
1243 keys : `dict`
1244 Column name-value pairs used to search for an existing row; must
1245 be a combination that can be used to select a single row if one
1246 exists. If such a row does not exist, these values are used in
1247 the insert.
1248 compared : `dict`, optional
1249 Column name-value pairs that are compared to those in any existing
1250 row. If such a row does not exist, these rows are used in the
1251 insert.
1252 extra : `dict`, optional
1253 Column name-value pairs that are ignored if a matching row exists,
1254 but used in an insert if one is necessary.
1255 returning : `~collections.abc.Sequence` of `str`, optional
1256 The names of columns whose values should be returned.
1257 update : `bool`, optional
1258 If `True` (`False` is default), update the existing row with the
1259 values in ``compared`` instead of raising `DatabaseConflictError`.
1261 Returns
1262 -------
1263 row : `dict`, optional
1264 The value of the fields indicated by ``returning``, or `None` if
1265 ``returning`` is `None`.
1266 inserted_or_updated : `bool` or `dict`
1267 If `True`, a new row was inserted; if `False`, a matching row
1268 already existed. If a `dict` (only possible if ``update=True``),
1269 then an existing row was updated, and the dict maps the names of
1270 the updated columns to their *old* values (new values can be
1271 obtained from ``compared``).
1273 Raises
1274 ------
1275 DatabaseConflictError
1276 Raised if the values in ``compared`` do not match the values in the
1277 database.
1278 ReadOnlyDatabaseError
1279 Raised if `isWriteable` returns `False`, and no matching record
1280 already exists.
1282 Notes
1283 -----
1284 May be used inside transaction contexts, so implementations may not
1285 perform operations that interrupt transactions.
1287 It may be called on read-only databases if and only if the matching row
1288 does in fact already exist.
1289 """
1291 def check() -> tuple[int, dict[str, Any] | None, list | None]:
1292 """Query for a row that matches the ``key`` argument, and compare
1293 to what was given by the caller.
1295 Returns
1296 -------
1297 n : `int`
1298 Number of matching rows. ``n != 1`` is always an error, but
1299 it's a different kind of error depending on where `check` is
1300 being called.
1301 bad : `dict` or `None`
1302 The subset of the keys of ``compared`` for which the existing
1303 values did not match the given one, mapped to the existing
1304 values in the database. Once again, ``not bad`` is always an
1305 error, but a different kind on context. `None` if ``n != 1``
1306 result : `list` or `None`
1307 Results in the database that correspond to the columns given
1308 in ``returning``, or `None` if ``returning is None``.
1309 """
1310 toSelect: set[str] = set()
1311 if compared is not None:
1312 toSelect.update(compared.keys())
1313 if returning is not None:
1314 toSelect.update(returning)
1315 if not toSelect:
1316 # Need to select some column, even if we just want to see
1317 # how many rows we get back.
1318 toSelect.add(next(iter(keys.keys())))
1319 selectSql = (
1320 sqlalchemy.sql.select(*[table.columns[k].label(k) for k in toSelect])
1321 .select_from(table)
1322 .where(sqlalchemy.sql.and_(*[table.columns[k] == v for k, v in keys.items()]))
1323 )
1324 with self._transaction() as (_, connection):
1325 fetched = list(connection.execute(selectSql).mappings())
1326 if len(fetched) != 1:
1327 return len(fetched), None, None
1328 existing = fetched[0]
1329 if compared is not None:
1331 def safeNotEqual(a: Any, b: Any) -> bool:
1332 if isinstance(a, astropy.time.Time):
1333 return not time_utils.TimeConverter().times_equal(a, b)
1334 return a != b
1336 inconsistencies = {
1337 k: existing[k] for k, v in compared.items() if safeNotEqual(existing[k], v)
1338 }
1339 else:
1340 inconsistencies = {}
1341 if returning is not None:
1342 toReturn: list | None = [existing[k] for k in returning]
1343 else:
1344 toReturn = None
1345 return 1, inconsistencies, toReturn
1347 def format_bad(inconsistencies: dict[str, Any]) -> str:
1348 """Format the 'bad' dictionary of existing values returned by
1349 ``check`` into a string suitable for an error message.
1350 """
1351 assert compared is not None, "Should not be able to get inconsistencies without comparing."
1352 return ", ".join(f"{k}: {v!r} != {compared[k]!r}" for k, v in inconsistencies.items())
1354 if self.isTableWriteable(table):
1355 # Try an insert first, but allow it to fail (in only specific
1356 # ways).
1357 row = keys.copy()
1358 if compared is not None:
1359 row.update(compared)
1360 if extra is not None:
1361 row.update(extra)
1362 with self.transaction():
1363 inserted = bool(self.ensure(table, row))
1364 inserted_or_updated: bool | dict[str, Any]
1365 # Need to perform check() for this branch inside the
1366 # transaction, so we roll back an insert that didn't do
1367 # what we expected. That limits the extent to which we
1368 # can reduce duplication between this block and the other
1369 # ones that perform similar logic.
1370 n, bad, result = check()
1371 if n < 1:
1372 raise ConflictingDefinitionError(
1373 f"Attempted to ensure {row} exists by inserting it with ON CONFLICT IGNORE, "
1374 f"but a post-insert query on {keys} returned no results. "
1375 f"Insert was {'' if inserted else 'not '}reported as successful. "
1376 "This can occur if the insert violated a database constraint other than the "
1377 "unique constraint or primary key used to identify the row in this call."
1378 )
1379 elif n > 1:
1380 raise RuntimeError(
1381 f"Keys passed to sync {keys.keys()} do not comprise a "
1382 f"unique constraint for table {table.name}."
1383 )
1384 elif bad:
1385 assert (
1386 compared is not None
1387 ), "Should not be able to get inconsistencies without comparing."
1388 if inserted:
1389 raise RuntimeError(
1390 f"Conflict ({bad}) in sync after successful insert; this is "
1391 "possible if the same table is being updated by a concurrent "
1392 "process that isn't using sync, but it may also be a bug in "
1393 "daf_butler."
1394 )
1395 elif update:
1396 with self._transaction() as (_, connection):
1397 connection.execute(
1398 table.update()
1399 .where(sqlalchemy.sql.and_(*[table.columns[k] == v for k, v in keys.items()]))
1400 .values(**{k: compared[k] for k in bad})
1401 )
1402 inserted_or_updated = bad
1403 else:
1404 raise DatabaseConflictError(
1405 f"Conflict in sync for table {table.name} on column(s) {format_bad(bad)}."
1406 )
1407 else:
1408 inserted_or_updated = inserted
1409 else:
1410 # Database is not writeable; just see if the row exists.
1411 n, bad, result = check()
1412 if n < 1:
1413 raise ReadOnlyDatabaseError("sync needs to insert, but database is read-only.")
1414 elif n > 1:
1415 raise RuntimeError("Keys passed to sync do not comprise a unique constraint.")
1416 elif bad:
1417 if update:
1418 raise ReadOnlyDatabaseError("sync needs to update, but database is read-only.")
1419 else:
1420 raise DatabaseConflictError(
1421 f"Conflict in sync for table {table.name} on column(s) {format_bad(bad)}."
1422 )
1423 inserted_or_updated = False
1424 if returning is None:
1425 return None, inserted_or_updated
1426 else:
1427 assert result is not None
1428 return dict(zip(returning, result, strict=True)), inserted_or_updated
1430 def insert(
1431 self,
1432 table: sqlalchemy.schema.Table,
1433 *rows: dict,
1434 returnIds: bool = False,
1435 select: sqlalchemy.sql.expression.SelectBase | None = None,
1436 names: Iterable[str] | None = None,
1437 ) -> list[int] | None:
1438 """Insert one or more rows into a table, optionally returning
1439 autoincrement primary key values.
1441 Parameters
1442 ----------
1443 table : `sqlalchemy.schema.Table`
1444 Table rows should be inserted into.
1445 returnIds: `bool`
1446 If `True` (`False` is default), return the values of the table's
1447 autoincrement primary key field (which much exist).
1448 select : `sqlalchemy.sql.SelectBase`, optional
1449 A SELECT query expression to insert rows from. Cannot be provided
1450 with either ``rows`` or ``returnIds=True``.
1451 names : `~collections.abc.Iterable` [ `str` ], optional
1452 Names of columns in ``table`` to be populated, ordered to match the
1453 columns returned by ``select``. Ignored if ``select`` is `None`.
1454 If not provided, the columns returned by ``select`` must be named
1455 to match the desired columns of ``table``.
1456 *rows
1457 Positional arguments are the rows to be inserted, as dictionaries
1458 mapping column name to value. The keys in all dictionaries must
1459 be the same.
1461 Returns
1462 -------
1463 ids : `None`, or `list` of `int`
1464 If ``returnIds`` is `True`, a `list` containing the inserted
1465 values for the table's autoincrement primary key.
1467 Raises
1468 ------
1469 ReadOnlyDatabaseError
1470 Raised if `isWriteable` returns `False` when this method is called.
1472 Notes
1473 -----
1474 The default implementation uses bulk insert syntax when ``returnIds``
1475 is `False`, and a loop over single-row insert operations when it is
1476 `True`.
1478 Derived classes should reimplement when they can provide a more
1479 efficient implementation (especially for the latter case).
1481 May be used inside transaction contexts, so implementations may not
1482 perform operations that interrupt transactions.
1483 """
1484 self.assertTableWriteable(table, f"Cannot insert into read-only table {table}.")
1485 if select is not None and (rows or returnIds):
1486 raise TypeError("'select' is incompatible with passing value rows or returnIds=True.")
1487 if not rows and select is None:
1488 if returnIds:
1489 return []
1490 else:
1491 return None
1492 with self._transaction() as (_, connection):
1493 if not returnIds:
1494 if select is not None:
1495 if names is None:
1496 # columns() is deprecated since 1.4, but
1497 # selected_columns() method did not exist in 1.3.
1498 if hasattr(select, "selected_columns"):
1499 names = select.selected_columns.keys()
1500 else:
1501 names = select.columns.keys()
1502 connection.execute(table.insert().from_select(list(names), select))
1503 else:
1504 connection.execute(table.insert(), rows)
1505 return None
1506 else:
1507 sql = table.insert()
1508 return [connection.execute(sql, row).inserted_primary_key[0] for row in rows]
1510 @abstractmethod
1511 def replace(self, table: sqlalchemy.schema.Table, *rows: dict) -> None:
1512 """Insert one or more rows into a table, replacing any existing rows
1513 for which insertion of a new row would violate the primary key
1514 constraint.
1516 Parameters
1517 ----------
1518 table : `sqlalchemy.schema.Table`
1519 Table rows should be inserted into.
1520 *rows
1521 Positional arguments are the rows to be inserted, as dictionaries
1522 mapping column name to value. The keys in all dictionaries must
1523 be the same.
1525 Raises
1526 ------
1527 ReadOnlyDatabaseError
1528 Raised if `isWriteable` returns `False` when this method is called.
1530 Notes
1531 -----
1532 May be used inside transaction contexts, so implementations may not
1533 perform operations that interrupt transactions.
1535 Implementations should raise a `sqlalchemy.exc.IntegrityError`
1536 exception when a constraint other than the primary key would be
1537 violated.
1539 Implementations are not required to support `replace` on tables
1540 with autoincrement keys.
1541 """
1542 raise NotImplementedError()
1544 @abstractmethod
1545 def ensure(self, table: sqlalchemy.schema.Table, *rows: dict, primary_key_only: bool = False) -> int:
1546 """Insert one or more rows into a table, skipping any rows for which
1547 insertion would violate a unique constraint.
1549 Parameters
1550 ----------
1551 table : `sqlalchemy.schema.Table`
1552 Table rows should be inserted into.
1553 *rows
1554 Positional arguments are the rows to be inserted, as dictionaries
1555 mapping column name to value. The keys in all dictionaries must
1556 be the same.
1557 primary_key_only : `bool`, optional
1558 If `True` (`False` is default), only skip rows that violate the
1559 primary key constraint, and raise an exception (and rollback
1560 transactions) for other constraint violations.
1562 Returns
1563 -------
1564 count : `int`
1565 The number of rows actually inserted.
1567 Raises
1568 ------
1569 ReadOnlyDatabaseError
1570 Raised if `isWriteable` returns `False` when this method is called.
1571 This is raised even if the operation would do nothing even on a
1572 writeable database.
1574 Notes
1575 -----
1576 May be used inside transaction contexts, so implementations may not
1577 perform operations that interrupt transactions.
1579 Implementations are not required to support `ensure` on tables
1580 with autoincrement keys.
1581 """
1582 raise NotImplementedError()
1584 def delete(self, table: sqlalchemy.schema.Table, columns: Iterable[str], *rows: dict) -> int:
1585 """Delete one or more rows from a table.
1587 Parameters
1588 ----------
1589 table : `sqlalchemy.schema.Table`
1590 Table that rows should be deleted from.
1591 columns: `~collections.abc.Iterable` of `str`
1592 The names of columns that will be used to constrain the rows to
1593 be deleted; these will be combined via ``AND`` to form the
1594 ``WHERE`` clause of the delete query.
1595 *rows
1596 Positional arguments are the keys of rows to be deleted, as
1597 dictionaries mapping column name to value. The keys in all
1598 dictionaries must be exactly the names in ``columns``.
1600 Returns
1601 -------
1602 count : `int`
1603 Number of rows deleted.
1605 Raises
1606 ------
1607 ReadOnlyDatabaseError
1608 Raised if `isWriteable` returns `False` when this method is called.
1610 Notes
1611 -----
1612 May be used inside transaction contexts, so implementations may not
1613 perform operations that interrupt transactions.
1615 The default implementation should be sufficient for most derived
1616 classes.
1617 """
1618 self.assertTableWriteable(table, f"Cannot delete from read-only table {table}.")
1619 if columns and not rows:
1620 # If there are no columns, this operation is supposed to delete
1621 # everything (so we proceed as usual). But if there are columns,
1622 # but no rows, it was a constrained bulk operation where the
1623 # constraint is that no rows match, and we should short-circuit
1624 # while reporting that no rows were affected.
1625 return 0
1626 sql = table.delete()
1627 columns = list(columns) # Force iterators to list
1629 # More efficient to use IN operator if there is only one
1630 # variable changing across all rows.
1631 content: dict[str, set] = defaultdict(set)
1632 if len(columns) == 1:
1633 # Nothing to calculate since we can always use IN
1634 column = columns[0]
1635 changing_columns = [column]
1636 content[column] = {row[column] for row in rows}
1637 else:
1638 for row in rows:
1639 for k, v in row.items():
1640 content[k].add(v)
1641 changing_columns = [col for col, values in content.items() if len(values) > 1]
1643 if len(changing_columns) != 1:
1644 # More than one column changes each time so do explicit bind
1645 # parameters and have each row processed separately.
1646 whereTerms = [table.columns[name] == sqlalchemy.sql.bindparam(name) for name in columns]
1647 if whereTerms:
1648 sql = sql.where(sqlalchemy.sql.and_(*whereTerms))
1649 with self._transaction() as (_, connection):
1650 return connection.execute(sql, rows).rowcount
1651 else:
1652 # One of the columns has changing values but any others are
1653 # fixed. In this case we can use an IN operator and be more
1654 # efficient.
1655 name = changing_columns.pop()
1657 # Simple where clause for the unchanging columns
1658 clauses = []
1659 for k, v in content.items():
1660 if k == name:
1661 continue
1662 column = table.columns[k]
1663 # The set only has one element
1664 clauses.append(column == v.pop())
1666 # The IN operator will not work for "infinite" numbers of
1667 # rows so must batch it up into distinct calls.
1668 in_content = list(content[name])
1669 n_elements = len(in_content)
1671 rowcount = 0
1672 iposn = 0
1673 n_per_loop = 1_000 # Controls how many items to put in IN clause
1674 with self._transaction() as (_, connection):
1675 for iposn in range(0, n_elements, n_per_loop):
1676 endpos = iposn + n_per_loop
1677 in_clause = table.columns[name].in_(in_content[iposn:endpos])
1679 newsql = sql.where(sqlalchemy.sql.and_(*clauses, in_clause))
1680 rowcount += connection.execute(newsql).rowcount
1681 return rowcount
1683 def deleteWhere(self, table: sqlalchemy.schema.Table, where: sqlalchemy.sql.ColumnElement) -> int:
1684 """Delete rows from a table with pre-constructed WHERE clause.
1686 Parameters
1687 ----------
1688 table : `sqlalchemy.schema.Table`
1689 Table that rows should be deleted from.
1690 where: `sqlalchemy.sql.ClauseElement`
1691 The names of columns that will be used to constrain the rows to
1692 be deleted; these will be combined via ``AND`` to form the
1693 ``WHERE`` clause of the delete query.
1695 Returns
1696 -------
1697 count : `int`
1698 Number of rows deleted.
1700 Raises
1701 ------
1702 ReadOnlyDatabaseError
1703 Raised if `isWriteable` returns `False` when this method is called.
1705 Notes
1706 -----
1707 May be used inside transaction contexts, so implementations may not
1708 perform operations that interrupt transactions.
1710 The default implementation should be sufficient for most derived
1711 classes.
1712 """
1713 self.assertTableWriteable(table, f"Cannot delete from read-only table {table}.")
1715 sql = table.delete().where(where)
1716 with self._transaction() as (_, connection):
1717 return connection.execute(sql).rowcount
1719 def update(self, table: sqlalchemy.schema.Table, where: dict[str, str], *rows: dict) -> int:
1720 """Update one or more rows in a table.
1722 Parameters
1723 ----------
1724 table : `sqlalchemy.schema.Table`
1725 Table containing the rows to be updated.
1726 where : `dict` [`str`, `str`]
1727 A mapping from the names of columns that will be used to search for
1728 existing rows to the keys that will hold these values in the
1729 ``rows`` dictionaries. Note that these may not be the same due to
1730 SQLAlchemy limitations.
1731 *rows
1732 Positional arguments are the rows to be updated. The keys in all
1733 dictionaries must be the same, and may correspond to either a
1734 value in the ``where`` dictionary or the name of a column to be
1735 updated.
1737 Returns
1738 -------
1739 count : `int`
1740 Number of rows matched (regardless of whether the update actually
1741 modified them).
1743 Raises
1744 ------
1745 ReadOnlyDatabaseError
1746 Raised if `isWriteable` returns `False` when this method is called.
1748 Notes
1749 -----
1750 May be used inside transaction contexts, so implementations may not
1751 perform operations that interrupt transactions.
1753 The default implementation should be sufficient for most derived
1754 classes.
1755 """
1756 self.assertTableWriteable(table, f"Cannot update read-only table {table}.")
1757 if not rows:
1758 return 0
1759 sql = table.update().where(
1760 sqlalchemy.sql.and_(*[table.columns[k] == sqlalchemy.sql.bindparam(v) for k, v in where.items()])
1761 )
1762 with self._transaction() as (_, connection):
1763 return connection.execute(sql, rows).rowcount
1765 @contextmanager
1766 def query(
1767 self,
1768 sql: sqlalchemy.sql.expression.Executable | sqlalchemy.sql.expression.SelectBase,
1769 *args: Any,
1770 **kwargs: Any,
1771 ) -> Iterator[sqlalchemy.engine.CursorResult]:
1772 """Run a SELECT query against the database.
1774 Parameters
1775 ----------
1776 sql : `sqlalchemy.sql.expression.SelectBase`
1777 A SQLAlchemy representation of a ``SELECT`` query.
1778 *args
1779 Additional positional arguments are forwarded to
1780 `sqlalchemy.engine.Connection.execute`.
1781 **kwargs
1782 Additional keyword arguments are forwarded to
1783 `sqlalchemy.engine.Connection.execute`.
1785 Returns
1786 -------
1787 result_context : `sqlalchemy.engine.CursorResults`
1788 Context manager that returns the query result object when entered.
1789 These results are invalidated when the context is exited.
1790 """
1791 if self._session_connection is None:
1792 connection = self._engine.connect()
1793 else:
1794 connection = self._session_connection
1795 # TODO: SelectBase is not good for execute(), but it used everywhere,
1796 # e.g. in daf_relation. We should switch to Executable at some point.
1797 result = connection.execute(cast(sqlalchemy.sql.expression.Executable, sql), *args, **kwargs)
1798 try:
1799 yield result
1800 finally:
1801 if connection is not self._session_connection:
1802 connection.close()
1804 @abstractmethod
1805 def constant_rows(
1806 self,
1807 fields: NamedValueAbstractSet[ddl.FieldSpec],
1808 *rows: dict,
1809 name: str | None = None,
1810 ) -> sqlalchemy.sql.FromClause:
1811 """Return a SQLAlchemy object that represents a small number of
1812 constant-valued rows.
1814 Parameters
1815 ----------
1816 fields : `NamedValueAbstractSet` [ `ddl.FieldSpec` ]
1817 The columns of the rows. Unique and foreign key constraints are
1818 ignored.
1819 *rows : `dict`
1820 Values for the rows.
1821 name : `str`, optional
1822 If provided, the name of the SQL construct. If not provided, an
1823 opaque but unique identifier is generated.
1825 Returns
1826 -------
1827 from_clause : `sqlalchemy.sql.FromClause`
1828 SQLAlchemy object representing the given rows. This is guaranteed
1829 to be something that can be directly joined into a ``SELECT``
1830 query's ``FROM`` clause, and will not involve a temporary table
1831 that needs to be cleaned up later.
1833 Notes
1834 -----
1835 The default implementation uses the SQL-standard ``VALUES`` construct,
1836 but support for that construct is varied enough across popular RDBMSs
1837 that the method is still marked abstract to force explicit opt-in via
1838 delegation to `super`.
1839 """
1840 if name is None:
1841 name = f"tmp_{uuid.uuid4().hex}"
1842 return sqlalchemy.sql.values(
1843 *[sqlalchemy.Column(field.name, field.getSizedColumnType()) for field in fields],
1844 name=name,
1845 ).data([tuple(row[name] for name in fields.names) for row in rows])
1847 def get_constant_rows_max(self) -> int:
1848 """Return the maximum number of rows that should be passed to
1849 `constant_rows` for this backend.
1851 Returns
1852 -------
1853 max : `int`
1854 Maximum number of rows.
1856 Notes
1857 -----
1858 This should reflect typical performance profiles (or a guess at these),
1859 not just hard database engine limits.
1860 """
1861 return 100
1863 origin: int
1864 """An integer ID that should be used as the default for any datasets,
1865 quanta, or other entities that use a (autoincrement, origin) compound
1866 primary key (`int`).
1867 """
1869 namespace: str | None
1870 """The schema or namespace this database instance is associated with
1871 (`str` or `None`).
1872 """