Coverage for python/lsst/daf/butler/registry/interfaces/_database.py: 25%
424 statements
« prev ^ index » next coverage.py v7.5.0, created at 2024-04-30 02:53 -0700
« prev ^ index » next coverage.py v7.5.0, created at 2024-04-30 02:53 -0700
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
27from __future__ import annotations
29from ... import ddl, time_utils
31__all__ = [
32 "Database",
33 "ReadOnlyDatabaseError",
34 "DatabaseConflictError",
35 "DatabaseInsertMode",
36 "SchemaAlreadyDefinedError",
37 "StaticTablesContext",
38]
40import enum
41import uuid
42import warnings
43from abc import ABC, abstractmethod
44from collections import defaultdict
45from collections.abc import Callable, Iterable, Iterator, Sequence
46from contextlib import contextmanager
47from typing import Any, cast, final
49import astropy.time
50import sqlalchemy
52from ..._named import NamedValueAbstractSet
53from ...timespan_database_representation import TimespanDatabaseRepresentation
54from .._exceptions import ConflictingDefinitionError
class DatabaseInsertMode(enum.Enum):
    """Mode options available for inserting database records.

    The members differ only in how records that already exist are treated:
    fail (`INSERT`), overwrite (`REPLACE`), or skip (`ENSURE`).
    """

    INSERT = enum.auto()
    """Insert records, failing if they already exist."""

    REPLACE = enum.auto()
    """Replace records, overwriting existing."""

    ENSURE = enum.auto()
    """Insert records, skipping any that already exist."""
# TODO: method is called with list[ReflectedColumn] in SA 2, and
# ReflectedColumn does not exist in 1.4.
def _checkExistingTableDefinition(name: str, spec: ddl.TableSpec, inspection: list) -> None:
    """Check that a table specification and the columns reported by
    database introspection name the same set of columns.

    Only column names are compared; order and duplicates in ``inspection``
    are ignored because the names are compared as a set.

    Parameters
    ----------
    name : `str`
        Name of the table (only used in error messages).
    spec : `ddl.TableSpec`
        Specification of the table.
    inspection : `list`
        Column descriptions, each indexable by ``"name"``, as returned by
        `sqlalchemy.engine.reflection.Inspector.get_columns`.

    Raises
    ------
    DatabaseConflictError
        Raised if the definitions are inconsistent.
    """
    reflectedNames = [column["name"] for column in inspection]
    definitionsDiffer = spec.fields.names != set(reflectedNames)
    if not definitionsDiffer:
        return
    columnNames = reflectedNames
    raise DatabaseConflictError(
        f"Table '{name}' exists but is defined differently in the database; "
        f"specification has columns {list(spec.fields.names)}, while the "
        f"table in the database has {columnNames}."
    )
class ReadOnlyDatabaseError(RuntimeError):
    """Exception raised when a write operation is called on a read-only
    `Database`.

    Raised, for example, by `Database.assertTableWriteable` and by
    `Database.declareStaticTables` when table creation is requested.
    """
class DatabaseConflictError(ConflictingDefinitionError):
    """Exception raised when database content (row values or schema entities)
    is inconsistent with what this client expects.
    """
class SchemaAlreadyDefinedError(RuntimeError):
    """Exception raised when trying to initialize database schema when some
    tables already exist.

    Raised by `Database.declareStaticTables` when table creation is requested
    but the namespace already contains tables.
    """
118class StaticTablesContext:
119 """Helper class used to declare the static schema for a registry layer
120 in a database.
122 An instance of this class is returned by `Database.declareStaticTables`,
123 which should be the only way it should be constructed.
125 Parameters
126 ----------
127 db : `Database`
128 The database.
129 connection : `sqlalchemy.engine.Connection`
130 The connection object.
131 """
133 def __init__(self, db: Database, connection: sqlalchemy.engine.Connection):
134 self._db = db
135 self._foreignKeys: list[tuple[sqlalchemy.schema.Table, sqlalchemy.schema.ForeignKeyConstraint]] = []
136 self._inspector = sqlalchemy.inspect(connection)
137 self._tableNames = frozenset(self._inspector.get_table_names(schema=self._db.namespace))
138 self._initializers: list[Callable[[Database], None]] = []
140 def addTable(self, name: str, spec: ddl.TableSpec) -> sqlalchemy.schema.Table:
141 """Add a new table to the schema, returning its sqlalchemy
142 representation.
144 Parameters
145 ----------
146 name : `str`
147 The name of the table.
148 spec : `ddl.TableSpec`
149 The specification of the table.
151 Returns
152 -------
153 table : `sqlalchemy.schema.Table`
154 The created table.
156 Notes
157 -----
158 The new table may not actually be created until the end of the
159 context created by `Database.declareStaticTables`, allowing tables
160 to be declared in any order even in the presence of foreign key
161 relationships.
162 """
163 name = self._db._mangleTableName(name)
164 metadata = self._db._metadata
165 assert metadata is not None, "Guaranteed by context manager that returns this object."
166 table = self._db._convertTableSpec(name, spec, metadata)
167 for foreignKeySpec in spec.foreignKeys:
168 self._foreignKeys.append((table, self._db._convertForeignKeySpec(name, foreignKeySpec, metadata)))
169 return table
171 def addTableTuple(self, specs: tuple[ddl.TableSpec, ...]) -> tuple[sqlalchemy.schema.Table, ...]:
172 """Add a named tuple of tables to the schema, returning their
173 SQLAlchemy representations in a named tuple of the same type.
175 The new tables may not actually be created until the end of the
176 context created by `Database.declareStaticTables`, allowing tables
177 to be declared in any order even in the presence of foreign key
178 relationships.
180 Parameters
181 ----------
182 specs : `tuple` of `ddl.TableSpec`
183 Specifications of multiple tables.
185 Returns
186 -------
187 tables : `tuple` of `sqlalchemy.schema.Table`
188 All the tables created.
190 Notes
191 -----
192 ``specs`` *must* be an instance of a type created by
193 `collections.namedtuple`, not just regular tuple, and the returned
194 object is guaranteed to be the same. Because `~collections.namedtuple`
195 is just a factory for `type` objects, not an actual type itself,
196 we cannot represent this with type annotations.
197 """
198 return specs._make( # type: ignore
199 self.addTable(name, spec) for name, spec in zip(specs._fields, specs, strict=True) # type: ignore
200 )
202 def addInitializer(self, initializer: Callable[[Database], None]) -> None:
203 """Add a method that does one-time initialization of a database.
205 Initialization can mean anything that changes state of a database
206 and needs to be done exactly once after database schema was created.
207 An example for that could be population of schema attributes.
209 Parameters
210 ----------
211 initializer : `~collections.abc.Callable`
212 Method of a single argument which is a `Database` instance.
213 """
214 self._initializers.append(initializer)
217class Database(ABC):
218 """An abstract interface that represents a particular database engine's
219 representation of a single schema/namespace/database.
221 Parameters
222 ----------
223 origin : `int`
224 An integer ID that should be used as the default for any datasets,
225 quanta, or other entities that use a (autoincrement, origin) compound
226 primary key.
227 engine : `sqlalchemy.engine.Engine`
228 The SQLAlchemy engine for this `Database`.
229 namespace : `str`, optional
230 Name of the schema or namespace this instance is associated with.
231 This is passed as the ``schema`` argument when constructing a
232 `sqlalchemy.schema.MetaData` instance. We use ``namespace`` instead to
233 avoid confusion between "schema means namespace" and "schema means
234 table definitions".
235 metadata : `sqlalchemy.schema.MetaData`, optional
236 Object representing the tables and other schema entities. If not
237 provided, will be generated during the next call to
238 ``declareStaticTables``.
240 Notes
241 -----
242 `Database` requires all write operations to go through its special named
243 methods. Our write patterns are sufficiently simple that we don't really
244 need the full flexibility of SQL insert/update/delete syntax, and we need
245 non-standard (but common) functionality in these operations sufficiently
246 often that it seems worthwhile to provide our own generic API.
248 In contrast, `Database.query` allows arbitrary ``SELECT`` queries (via
249 their SQLAlchemy representation) to be run, as we expect these to require
250 significantly more sophistication while still being limited to standard
251 SQL.
253 `Database` itself has several underscore-prefixed attributes:
255 - ``_engine``: SQLAlchemy object representing its engine.
256 - ``_connection``: method returning a context manager for
257 `sqlalchemy.engine.Connection` object.
258 - ``_metadata``: the `sqlalchemy.schema.MetaData` object representing
259 the tables and other schema entities.
261 These are considered protected (derived classes may access them, but other
262 code should not), and read-only, aside from executing SQL via
263 ``_connection``.
264 """
def __init__(
    self,
    *,
    origin: int,
    engine: sqlalchemy.engine.Engine,
    namespace: str | None = None,
    metadata: sqlalchemy.schema.MetaData | None = None,
):
    """Record the configuration of this `Database`.

    No connection is opened here; the session connection starts as `None`
    and the set of known temporary tables starts empty.
    """
    # Engine and the (initially absent) session connection.
    self._engine = engine
    self._session_connection: sqlalchemy.engine.Connection | None = None
    # Public identity of this instance.
    self.origin = origin
    self.namespace = namespace
    # Schema bookkeeping: table metadata and keys of live temporary tables.
    self._metadata = metadata
    self._temp_tables: set[str] = set()
def __repr__(self) -> str:
    """Return ``ClassName("<connection URL>[#namespace]")`` with any
    password in the URL replaced by ``***``.
    """
    # Rather than try to reproduce all the parameters used to create
    # the object, instead report the more useful information of the
    # connection URL.
    url = self._engine.url
    if url.password is None:
        uri = str(self._engine.url)
    else:
        # Never leak the real password into logs or tracebacks.
        uri = str(self._engine.url.set(password="***"))
    suffix = f"#{self.namespace}" if self.namespace else ""
    uri = uri + suffix
    return f'{type(self).__name__}("{uri}")'
@classmethod
def makeDefaultUri(cls, root: str) -> str | None:
    """Create a default connection URI appropriate for the given root
    directory, or `None` if there can be no such default.

    Parameters
    ----------
    root : `str`
        Root string to use to build connection URI.

    Returns
    -------
    uri : `str` or `None`
        The URI string or `None`.

    Notes
    -----
    This base implementation ignores ``root`` and always returns `None`.
    """
    # No default is available at this level.
    return None
@classmethod
def fromUri(
    cls,
    uri: str | sqlalchemy.engine.URL,
    *,
    origin: int,
    namespace: str | None = None,
    writeable: bool = True,
) -> Database:
    """Construct a database from a SQLAlchemy URI.

    This is a convenience wrapper: it builds an engine with `makeEngine`
    and hands it to `fromEngine`.

    Parameters
    ----------
    uri : `str` or `sqlalchemy.engine.URL`
        A SQLAlchemy URI connection string.
    origin : `int`
        An integer ID that should be used as the default for any datasets,
        quanta, or other entities that use a (autoincrement, origin)
        compound primary key.
    namespace : `str`, optional
        A database namespace (i.e. schema) the new instance should be
        associated with.  If `None` (default), the namespace (if any) is
        inferred from the URI.
    writeable : `bool`, optional
        If `True`, allow write operations on the database, including
        ``CREATE TABLE``.

    Returns
    -------
    db : `Database`
        A new `Database` instance.
    """
    engine = cls.makeEngine(uri, writeable=writeable)
    database = cls.fromEngine(engine, origin=origin, namespace=namespace, writeable=writeable)
    return database
@abstractmethod
def clone(self) -> Database:
    """Make an independent copy of this `Database` object.

    Returns
    -------
    db : `Database`
        A new `Database` instance with the same configuration as this
        instance.
    """
    # Abstract: concrete subclasses must provide the implementation.
    raise NotImplementedError()
@classmethod
@abstractmethod
def makeEngine(
    cls, uri: str | sqlalchemy.engine.URL, *, writeable: bool = True
) -> sqlalchemy.engine.Engine:
    """Create a `sqlalchemy.engine.Engine` from a SQLAlchemy URI.

    Parameters
    ----------
    uri : `str` or `sqlalchemy.engine.URL`
        A SQLAlchemy URI connection string.
    writeable : `bool`, optional
        If `True`, allow write operations on the database, including
        ``CREATE TABLE``.

    Returns
    -------
    engine : `sqlalchemy.engine.Engine`
        A database engine.

    Notes
    -----
    Subclasses that support other ways to connect to a database are
    encouraged to add optional arguments to their implementation of this
    method, as long as they maintain compatibility with the base class
    call signature.
    """
    # Abstract: concrete subclasses must provide the implementation.
    raise NotImplementedError()
@classmethod
@abstractmethod
def fromEngine(
    cls,
    engine: sqlalchemy.engine.Engine,
    *,
    origin: int,
    namespace: str | None = None,
    writeable: bool = True,
) -> Database:
    """Create a new `Database` from an existing `sqlalchemy.engine.Engine`.

    Parameters
    ----------
    engine : `sqlalchemy.engine.Engine`
        The engine for the database.  May be shared between `Database`
        instances.
    origin : `int`
        An integer ID that should be used as the default for any datasets,
        quanta, or other entities that use a (autoincrement, origin)
        compound primary key.
    namespace : `str`, optional
        A different database namespace (i.e. schema) the new instance
        should be associated with.  If `None` (default), the namespace
        (if any) is inferred from the connection.
    writeable : `bool`, optional
        If `True`, allow write operations on the database, including
        ``CREATE TABLE``.

    Returns
    -------
    db : `Database`
        A new `Database` instance.

    Notes
    -----
    This method allows different `Database` instances to share the same
    engine, which is desirable when they represent different namespaces
    that can be queried together.
    """
    # Abstract: concrete subclasses must provide the implementation.
    raise NotImplementedError()
@final
@contextmanager
def session(self) -> Iterator[None]:
    """Return a context manager that represents a session (persistent
    connection to a database).

    Returns
    -------
    context : `AbstractContextManager` [ `None` ]
        A context manager that does not return a value when entered.

    Notes
    -----
    This method should be used when a sequence of read-only SQL operations
    will be performed in rapid succession *without* a requirement that they
    yield consistent results in the presence of concurrent writes (or, more
    rarely, when conflicting concurrent writes are rare/impossible and the
    session will be open long enough that a transaction is inadvisable).

    This method is marked `~typing.final`; the actual work is done by
    `_session`.
    """
    # Delegate to the protected implementation, but deliberately do not
    # expose the connection it yields to the caller.
    with self._session():
        yield
@final
@contextmanager
def transaction(
    self,
    *,
    interrupting: bool = False,
    savepoint: bool = False,
    lock: Iterable[sqlalchemy.schema.Table] = (),
    for_temp_tables: bool = False,
) -> Iterator[None]:
    """Return a context manager that represents a transaction.

    Parameters
    ----------
    interrupting : `bool`, optional
        If `True` (`False` is default), this transaction block may not be
        nested within an outer one, and attempting to do so is a logic
        (i.e. assertion) error.
    savepoint : `bool`, optional
        If `True` (`False` is default), create a `SAVEPOINT`, allowing
        exceptions raised by the database (e.g. due to constraint
        violations) during this transaction's context to be caught outside
        it without also rolling back all operations in an outer transaction
        block.  If `False`, transactions may still be nested, but a
        rollback may be generated at any level and affects all levels, and
        commits are deferred until the outermost block completes.  If any
        outer transaction block was created with ``savepoint=True``, all
        inner blocks will be as well (regardless of the actual value
        passed).  This has no effect if this is the outermost transaction.
    lock : `~collections.abc.Iterable` [ `sqlalchemy.schema.Table` ], \
            optional
        A list of tables to lock for the duration of this transaction.
        These locks are guaranteed to prevent concurrent writes and allow
        this transaction (only) to acquire the same locks (others should
        block), but only prevent concurrent reads if the database engine
        requires that in order to block concurrent writes.
    for_temp_tables : `bool`, optional
        If `True`, this transaction may involve creating temporary tables.

    Returns
    -------
    context : `AbstractContextManager` [ `None` ]
        A context manager that commits the transaction when it is exited
        without error and rolls back the transaction when it is exited via
        an exception.

    Notes
    -----
    All transactions on a connection managed by one or more `Database`
    instances _must_ go through this method, or transaction state will not
    be correctly managed.
    """
    # Delegate to the protected implementation; the (is_new, connection)
    # tuple it yields is deliberately hidden from the caller.
    with self._transaction(
        interrupting=interrupting, savepoint=savepoint, lock=lock, for_temp_tables=for_temp_tables
    ):
        yield
@contextmanager
def temporary_table(
    self, spec: ddl.TableSpec, name: str | None = None
) -> Iterator[sqlalchemy.schema.Table]:
    """Return a context manager that creates and then drops a temporary
    table.

    Parameters
    ----------
    spec : `ddl.TableSpec`
        Specification for the columns.  Unique and foreign key constraints
        may be ignored.
    name : `str`, optional
        If provided, the name of the SQL construct.  If not provided, an
        opaque but unique identifier is generated.

    Returns
    -------
    context : `AbstractContextManager` [ `sqlalchemy.schema.Table` ]
        A context manager that returns a SQLAlchemy representation of the
        temporary table when entered.

    Notes
    -----
    Temporary tables may be created, dropped, and written to even in
    read-only databases - at least according to the Python-level
    protections in the `Database` classes.  Server permissions may say
    otherwise, but in that case they probably need to be modified to
    support the full range of expected read-only butler behavior.
    """
    # The session is held open for the whole lifetime of the table.
    with self._session() as connection:
        table = self._make_temporary_table(connection, spec=spec, name=name)
        # Register the key so that isTableWriteable() accepts this table
        # even when the database itself is not writeable.
        self._temp_tables.add(table.key)
        try:
            yield table
        finally:
            # Always drop the table, even if the caller's block raised.
            with self._transaction():
                table.drop(connection)
            self._temp_tables.remove(table.key)
@contextmanager
def _session(self) -> Iterator[sqlalchemy.engine.Connection]:
    """Protected implementation for `session` that actually returns the
    connection.

    This method is for internal `Database` calls that need the actual
    SQLAlchemy connection object.  It should be overridden by subclasses
    instead of `session` itself.

    Returns
    -------
    context : `AbstractContextManager` [ `sqlalchemy.engine.Connection` ]
        A context manager that returns a SQLAlchemy connection when
        entered.

    Notes
    -----
    Sessions nest: only the outermost context opens the connection, and
    only that context closes it again.  Inner contexts reuse the open
    connection and leave it open on exit.
    """
    if self._session_connection is not None:
        # session already started, just reuse that
        yield self._session_connection
    else:
        try:
            # open new connection and close it when done
            self._session_connection = self._engine.connect()
            yield self._session_connection
        finally:
            # The connection is still None here if connect() itself failed.
            if self._session_connection is not None:
                self._session_connection.close()
                self._session_connection = None
            # Temporary tables only live within session
            self._temp_tables = set()
@contextmanager
def _transaction(
    self,
    *,
    interrupting: bool = False,
    savepoint: bool = False,
    lock: Iterable[sqlalchemy.schema.Table] = (),
    for_temp_tables: bool = False,
) -> Iterator[tuple[bool, sqlalchemy.engine.Connection]]:
    """Protected implementation for `transaction` that actually returns the
    connection and whether this is a new outermost transaction.

    This method is for internal `Database` calls that need the actual
    SQLAlchemy connection object.  It should be overridden by subclasses
    instead of `transaction` itself.

    Parameters
    ----------
    interrupting : `bool`, optional
        If `True` (`False` is default), this transaction block may not be
        nested within an outer one, and attempting to do so is a logic
        (i.e. assertion) error.
    savepoint : `bool`, optional
        If `True` (`False` is default), create a `SAVEPOINT`, allowing
        exceptions raised by the database (e.g. due to constraint
        violations) during this transaction's context to be caught outside
        it without also rolling back all operations in an outer transaction
        block.  If `False`, transactions may still be nested, but a
        rollback may be generated at any level and affects all levels, and
        commits are deferred until the outermost block completes.  If any
        outer transaction block was created with ``savepoint=True``, all
        inner blocks will be as well (regardless of the actual value
        passed).  This has no effect if this is the outermost transaction.
    lock : `~collections.abc.Iterable` [ `sqlalchemy.schema.Table` ], \
            optional
        A list of tables to lock for the duration of this transaction.
        These locks are guaranteed to prevent concurrent writes and allow
        this transaction (only) to acquire the same locks (others should
        block), but only prevent concurrent reads if the database engine
        requires that in order to block concurrent writes.
    for_temp_tables : `bool`, optional
        If `True`, this transaction may involve creating temporary tables.
        This implementation does not use the value.

    Returns
    -------
    context : `AbstractContextManager` [ `tuple` [ `bool`,
            `sqlalchemy.engine.Connection` ] ]
        A context manager that commits the transaction when it is exited
        without error and rolls back the transaction when it is exited via
        an exception.  When entered, it returns a tuple of:

        - ``is_new`` (`bool`): whether this is a new (outermost)
          transaction;
        - ``connection`` (`sqlalchemy.engine.Connection`): the connection.
    """
    with self._session() as connection:
        already_in_transaction = self.isInTransaction()
        assert not (interrupting and already_in_transaction), (
            "Logic error in transaction nesting: an operation that would "
            "interrupt the active transaction context has been requested."
        )
        # Once any enclosing block uses a savepoint, inner blocks do too.
        savepoint = savepoint or connection.in_nested_transaction()
        trans: sqlalchemy.engine.Transaction | None
        if already_in_transaction:
            if savepoint:
                trans = connection.begin_nested()
            else:
                # Nested non-savepoint transactions don't do anything.
                trans = None
        else:
            # Use a regular (non-savepoint) transaction always for the
            # outermost context.
            trans = connection.begin()
        self._lockTables(connection, lock)
        try:
            yield not already_in_transaction, connection
            if trans is not None:
                trans.commit()
        except BaseException:
            # Roll back on any exception, including one raised by commit(),
            # then let it propagate.
            if trans is not None:
                trans.rollback()
            raise
@abstractmethod
def _lockTables(
    self, connection: sqlalchemy.engine.Connection, tables: Iterable[sqlalchemy.schema.Table] = ()
) -> None:
    """Acquire locks on the given tables.

    This is an implementation hook for subclasses, called by
    `_transaction` (and therefore by `transaction`).  It should not be
    called directly by other code.

    Parameters
    ----------
    connection : `sqlalchemy.engine.Connection`
        Database connection object.  It is guaranteed that a transaction is
        already in progress for this connection.
    tables : `~collections.abc.Iterable` [ `sqlalchemy.schema.Table` ], \
            optional
        A list of tables to lock for the duration of this transaction.
        These locks are guaranteed to prevent concurrent writes and allow
        this transaction (only) to acquire the same locks (others should
        block), but only prevent concurrent reads if the database engine
        requires that in order to block concurrent writes.
    """
    # Abstract: concrete subclasses must provide the implementation.
    raise NotImplementedError()
def isTableWriteable(self, table: sqlalchemy.schema.Table) -> bool:
    """Report whether rows may be written to the given table.

    A table is writeable when the database itself is writeable or when the
    table is one of the temporary tables currently tracked by this object.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy table object to check.

    Returns
    -------
    writeable : `bool`
        Whether this table is writeable.
    """
    databaseWriteable = self.isWriteable()
    if databaseWriteable:
        return databaseWriteable
    # Read-only database: only tracked temporary tables can be written.
    return table.key in self._temp_tables
def assertTableWriteable(self, table: sqlalchemy.schema.Table, msg: str) -> None:
    """Raise if the given table is not writeable.

    A table counts as writeable if `isTableWriteable` says so, i.e. the
    database is writeable or the table is a temporary table.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy table object to check.
    msg : `str`
        Message for the exception raised when the table is not writeable.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised, with ``msg``, if the table is not writeable.
    """
    if self.isTableWriteable(table):
        return
    raise ReadOnlyDatabaseError(msg)
@contextmanager
def declareStaticTables(self, *, create: bool) -> Iterator[StaticTablesContext]:
    """Return a context manager in which the database's static DDL schema
    can be declared.

    Parameters
    ----------
    create : `bool`
        If `True`, attempt to create all tables at the end of the context.
        If `False`, they will be assumed to already exist.

    Returns
    -------
    schema : `StaticTablesContext`
        A helper object that is used to add new tables.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if ``create`` is `True` and `Database.isWriteable` returns
        `False`.
    SchemaAlreadyDefinedError
        Raised if ``create`` is `True` and the namespace already contains
        tables.

    Examples
    --------
    Given a `Database` instance ``db``::

        with db.declareStaticTables(create=True) as schema:
            schema.addTable("table1", TableSpec(...))
            schema.addTable("table2", TableSpec(...))

    Notes
    -----
    A database's static DDL schema must be declared before any dynamic
    tables are managed via calls to `ensureTableExists` or
    `getExistingTable`.  The order in which static schema tables are added
    inside the context block is unimportant; they will automatically be
    sorted and added in an order consistent with their foreign key
    relationships.

    Every call replaces this object's metadata with a fresh
    `sqlalchemy.MetaData`; if anything inside the context raises, the
    metadata is reset to `None` before the exception propagates.
    """
    if create and not self.isWriteable():
        raise ReadOnlyDatabaseError(f"Cannot create tables in read-only database {self}.")
    self._metadata = sqlalchemy.MetaData(schema=self.namespace)
    try:
        with self._transaction() as (_, connection):
            context = StaticTablesContext(self, connection)
            if create and context._tableNames:
                # Looks like database is already initialized, to avoid
                # danger of modifying/destroying valid schema we refuse to
                # do anything in this case
                raise SchemaAlreadyDefinedError(f"Cannot create tables in non-empty database {self}.")
            yield context
            # Foreign keys were held back while tables were declared;
            # attach them now that every table is known.
            for table, foreignKey in context._foreignKeys:
                table.append_constraint(foreignKey)
            if create:
                if (
                    self.namespace is not None
                    and self.namespace not in context._inspector.get_schema_names()
                ):
                    connection.execute(sqlalchemy.schema.CreateSchema(self.namespace))
                # In our tables we have columns that make use of sqlalchemy
                # Sequence objects. There is currently a bug in sqlalchemy
                # that causes a deprecation warning to be thrown on a
                # property of the Sequence object when the repr for the
                # sequence is created. Here a filter is used to catch these
                # deprecation warnings when tables are created.
                with warnings.catch_warnings():
                    warnings.simplefilter("ignore", category=sqlalchemy.exc.SADeprecationWarning)
                    self._metadata.create_all(connection)
                # call all initializer methods sequentially
                for init in context._initializers:
                    init(self)
    except BaseException:
        # Do not keep a half-declared schema around.
        self._metadata = None
        raise
@abstractmethod
def isWriteable(self) -> bool:
    """Return `True` if this database can be modified by this client.

    Returns
    -------
    writeable : `bool`
        Whether write operations are allowed.
    """
    # Abstract: concrete subclasses must provide the implementation.
    raise NotImplementedError()
def isInTransaction(self) -> bool:
    """Return `True` if there is currently a database connection open with
    an active transaction; `False` otherwise.
    """
    connection = self._session_connection
    if connection is None:
        # No session is open, so there cannot be a transaction.
        return False
    return connection.in_transaction()
@abstractmethod
def __str__(self) -> str:
    """Return a human-readable identifier for this `Database`, including
    any namespace or schema that identifies its names within a `Registry`.
    """
    # Abstract: concrete subclasses must provide the implementation.
    raise NotImplementedError()
@property
def dialect(self) -> sqlalchemy.engine.Dialect:
    """The SQLAlchemy dialect for this database engine
    (`sqlalchemy.engine.Dialect`).
    """
    # Read straight from the engine; nothing is cached here.
    return self._engine.dialect
def shrinkDatabaseEntityName(self, original: str) -> str:
    """Return a version of the given name that fits within this database
    engine's length limits for table, constraint, indexes, and sequence
    names.

    Implementations should not assume that simple truncation is safe,
    because multiple long names often begin with the same prefix.

    The default implementation simply returns the given name.

    Parameters
    ----------
    original : `str`
        The original name.

    Returns
    -------
    shrunk : `str`
        The new, possibly shortened name.
    """
    # Identity by default.
    return original
def expandDatabaseEntityName(self, shrunk: str) -> str:
    """Retrieve the original name for a database entity that was too long
    to fit within the database engine's limits.

    The default implementation simply returns the given name.

    Parameters
    ----------
    shrunk : `str`
        The possibly shortened name (presumably as produced by
        `shrinkDatabaseEntityName`).

    Returns
    -------
    original : `str`
        The original name.
    """
    # Identity by default.
    return shrunk
def _mangleTableName(self, name: str) -> str:
    """Map a logical, user-visible table name to the true table name used
    in the database.

    The default implementation returns the given name unchanged.

    Parameters
    ----------
    name : `str`
        Input table name.  Should not include a namespace (i.e. schema)
        prefix.

    Returns
    -------
    mangled : `str`
        Mangled version of the table name (still with no namespace prefix).

    Notes
    -----
    Reimplementations of this method must be idempotent - mangling an
    already-mangled name must have no effect.
    """
    # Identity by default.
    return name
def _makeColumnConstraints(self, table: str, spec: ddl.FieldSpec) -> list[sqlalchemy.CheckConstraint]:
    """Create constraints based on this spec.

    The default implementation creates none and returns an empty list.

    Parameters
    ----------
    table : `str`
        Name of the table this column is being added to.
    spec : `FieldSpec`
        Specification for the field to be added.

    Returns
    -------
    constraint : `list` of `sqlalchemy.CheckConstraint`
        Constraint added for this column.
    """
    # By default we return no additional constraints
    return []
def _convertFieldSpec(
    self, table: str, spec: ddl.FieldSpec, metadata: sqlalchemy.MetaData, **kwargs: Any
) -> sqlalchemy.schema.Column:
    """Build the `sqlalchemy.schema.Column` described by a `FieldSpec`.

    Parameters
    ----------
    table : `str`
        Name of the table this column is being added to.
    spec : `FieldSpec`
        Specification for the field to be added.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema this field's table is
        being added to.
    **kwargs
        Additional keyword arguments to forward to the
        `sqlalchemy.schema.Column` constructor.  This is provided to make
        it easier for derived classes to delegate to ``super()`` while
        making only minor changes.

    Returns
    -------
    column : `sqlalchemy.schema.Column`
        SQLAlchemy representation of the field.
    """
    extraArgs = []
    if spec.autoincrement:
        # Generate a sequence to use for auto incrementing for databases
        # that do not support it natively.  This will be ignored by
        # sqlalchemy for databases that do support it.
        sequenceName = self.shrinkDatabaseEntityName(f"{table}_seq_{spec.name}")
        sequence = sqlalchemy.Sequence(sequenceName, metadata=metadata)
        extraArgs.append(sequence)
    assert spec.doc is None or isinstance(spec.doc, str), f"Bad doc for {table}.{spec.name}."
    column = sqlalchemy.schema.Column(
        spec.name,
        spec.getSizedColumnType(),
        *extraArgs,
        nullable=spec.nullable,
        primary_key=spec.primaryKey,
        comment=spec.doc,
        server_default=spec.default,
        **kwargs,
    )
    return column
def _convertForeignKeySpec(
    self, table: str, spec: ddl.ForeignKeySpec, metadata: sqlalchemy.MetaData, **kwargs: Any
) -> sqlalchemy.schema.ForeignKeyConstraint:
    """Convert a `ForeignKeySpec` to a
    `sqlalchemy.schema.ForeignKeyConstraint`.

    Parameters
    ----------
    table : `str`
        Name of the table this foreign key is being added to.
    spec : `ForeignKeySpec`
        Specification for the foreign key to be added.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema this constraint is
        being added to.
    **kwargs
        Additional keyword arguments to forward to the
        `sqlalchemy.schema.ForeignKeyConstraint` constructor.  This is
        provided to make it easier for derived classes to delegate to
        ``super()`` while making only minor changes.

    Returns
    -------
    constraint : `sqlalchemy.schema.ForeignKeyConstraint`
        SQLAlchemy representation of the constraint.

    Notes
    -----
    The constraint name is built from the referencing table, the mangled
    target table, and the target and source column names, and is then
    passed through `shrinkDatabaseEntityName`.
    """
    # Mangle the target table name once and reuse it for both the
    # constraint name and the referenced column paths.
    target_table = self._mangleTableName(spec.table)
    name = self.shrinkDatabaseEntityName(
        "_".join(["fkey", table, target_table, *spec.target, *spec.source])
    )
    return sqlalchemy.schema.ForeignKeyConstraint(
        spec.source,
        [f"{target_table}.{col}" for col in spec.target],
        name=name,
        ondelete=spec.onDelete,
        # Previously accepted and documented but silently dropped.
        **kwargs,
    )
983 def _convertExclusionConstraintSpec(
984 self,
985 table: str,
986 spec: tuple[str | type[TimespanDatabaseRepresentation], ...],
987 metadata: sqlalchemy.MetaData,
988 ) -> sqlalchemy.schema.Constraint:
989 """Convert a `tuple` from `ddl.TableSpec.exclusion` into a SQLAlchemy
990 constraint representation.
992 Parameters
993 ----------
994 table : `str`
995 Name of the table this constraint is being added to.
996 spec : `tuple` [ `str` or `type` ]
997 A tuple of `str` column names and the `type` object returned by
998 `getTimespanRepresentation` (which must appear exactly once),
999 indicating the order of the columns in the index used to back the
1000 constraint.
1001 metadata : `sqlalchemy.MetaData`
1002 SQLAlchemy representation of the DDL schema this constraint is
1003 being added to.
1005 Returns
1006 -------
1007 constraint : `sqlalchemy.schema.Constraint`
1008 SQLAlchemy representation of the constraint.
1010 Raises
1011 ------
1012 NotImplementedError
1013 Raised if this database does not support exclusion constraints.
1014 """
1015 raise NotImplementedError(f"Database {self} does not support exclusion constraints.")
def _convertTableSpec(
    self, name: str, spec: ddl.TableSpec, metadata: sqlalchemy.MetaData, **kwargs: Any
) -> sqlalchemy.schema.Table:
    """Build the `sqlalchemy.schema.Table` described by a `TableSpec`.

    Parameters
    ----------
    name : `str`
        The name of the table.
    spec : `TableSpec`
        Specification for the table to be created.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema this table is being
        added to.
    **kwargs
        Extra keyword arguments passed straight through to the
        `sqlalchemy.schema.Table` constructor, so that derived classes can
        delegate to ``super()`` while making only minor changes.

    Returns
    -------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy representation of the table.

    Notes
    -----
    Foreign key *constraints* in ``spec.foreignKeys`` are deliberately not
    created here, in order to avoid circular dependencies; only the
    indexes requested for them are.  The constraints are added by
    higher-level logic in `ensureTableExists`, `getExistingTable`, and
    `declareStaticTables`.
    """
    name = self._mangleTableName(name)
    items: list[sqlalchemy.schema.SchemaItem] = []
    for field in spec.fields:
        items.append(self._convertFieldSpec(name, field, metadata))

    # Per-column constraints go after all of the columns.
    for field in spec.fields:
        items.extend(self._makeColumnConstraints(name, field))

    # Column tuples already backed by an index (primary key, unique
    # constraints, explicit indexes), so that no duplicate explicit or
    # foreign key index is created for them.
    covered = {tuple(field.name for field in spec.fields if field.primaryKey)}

    for columns in spec.unique:
        constraint_name = self.shrinkDatabaseEntityName("_".join([name, "unq"] + list(columns)))
        items.append(sqlalchemy.schema.UniqueConstraint(*columns, name=constraint_name))
    covered.update(spec.unique)

    for index in spec.indexes:
        if index.columns in covered:
            continue
        index_name = self.shrinkDatabaseEntityName("_".join([name, "idx"] + list(index.columns)))
        items.append(
            sqlalchemy.schema.Index(
                index_name,
                *index.columns,
                unique=(index.columns in spec.unique),
                **index.kwargs,
            )
        )
    # Explicit indexes are recorded only after all of them have been
    # processed, so they are compared against the primary key and unique
    # constraints but not against each other.
    covered.update(index.columns for index in spec.indexes)

    for fk in spec.foreignKeys:
        if not fk.addIndex or fk.source in covered:
            continue
        fk_index_name = self.shrinkDatabaseEntityName("_".join((name, "fkidx") + fk.source))
        items.append(sqlalchemy.schema.Index(fk_index_name, *fk.source))

    for excl in spec.exclusion:
        items.append(self._convertExclusionConstraintSpec(name, excl, metadata))

    assert spec.doc is None or isinstance(spec.doc, str), f"Bad doc for {name}."
    return sqlalchemy.schema.Table(name, metadata, *items, comment=spec.doc, info={"spec": spec}, **kwargs)
def ensureTableExists(self, name: str, spec: ddl.TableSpec) -> sqlalchemy.schema.Table:
    """Return the table with the given name and specification, creating it
    first if it does not exist yet.

    Parameters
    ----------
    name : `str`
        Name of the table (not including namespace qualifiers).
    spec : `TableSpec`
        Specification for the table.  It is used when creating the table,
        and *may* be used to check an existing table for consistency, but
        no such check is guaranteed.

    Returns
    -------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy representation of the table.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False`, and the table does not
        already exist.
    DatabaseConflictError
        Raised if the table exists but ``spec`` is inconsistent with its
        definition.

    Notes
    -----
    This method may not be called within transactions.  It may be called on
    read-only databases if and only if the table does in fact already
    exist.

    Subclasses may override this method, but usually should not need to.
    """
    # TODO: if _engine is used to make a table then it uses separate
    # connection and should not interfere with current transaction
    assert not self.isInTransaction(), "Table creation interrupts transactions."
    assert self._metadata is not None, "Static tables must be declared before dynamic tables."
    existing = self.getExistingTable(name, spec)
    if existing is not None:
        return existing
    if not self.isWriteable():
        raise ReadOnlyDatabaseError(
            f"Table {name} does not exist, and cannot be created because database {self} is read-only."
        )
    created = self._convertTableSpec(name, spec, self._metadata)
    for fk_spec in spec.foreignKeys:
        created.append_constraint(self._convertForeignKeySpec(name, fk_spec, self._metadata))
    try:
        with self._transaction() as (_, connection):
            created.create(connection)
    except sqlalchemy.exc.DatabaseError:
        # Another process may have created the table in the meantime, which
        # usually surfaces as OperationalError or ProgrammingError.  An
        # IF NOT EXISTS clause cannot be used here because of a PostgreSQL
        # server-side race condition that causes IntegrityError.  All of
        # these inherit from DatabaseError, so catch that and look for the
        # table again; if it is still missing the failure was genuine.
        raced = self.getExistingTable(name, spec)
        if raced is None:
            raise
        return raced
    return created
def getExistingTable(self, name: str, spec: ddl.TableSpec) -> sqlalchemy.schema.Table | None:
    """Look up an existing table with the given name and specification.

    Parameters
    ----------
    name : `str`
        Name of the table (not including namespace qualifiers).
    spec : `TableSpec`
        Specification for the table.  It is used to build the SQLAlchemy
        representation of the table and to check that the actual table is
        consistent with it.

    Returns
    -------
    table : `sqlalchemy.schema.Table` or `None`
        SQLAlchemy representation of the table, or `None` if it does not
        exist.

    Raises
    ------
    DatabaseConflictError
        Raised if the table exists but ``spec`` is inconsistent with its
        definition.

    Notes
    -----
    This method can be called within transactions and never modifies the
    database.

    Subclasses may override this method, but usually should not need to.
    """
    assert self._metadata is not None, "Static tables must be declared before dynamic tables."
    name = self._mangleTableName(name)
    metadata_key = name if self.namespace is None else f"{self.namespace}.{name}"
    known = self._metadata.tables.get(metadata_key)
    if known is not None:
        # Already declared in this client; only column names are compared.
        if spec.fields.names != set(known.columns.keys()):
            raise DatabaseConflictError(
                f"Table '{name}' has already been defined differently; the new "
                f"specification has columns {list(spec.fields.names)}, while "
                f"the previous definition has {list(known.columns.keys())}."
            )
        return known

    # Not declared locally yet: ask the database itself, through the session
    # connection when there is one and through the engine otherwise.
    bind = self._engine if self._session_connection is None else self._session_connection
    inspector = sqlalchemy.inspect(bind, raiseerr=True)
    if name not in inspector.get_table_names(schema=self.namespace):
        return None
    _checkExistingTableDefinition(name, spec, inspector.get_columns(name, schema=self.namespace))
    found = self._convertTableSpec(name, spec, self._metadata)
    for fk_spec in spec.foreignKeys:
        found.append_constraint(self._convertForeignKeySpec(name, fk_spec, self._metadata))
    return found
def _make_temporary_table(
    self,
    connection: sqlalchemy.engine.Connection,
    spec: ddl.TableSpec,
    name: str | None = None,
    **kwargs: Any,
) -> sqlalchemy.schema.Table:
    """Create a temporary table.

    Parameters
    ----------
    connection : `sqlalchemy.engine.Connection`
        Connection to use when creating the table.
    spec : `TableSpec`
        Specification for the table.
    name : `str`, optional
        A unique (within this session/connection) name for the table.
        Subclasses may override to modify the actual name used.  If not
        provided, a unique name will be generated.
    **kwargs
        Extra keyword arguments passed straight through to the
        `sqlalchemy.schema.Table` constructor, so that derived classes can
        delegate to ``super()`` while making only minor changes.

    Returns
    -------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy representation of the table.

    Raises
    ------
    RuntimeError
        Raised if the static schema has not been defined yet.
    ValueError
        Raised if the table's key is already registered as a temporary
        table and differs from ``name``.
    """
    name = f"tmp_{uuid.uuid4().hex}" if name is None else name
    metadata = self._metadata
    if metadata is None:
        raise RuntimeError("Cannot create temporary table before static schema is defined.")
    temp_table = self._convertTableSpec(
        name, spec, metadata, prefixes=["TEMPORARY"], schema=sqlalchemy.schema.BLANK_SCHEMA, **kwargs
    )
    key = temp_table.key
    if key in self._temp_tables and key != name:
        raise ValueError(
            f"A temporary table with name {name} (transformed to {temp_table.key} by "
            "Database) already exists."
        )
    for fk_spec in spec.foreignKeys:
        temp_table.append_constraint(self._convertForeignKeySpec(name, fk_spec, metadata))
    with self._transaction():
        temp_table.create(connection)
    return temp_table
@classmethod
def getTimespanRepresentation(cls) -> type[TimespanDatabaseRepresentation]:
    """Return the `type` that describes how `Timespan` objects are stored
    in this database.

    `Database` itself does not use the returned type anywhere else; it is
    up to calling code to keep DDL and queries consistent with it.

    Returns
    -------
    TimespanReprClass : `type` (`TimespanDatabaseRepresentation` subclass)
        A type that encapsulates the way `Timespan` objects should be
        stored in this database.

    Notes
    -----
    The choice of representation belongs to the `Database` implementation,
    but the timespan-mangling logic is kept outside it for two reasons:

    - Timespans appear in relatively few tables and queries in our typical
      usage, and the code that operates on them already knows it is
      working with timespans.  A timespan-representation-aware
      implementation of, say, `insert` would need extra logic to detect
      when timespan-mangling is required, which would usually be useless
      overhead.

    - SQLAlchemy's rich SELECT query expression system has no way to wrap
      multiple columns in a single expression object (the ORM does, but we
      are not using the ORM).  So we would have to wrap _much_ more of that
      code in our own interfaces to encapsulate timespan representations
      there.
    """
    representation = TimespanDatabaseRepresentation.Compound
    return representation
def sync(
    self,
    table: sqlalchemy.schema.Table,
    *,
    keys: dict[str, Any],
    compared: dict[str, Any] | None = None,
    extra: dict[str, Any] | None = None,
    returning: Sequence[str] | None = None,
    update: bool = False,
) -> tuple[dict[str, Any] | None, bool | dict[str, Any]]:
    """Insert into a table as necessary to ensure database contains
    values equivalent to the given ones.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table to be queried and possibly inserted into.
    keys : `dict`
        Column name-value pairs used to search for an existing row; must
        be a combination that can be used to select a single row if one
        exists.  If such a row does not exist, these values are used in
        the insert.
    compared : `dict`, optional
        Column name-value pairs that are compared to those in any existing
        row.  If such a row does not exist, these values are used in the
        insert.
    extra : `dict`, optional
        Column name-value pairs that are ignored if a matching row exists,
        but used in an insert if one is necessary.
    returning : `~collections.abc.Sequence` of `str`, optional
        The names of columns whose values should be returned.
    update : `bool`, optional
        If `True` (`False` is default), update the existing row with the
        values in ``compared`` instead of raising `DatabaseConflictError`.

    Returns
    -------
    row : `dict`, optional
        The value of the fields indicated by ``returning``, or `None` if
        ``returning`` is `None`.
    inserted_or_updated : `bool` or `dict`
        If `True`, a new row was inserted; if `False`, a matching row
        already existed.  If a `dict` (only possible if ``update=True``),
        then an existing row was updated, and the dict maps the names of
        the updated columns to their *old* values (new values can be
        obtained from ``compared``).

    Raises
    ------
    DatabaseConflictError
        Raised if the values in ``compared`` do not match the values in the
        database and ``update`` is `False`.
    ReadOnlyDatabaseError
        Raised if the table is not writeable and either no matching row
        exists, or a matching row would need to be updated.
    ConflictingDefinitionError
        Raised if the table is writeable but no row matches ``keys`` after
        the insert attempt.
    RuntimeError
        Raised if more than one row matches ``keys``, or if a conflict is
        found right after an insert that was reported as successful.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    It may be called on read-only databases if and only if the matching row
    does in fact already exist.
    """

    def check() -> tuple[int, dict[str, Any] | None, list | None]:
        """Query for a row that matches the ``keys`` argument, and compare
        to what was given by the caller.

        Returns
        -------
        n : `int`
            Number of matching rows.  ``n != 1`` is always an error, but
            it's a different kind of error depending on where `check` is
            being called.
        bad : `dict` or `None`
            The subset of the keys of ``compared`` for which the existing
            values did not match the given one, mapped to the existing
            values in the database.  A non-empty ``bad`` is a conflict,
            and how it is handled depends on the caller's context.  `None`
            if ``n != 1``.
        result : `list` or `None`
            Results in the database that correspond to the columns given
            in ``returning``, or `None` if ``returning is None`` or
            ``n != 1``.
        """
        toSelect: set[str] = set()
        if compared is not None:
            toSelect.update(compared.keys())
        if returning is not None:
            toSelect.update(returning)
        if not toSelect:
            # Need to select some column, even if we just want to see
            # how many rows we get back.
            toSelect.add(next(iter(keys.keys())))
        selectSql = (
            sqlalchemy.sql.select(*[table.columns[k].label(k) for k in toSelect])
            .select_from(table)
            .where(sqlalchemy.sql.and_(*[table.columns[k] == v for k, v in keys.items()]))
        )
        with self._transaction() as (_, connection):
            fetched = list(connection.execute(selectSql).mappings())
        if len(fetched) != 1:
            return len(fetched), None, None
        existing = fetched[0]
        if compared is not None:

            def safeNotEqual(a: Any, b: Any) -> bool:
                # astropy times are compared via TimeConverter.times_equal
                # rather than with ``!=``.
                if isinstance(a, astropy.time.Time):
                    return not time_utils.TimeConverter().times_equal(a, b)
                return a != b

            inconsistencies = {
                k: existing[k] for k, v in compared.items() if safeNotEqual(existing[k], v)
            }
        else:
            inconsistencies = {}
        if returning is not None:
            toReturn: list | None = [existing[k] for k in returning]
        else:
            toReturn = None
        return 1, inconsistencies, toReturn

    def _format_bad(inconsistencies: dict[str, Any]) -> str:
        """Format the 'bad' dictionary of existing values returned by
        ``check`` into a string suitable for an error message.

        Each entry is rendered as ``column: existing != given``.
        """
        assert compared is not None, "Should not be able to get inconsistencies without comparing."
        return ", ".join(f"{k}: {v!r} != {compared[k]!r}" for k, v in inconsistencies.items())

    if self.isTableWriteable(table):
        # Try an insert first, but allow it to fail (in only specific
        # ways).
        row = keys.copy()
        if compared is not None:
            row.update(compared)
        if extra is not None:
            row.update(extra)
        with self.transaction():
            # ``ensure`` returns the number of rows actually inserted.
            inserted = bool(self.ensure(table, row))
            inserted_or_updated: bool | dict[str, Any]
            # Need to perform check() for this branch inside the
            # transaction, so we roll back an insert that didn't do
            # what we expected.  That limits the extent to which we
            # can reduce duplication between this block and the other
            # ones that perform similar logic.
            n, bad, result = check()
            if n < 1:
                raise ConflictingDefinitionError(
                    f"Attempted to ensure {row} exists by inserting it with ON CONFLICT IGNORE, "
                    f"but a post-insert query on {keys} returned no results. "
                    f"Insert was {'' if inserted else 'not '}reported as successful. "
                    "This can occur if the insert violated a database constraint other than the "
                    "unique constraint or primary key used to identify the row in this call."
                )
            elif n > 1:
                raise RuntimeError(
                    f"Keys passed to sync {keys.keys()} do not comprise a "
                    f"unique constraint for table {table.name}."
                )
            elif bad:
                assert (
                    compared is not None
                ), "Should not be able to get inconsistencies without comparing."
                if inserted:
                    raise RuntimeError(
                        f"Conflict ({bad}) in sync after successful insert; this is "
                        "possible if the same table is being updated by a concurrent "
                        "process that isn't using sync, but it may also be a bug in "
                        "daf_butler."
                    )
                elif update:
                    # Overwrite only the columns that differ; ``bad`` holds
                    # their old values and is what gets reported back.
                    with self._transaction() as (_, connection):
                        connection.execute(
                            table.update()
                            .where(sqlalchemy.sql.and_(*[table.columns[k] == v for k, v in keys.items()]))
                            .values(**{k: compared[k] for k in bad})
                        )
                    inserted_or_updated = bad
                else:
                    raise DatabaseConflictError(
                        f"Conflict in sync for table {table.name} on column(s) {_format_bad(bad)}."
                    )
            else:
                inserted_or_updated = inserted
    else:
        # Database is not writeable; just see if the row exists.
        n, bad, result = check()
        if n < 1:
            raise ReadOnlyDatabaseError("sync needs to insert, but database is read-only.")
        elif n > 1:
            raise RuntimeError("Keys passed to sync do not comprise a unique constraint.")
        elif bad:
            if update:
                raise ReadOnlyDatabaseError("sync needs to update, but database is read-only.")
            else:
                raise DatabaseConflictError(
                    f"Conflict in sync for table {table.name} on column(s) {_format_bad(bad)}."
                )
        inserted_or_updated = False
    if returning is None:
        return None, inserted_or_updated
    else:
        assert result is not None
        return dict(zip(returning, result, strict=True)), inserted_or_updated
def insert(
    self,
    table: sqlalchemy.schema.Table,
    *rows: dict,
    returnIds: bool = False,
    select: sqlalchemy.sql.expression.SelectBase | None = None,
    names: Iterable[str] | None = None,
) -> list[int] | None:
    """Insert one or more rows into a table, optionally returning
    autoincrement primary key values.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    *rows : `dict`
        Positional arguments are the rows to be inserted, as dictionaries
        mapping column name to value.  The keys in all dictionaries must
        be the same.
    returnIds : `bool`, optional
        If `True` (`False` is default), return the values of the table's
        autoincrement primary key field (which must exist).
    select : `sqlalchemy.sql.SelectBase`, optional
        A SELECT query expression to insert rows from.  Cannot be provided
        with either ``rows`` or ``returnIds=True``.
    names : `~collections.abc.Iterable` [ `str` ], optional
        Names of columns in ``table`` to be populated, ordered to match the
        columns returned by ``select``.  Ignored if ``select`` is `None`.
        If not provided, the columns returned by ``select`` must be named
        to match the desired columns of ``table``.

    Returns
    -------
    ids : `None`, or `list` of `int`
        If ``returnIds`` is `True`, a `list` containing the inserted
        values for the table's autoincrement primary key.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.
    TypeError
        Raised if ``select`` is combined with ``rows`` or
        ``returnIds=True``.

    Notes
    -----
    The default implementation uses bulk insert syntax when ``returnIds``
    is `False`, and a loop over single-row insert operations when it is
    `True`.

    Derived classes should reimplement when they can provide a more
    efficient implementation (especially for the latter case).

    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.
    """
    self.assertTableWriteable(table, f"Cannot insert into read-only table {table}.")
    if select is not None and (rows or returnIds):
        raise TypeError("'select' is incompatible with passing value rows or returnIds=True.")
    if not rows and select is None:
        # Nothing to insert; still honor the shape of the return value.
        return [] if returnIds else None
    with self._transaction() as (_, connection):
        if returnIds:
            # One statement per row so each generated key can be read back.
            statement = table.insert()
            ids = []
            for row in rows:
                ids.append(connection.execute(statement, row).inserted_primary_key[0])
            return ids
        if select is None:
            connection.execute(table.insert(), rows)
            return None
        if names is None:
            # columns() is deprecated since 1.4, but
            # selected_columns() method did not exist in 1.3.
            if hasattr(select, "selected_columns"):
                source_columns = select.selected_columns
            else:
                source_columns = select.columns
            names = source_columns.keys()
        connection.execute(table.insert().from_select(list(names), select))
        return None
@abstractmethod
def replace(self, table: sqlalchemy.schema.Table, *rows: dict) -> None:
    """Insert rows into a table, overwriting any existing row whose
    primary key a new row would duplicate.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    *rows
        Positional arguments are the rows to be inserted, as dictionaries
        mapping column name to value.  The keys in all dictionaries must
        be the same.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    Implementations should raise a `sqlalchemy.exc.IntegrityError`
    exception when a constraint other than the primary key would be
    violated.

    Implementations are not required to support `replace` on tables
    with autoincrement keys.
    """
    # Abstract: concrete databases must supply the implementation.
    raise NotImplementedError()
@abstractmethod
def ensure(self, table: sqlalchemy.schema.Table, *rows: dict, primary_key_only: bool = False) -> int:
    """Insert rows into a table, silently skipping any row whose insertion
    would violate a unique constraint.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    *rows
        Positional arguments are the rows to be inserted, as dictionaries
        mapping column name to value.  The keys in all dictionaries must
        be the same.
    primary_key_only : `bool`, optional
        If `True` (`False` is default), only skip rows that violate the
        primary key constraint, and raise an exception (and rollback
        transactions) for other constraint violations.

    Returns
    -------
    count : `int`
        The number of rows actually inserted.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.
        This is raised even if the operation would do nothing even on a
        writeable database.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    Implementations are not required to support `ensure` on tables
    with autoincrement keys.
    """
    # Abstract: concrete databases must supply the implementation.
    raise NotImplementedError()
def delete(self, table: sqlalchemy.schema.Table, columns: Iterable[str], *rows: dict) -> int:
    """Delete one or more rows from a table.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table that rows should be deleted from.
    columns : `~collections.abc.Iterable` of `str`
        The names of columns that will be used to constrain the rows to
        be deleted; these will be combined via ``AND`` to form the
        ``WHERE`` clause of the delete query.  May be any iterable,
        including a one-shot iterator; if it is empty, the delete is
        unconstrained.
    *rows
        Positional arguments are the keys of rows to be deleted, as
        dictionaries mapping column name to value.  The keys in all
        dictionaries must be exactly the names in ``columns``.

    Returns
    -------
    count : `int`
        Number of rows deleted.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    The default implementation should be sufficient for most derived
    classes.
    """
    self.assertTableWriteable(table, f"Cannot delete from read-only table {table}.")
    # Materialize before testing for emptiness: a one-shot iterator is
    # always truthy, so testing it directly would treat "no columns" as
    # "some columns" and wrongly short-circuit below.
    columns = list(columns)
    if columns and not rows:
        # If there are no columns, this operation is supposed to delete
        # everything (so we proceed as usual).  But if there are columns,
        # but no rows, it was a constrained bulk operation where the
        # constraint is that no rows match, and we should short-circuit
        # while reporting that no rows were affected.
        return 0
    sql = table.delete()

    # More efficient to use IN operator if there is only one
    # variable changing across all rows.
    content: dict[str, set] = defaultdict(set)
    if len(columns) == 1:
        # Nothing to calculate since we can always use IN
        column = columns[0]
        changing_columns = [column]
        content[column] = {row[column] for row in rows}
    else:
        for row in rows:
            for k, v in row.items():
                content[k].add(v)
        changing_columns = [col for col, values in content.items() if len(values) > 1]

    if len(changing_columns) != 1:
        # More than one column changes each time so do explicit bind
        # parameters and have each row processed separately.
        whereTerms = [table.columns[name] == sqlalchemy.sql.bindparam(name) for name in columns]
        if whereTerms:
            sql = sql.where(sqlalchemy.sql.and_(*whereTerms))
        with self._transaction() as (_, connection):
            return connection.execute(sql, rows).rowcount
    else:
        # One of the columns has changing values but any others are
        # fixed.  In this case we can use an IN operator and be more
        # efficient.
        name = changing_columns.pop()

        # Simple where clause for the unchanging columns
        clauses = []
        for k, v in content.items():
            if k == name:
                continue
            fixed_column = table.columns[k]
            # The set only has one element
            clauses.append(fixed_column == v.pop())

        # The IN operator will not work for "infinite" numbers of
        # rows so must batch it up into distinct calls.
        in_content = list(content[name])
        n_elements = len(in_content)

        rowcount = 0
        n_per_loop = 1_000  # Controls how many items to put in IN clause
        with self._transaction() as (_, connection):
            for iposn in range(0, n_elements, n_per_loop):
                endpos = iposn + n_per_loop
                in_clause = table.columns[name].in_(in_content[iposn:endpos])

                newsql = sql.where(sqlalchemy.sql.and_(*clauses, in_clause))
                rowcount += connection.execute(newsql).rowcount
        return rowcount
def deleteWhere(self, table: sqlalchemy.schema.Table, where: sqlalchemy.sql.ColumnElement) -> int:
    """Delete the rows of a table selected by a pre-constructed WHERE
    clause.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table that rows should be deleted from.
    where : `sqlalchemy.sql.ClauseElement`
        Expression used as the ``WHERE`` clause of the delete query; only
        rows for which it holds are deleted.

    Returns
    -------
    count : `int`
        Number of rows deleted.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    The default implementation should be sufficient for most derived
    classes.
    """
    self.assertTableWriteable(table, f"Cannot delete from read-only table {table}.")

    statement = table.delete().where(where)
    with self._transaction() as (_, connection):
        outcome = connection.execute(statement)
        return outcome.rowcount
def update(self, table: sqlalchemy.schema.Table, where: dict[str, str], *rows: dict) -> int:
    """Update one or more rows in a table.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table containing the rows to be updated.
    where : `dict` [`str`, `str`]
        Maps each column name used to locate existing rows to the key
        under which the matching value is stored in the ``rows``
        dictionaries. The two names may differ because of SQLAlchemy
        limitations.
    *rows
        Positional arguments are the rows to be updated. All dictionaries
        must share the same keys; each key is either a value of the
        ``where`` dictionary or the name of a column to be updated.

    Returns
    -------
    count : `int`
        Number of rows matched (regardless of whether the update actually
        modified them). ``0`` is returned immediately when no rows are
        given.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    The default implementation should be sufficient for most derived
    classes.
    """
    self.assertTableWriteable(table, f"Cannot update read-only table {table}.")
    if not rows:
        return 0

    # One equality test per search column, each compared against a bound
    # parameter named after the corresponding key in the row dictionaries.
    conditions = [
        table.columns[column_name] == sqlalchemy.sql.bindparam(row_key)
        for column_name, row_key in where.items()
    ]
    # No explicit ``values()`` call is made here; presumably SQLAlchemy
    # derives the SET clause from the remaining keys of the executed
    # parameter dictionaries -- confirm against the SQLAlchemy docs.
    statement = table.update().where(sqlalchemy.sql.and_(*conditions))
    with self._transaction() as (_, conn):
        result = conn.execute(statement, rows)
        return result.rowcount
@contextmanager
def query(
    self,
    sql: sqlalchemy.sql.expression.Executable | sqlalchemy.sql.expression.SelectBase,
    *args: Any,
    **kwargs: Any,
) -> Iterator[sqlalchemy.engine.CursorResult]:
    """Run a SELECT query against the database.

    Parameters
    ----------
    sql : `sqlalchemy.sql.expression.Executable` or \
            `sqlalchemy.sql.expression.SelectBase`
        A SQLAlchemy representation of a ``SELECT`` query.
    *args
        Additional positional arguments are forwarded to
        `sqlalchemy.engine.Connection.execute`.
    **kwargs
        Additional keyword arguments are forwarded to
        `sqlalchemy.engine.Connection.execute`.

    Returns
    -------
    result_context : `sqlalchemy.engine.CursorResult`
        Context manager that returns the query result object when entered.
        These results are invalidated when the context is exited.

    Notes
    -----
    When a session connection is active it is reused and left open on
    exit. Otherwise a new connection is obtained from the engine and is
    closed when the context exits, including when executing the query
    itself raises.
    """
    if self._session_connection is None:
        connection = self._engine.connect()
    else:
        connection = self._session_connection
    try:
        # Execute inside the ``try`` so that a connection opened just for
        # this query is still closed if the statement fails.
        #
        # TODO: SelectBase is not good for execute(), but it used everywhere,
        # e.g. in daf_relation. We should switch to Executable at some point.
        result = connection.execute(cast(sqlalchemy.sql.expression.Executable, sql), *args, **kwargs)
        yield result
    finally:
        # Only close connections that are not the shared session connection.
        if connection is not self._session_connection:
            connection.close()
@abstractmethod
def constant_rows(
    self,
    fields: NamedValueAbstractSet[ddl.FieldSpec],
    *rows: dict,
    name: str | None = None,
) -> sqlalchemy.sql.FromClause:
    """Build a SQLAlchemy object representing a small number of
    constant-valued rows.

    Parameters
    ----------
    fields : `NamedValueAbstractSet` [ `ddl.FieldSpec` ]
        The columns of the rows. Unique and foreign key constraints are
        ignored.
    *rows : `dict`
        Values for the rows; each dictionary is indexed by field name.
    name : `str`, optional
        If provided, the name of the SQL construct. If not provided, an
        opaque but unique identifier is generated.

    Returns
    -------
    from_clause : `sqlalchemy.sql.FromClause`
        SQLAlchemy object representing the given rows. This is guaranteed
        to be something that can be directly joined into a ``SELECT``
        query's ``FROM`` clause, and will not involve a temporary table
        that needs to be cleaned up later.

    Notes
    -----
    The default implementation uses the SQL-standard ``VALUES`` construct,
    but support for that construct is varied enough across popular RDBMSs
    that the method is still marked abstract to force explicit opt-in via
    delegation to `super`.
    """
    if name is None:
        # Random hex suffix keeps auto-generated names unique.
        name = f"tmp_{uuid.uuid4().hex}"
    columns = [sqlalchemy.Column(field.name, field.getSizedColumnType()) for field in fields]
    values_clause = sqlalchemy.sql.values(*columns, name=name)
    # Flatten every row dictionary into a tuple ordered like ``fields.names``.
    row_tuples = [tuple(row[field_name] for field_name in fields.names) for row in rows]
    return values_clause.data(row_tuples)
def get_constant_rows_max(self) -> int:
    """Report the largest number of rows that callers should hand to
    `constant_rows` for this backend.

    Returns
    -------
    max : `int`
        Maximum number of rows.

    Notes
    -----
    This should reflect typical performance profiles (or a guess at these),
    not just hard database engine limits.
    """
    return 100
@property
@abstractmethod
def has_distinct_on(self) -> bool:
    """Whether the ``DISTINCT ON`` SQL construct is supported by this
    database (`bool`).
    """
    raise NotImplementedError
@property
@abstractmethod
def has_any_aggregate(self) -> bool:
    """Whether the ``ANY_VALUE`` aggregate function, or something
    equivalent, is supported by this database (`bool`).
    """
    raise NotImplementedError
@abstractmethod
def apply_any_aggregate(self, column: sqlalchemy.ColumnElement[Any]) -> sqlalchemy.ColumnElement[Any]:
    """Wrap a SQLAlchemy column in the ``ANY_VALUE`` aggregate function or
    its equivalent.

    Parameters
    ----------
    column : `sqlalchemy.ColumnElement`
        Original column to wrap.

    Returns
    -------
    wrapped : `sqlalchemy.ColumnElement`
        A column element of the same SQL type that can appear in the
        ``SELECT`` clause even when this column does not appear in the
        ``GROUP BY`` clause.

    Notes
    -----
    Behavior is unspecified when `has_any_aggregate` is `False`; callers
    must check that property before calling this method.
    """
    raise NotImplementedError
origin: int
"""Integer ID to use as the default for datasets, quanta, or any other
entities whose primary key is an (autoincrement, origin) compound (`int`).
"""

namespace: str | None
"""Schema or namespace that this database instance is associated with
(`str` or `None`).
"""