Coverage for python/lsst/daf/butler/registry/interfaces/_database.py: 25%
421 statements
« prev ^ index » next coverage.py v7.4.4, created at 2024-03-26 02:48 -0700
« prev ^ index » next coverage.py v7.4.4, created at 2024-03-26 02:48 -0700
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
27from __future__ import annotations
29from ... import ddl, time_utils
31__all__ = [
32 "Database",
33 "ReadOnlyDatabaseError",
34 "DatabaseConflictError",
35 "DatabaseInsertMode",
36 "SchemaAlreadyDefinedError",
37 "StaticTablesContext",
38]
40import enum
41import uuid
42import warnings
43from abc import ABC, abstractmethod
44from collections import defaultdict
45from collections.abc import Callable, Iterable, Iterator, Sequence
46from contextlib import contextmanager
47from typing import Any, cast, final
49import astropy.time
50import sqlalchemy
52from ..._named import NamedValueAbstractSet
53from ...timespan_database_representation import TimespanDatabaseRepresentation
54from .._exceptions import ConflictingDefinitionError
class DatabaseInsertMode(enum.Enum):
    """Strategies for handling pre-existing rows when inserting records."""

    INSERT = 1
    """Plain insert; it is an error if a record already exists."""

    REPLACE = 2
    """Insert, overwriting any record that already exists."""

    ENSURE = 3
    """Insert only records that do not exist yet, skipping the others."""
70# TODO: method is called with list[ReflectedColumn] in SA 2, and
71# ReflectedColumn does not exist in 1.4.
def _checkExistingTableDefinition(name: str, spec: ddl.TableSpec, inspection: list) -> None:
    """Verify that a table specification and the reflected database columns
    name the same set of columns.

    Only column names are compared; column types and constraints are not
    checked.

    Parameters
    ----------
    name : `str`
        Name of the table (only used in error messages).
    spec : `ddl.TableSpec`
        Specification of the table.
    inspection : `list`
        Column descriptions as returned by
        `sqlalchemy.engine.reflection.Inspector.get_columns`; each entry is
        indexed by ``"name"`` to obtain the column name.

    Raises
    ------
    DatabaseConflictError
        Raised if the definitions are inconsistent.
    """
    reflectedNames = [column["name"] for column in inspection]
    if spec.fields.names == set(reflectedNames):
        return
    raise DatabaseConflictError(
        f"Table '{name}' exists but is defined differently in the database; "
        f"specification has columns {list(spec.fields.names)}, while the "
        f"table in the database has {reflectedNames}."
    )
class ReadOnlyDatabaseError(RuntimeError):
    """Error signalling that a write operation was attempted on a `Database`
    that is read-only.
    """
class DatabaseConflictError(ConflictingDefinitionError):
    """Error signalling that what is stored in the database (row values or
    schema entities) does not match what this client expects.
    """
class SchemaAlreadyDefinedError(RuntimeError):
    """Error signalling an attempt to initialize a database schema in a
    database where some tables already exist.
    """
118class StaticTablesContext:
119 """Helper class used to declare the static schema for a registry layer
120 in a database.
122 An instance of this class is returned by `Database.declareStaticTables`,
123 which should be the only way it should be constructed.
125 Parameters
126 ----------
127 db : `Database`
128 The database.
129 connection : `sqlalchemy.engine.Connection`
130 The connection object.
131 """
133 def __init__(self, db: Database, connection: sqlalchemy.engine.Connection):
134 self._db = db
135 self._foreignKeys: list[tuple[sqlalchemy.schema.Table, sqlalchemy.schema.ForeignKeyConstraint]] = []
136 self._inspector = sqlalchemy.inspect(connection)
137 self._tableNames = frozenset(self._inspector.get_table_names(schema=self._db.namespace))
138 self._initializers: list[Callable[[Database], None]] = []
140 def addTable(self, name: str, spec: ddl.TableSpec) -> sqlalchemy.schema.Table:
141 """Add a new table to the schema, returning its sqlalchemy
142 representation.
144 Parameters
145 ----------
146 name : `str`
147 The name of the table.
148 spec : `ddl.TableSpec`
149 The specification of the table.
151 Returns
152 -------
153 table : `sqlalchemy.schema.Table`
154 The created table.
156 Notes
157 -----
158 The new table may not actually be created until the end of the
159 context created by `Database.declareStaticTables`, allowing tables
160 to be declared in any order even in the presence of foreign key
161 relationships.
162 """
163 name = self._db._mangleTableName(name)
164 metadata = self._db._metadata
165 assert metadata is not None, "Guaranteed by context manager that returns this object."
166 table = self._db._convertTableSpec(name, spec, metadata)
167 for foreignKeySpec in spec.foreignKeys:
168 self._foreignKeys.append((table, self._db._convertForeignKeySpec(name, foreignKeySpec, metadata)))
169 return table
171 def addTableTuple(self, specs: tuple[ddl.TableSpec, ...]) -> tuple[sqlalchemy.schema.Table, ...]:
172 """Add a named tuple of tables to the schema, returning their
173 SQLAlchemy representations in a named tuple of the same type.
175 The new tables may not actually be created until the end of the
176 context created by `Database.declareStaticTables`, allowing tables
177 to be declared in any order even in the presence of foreign key
178 relationships.
180 Parameters
181 ----------
182 specs : `tuple` of `ddl.TableSpec`
183 Specifications of multiple tables.
185 Returns
186 -------
187 tables : `tuple` of `sqlalchemy.schema.Table`
188 All the tables created.
190 Notes
191 -----
192 ``specs`` *must* be an instance of a type created by
193 `collections.namedtuple`, not just regular tuple, and the returned
194 object is guaranteed to be the same. Because `~collections.namedtuple`
195 is just a factory for `type` objects, not an actual type itself,
196 we cannot represent this with type annotations.
197 """
198 return specs._make( # type: ignore
199 self.addTable(name, spec) for name, spec in zip(specs._fields, specs, strict=True) # type: ignore
200 )
202 def addInitializer(self, initializer: Callable[[Database], None]) -> None:
203 """Add a method that does one-time initialization of a database.
205 Initialization can mean anything that changes state of a database
206 and needs to be done exactly once after database schema was created.
207 An example for that could be population of schema attributes.
209 Parameters
210 ----------
211 initializer : `~collections.abc.Callable`
212 Method of a single argument which is a `Database` instance.
213 """
214 self._initializers.append(initializer)
217class Database(ABC):
218 """An abstract interface that represents a particular database engine's
219 representation of a single schema/namespace/database.
221 Parameters
222 ----------
223 origin : `int`
224 An integer ID that should be used as the default for any datasets,
225 quanta, or other entities that use a (autoincrement, origin) compound
226 primary key.
227 engine : `sqlalchemy.engine.Engine`
228 The SQLAlchemy engine for this `Database`.
229 namespace : `str`, optional
230 Name of the schema or namespace this instance is associated with.
231 This is passed as the ``schema`` argument when constructing a
232 `sqlalchemy.schema.MetaData` instance. We use ``namespace`` instead to
233 avoid confusion between "schema means namespace" and "schema means
234 table definitions".
235 metadata : `sqlalchemy.schema.MetaData`, optional
236 Object representing the tables and other schema entities. If not
237 provided, will be generated during the next call to
238 ``declareStaticTables``.
240 Notes
241 -----
242 `Database` requires all write operations to go through its special named
243 methods. Our write patterns are sufficiently simple that we don't really
244 need the full flexibility of SQL insert/update/delete syntax, and we need
245 non-standard (but common) functionality in these operations sufficiently
246 often that it seems worthwhile to provide our own generic API.
248 In contrast, `Database.query` allows arbitrary ``SELECT`` queries (via
249 their SQLAlchemy representation) to be run, as we expect these to require
250 significantly more sophistication while still being limited to standard
251 SQL.
253 `Database` itself has several underscore-prefixed attributes:
255 - ``_engine``: SQLAlchemy object representing its engine.
256 - ``_connection``: method returning a context manager for
257 `sqlalchemy.engine.Connection` object.
258 - ``_metadata``: the `sqlalchemy.schema.MetaData` object representing
259 the tables and other schema entities.
261 These are considered protected (derived classes may access them, but other
262 code should not), and read-only, aside from executing SQL via
263 ``_connection``.
264 """
266 def __init__(
267 self,
268 *,
269 origin: int,
270 engine: sqlalchemy.engine.Engine,
271 namespace: str | None = None,
272 metadata: sqlalchemy.schema.MetaData | None = None,
273 ):
274 self.origin = origin
275 self.namespace = namespace
276 self._engine = engine
277 self._session_connection: sqlalchemy.engine.Connection | None = None
278 self._metadata = metadata
279 self._temp_tables: set[str] = set()
281 def __repr__(self) -> str:
282 # Rather than try to reproduce all the parameters used to create
283 # the object, instead report the more useful information of the
284 # connection URL.
285 if self._engine.url.password is not None:
286 uri = str(self._engine.url.set(password="***"))
287 else:
288 uri = str(self._engine.url)
289 if self.namespace:
290 uri += f"#{self.namespace}"
291 return f'{type(self).__name__}("{uri}")'
293 @classmethod
294 def makeDefaultUri(cls, root: str) -> str | None:
295 """Create a default connection URI appropriate for the given root
296 directory, or `None` if there can be no such default.
298 Parameters
299 ----------
300 root : `str`
301 Root string to use to build connection URI.
303 Returns
304 -------
305 uri : `str` or `None`
306 The URI string or `None`.
307 """
308 return None
310 @classmethod
311 def fromUri(
312 cls,
313 uri: str | sqlalchemy.engine.URL,
314 *,
315 origin: int,
316 namespace: str | None = None,
317 writeable: bool = True,
318 ) -> Database:
319 """Construct a database from a SQLAlchemy URI.
321 Parameters
322 ----------
323 uri : `str` or `sqlalchemy.engine.URL`
324 A SQLAlchemy URI connection string.
325 origin : `int`
326 An integer ID that should be used as the default for any datasets,
327 quanta, or other entities that use a (autoincrement, origin)
328 compound primary key.
329 namespace : `str`, optional
330 A database namespace (i.e. schema) the new instance should be
331 associated with. If `None` (default), the namespace (if any) is
332 inferred from the URI.
333 writeable : `bool`, optional
334 If `True`, allow write operations on the database, including
335 ``CREATE TABLE``.
337 Returns
338 -------
339 db : `Database`
340 A new `Database` instance.
341 """
342 return cls.fromEngine(
343 cls.makeEngine(uri, writeable=writeable), origin=origin, namespace=namespace, writeable=writeable
344 )
346 @abstractmethod
347 def clone(self) -> Database:
348 """Make an independent copy of this `Database` object.
350 Returns
351 -------
352 db : `Database`
353 A new `Database` instance with the same configuration as this
354 instance.
355 """
356 raise NotImplementedError()
358 @classmethod
359 @abstractmethod
360 def makeEngine(
361 cls, uri: str | sqlalchemy.engine.URL, *, writeable: bool = True
362 ) -> sqlalchemy.engine.Engine:
363 """Create a `sqlalchemy.engine.Engine` from a SQLAlchemy URI.
365 Parameters
366 ----------
367 uri : `str` or `sqlalchemy.engine.URL`
368 A SQLAlchemy URI connection string.
369 writeable : `bool`, optional
370 If `True`, allow write operations on the database, including
371 ``CREATE TABLE``.
373 Returns
374 -------
375 engine : `sqlalchemy.engine.Engine`
376 A database engine.
378 Notes
379 -----
380 Subclasses that support other ways to connect to a database are
381 encouraged to add optional arguments to their implementation of this
382 method, as long as they maintain compatibility with the base class
383 call signature.
384 """
385 raise NotImplementedError()
387 @classmethod
388 @abstractmethod
389 def fromEngine(
390 cls,
391 engine: sqlalchemy.engine.Engine,
392 *,
393 origin: int,
394 namespace: str | None = None,
395 writeable: bool = True,
396 ) -> Database:
397 """Create a new `Database` from an existing `sqlalchemy.engine.Engine`.
399 Parameters
400 ----------
401 engine : `sqlalchemy.engine.Engine`
402 The engine for the database. May be shared between `Database`
403 instances.
404 origin : `int`
405 An integer ID that should be used as the default for any datasets,
406 quanta, or other entities that use a (autoincrement, origin)
407 compound primary key.
408 namespace : `str`, optional
409 A different database namespace (i.e. schema) the new instance
410 should be associated with. If `None` (default), the namespace
411 (if any) is inferred from the connection.
412 writeable : `bool`, optional
413 If `True`, allow write operations on the database, including
414 ``CREATE TABLE``.
416 Returns
417 -------
418 db : `Database`
419 A new `Database` instance.
421 Notes
422 -----
423 This method allows different `Database` instances to share the same
424 engine, which is desirable when they represent different namespaces
425 can be queried together.
426 """
427 raise NotImplementedError()
429 @final
430 @contextmanager
431 def session(self) -> Iterator[None]:
432 """Return a context manager that represents a session (persistent
433 connection to a database).
435 Returns
436 -------
437 context : `AbstractContextManager` [ `None` ]
438 A context manager that does not return a value when entered.
440 Notes
441 -----
442 This method should be used when a sequence of read-only SQL operations
443 will be performed in rapid succession *without* a requirement that they
444 yield consistent results in the presence of concurrent writes (or, more
445 rarely, when conflicting concurrent writes are rare/impossible and the
446 session will be open long enough that a transaction is inadvisable).
447 """
448 with self._session():
449 yield
451 @final
452 @contextmanager
453 def transaction(
454 self,
455 *,
456 interrupting: bool = False,
457 savepoint: bool = False,
458 lock: Iterable[sqlalchemy.schema.Table] = (),
459 for_temp_tables: bool = False,
460 ) -> Iterator[None]:
461 """Return a context manager that represents a transaction.
463 Parameters
464 ----------
465 interrupting : `bool`, optional
466 If `True` (`False` is default), this transaction block may not be
467 nested without an outer one, and attempting to do so is a logic
468 (i.e. assertion) error.
469 savepoint : `bool`, optional
470 If `True` (`False` is default), create a `SAVEPOINT`, allowing
471 exceptions raised by the database (e.g. due to constraint
472 violations) during this transaction's context to be caught outside
473 it without also rolling back all operations in an outer transaction
474 block. If `False`, transactions may still be nested, but a
475 rollback may be generated at any level and affects all levels, and
476 commits are deferred until the outermost block completes. If any
477 outer transaction block was created with ``savepoint=True``, all
478 inner blocks will be as well (regardless of the actual value
479 passed). This has no effect if this is the outermost transaction.
480 lock : `~collections.abc.Iterable` [ `sqlalchemy.schema.Table` ], \
481 optional
482 A list of tables to lock for the duration of this transaction.
483 These locks are guaranteed to prevent concurrent writes and allow
484 this transaction (only) to acquire the same locks (others should
485 block), but only prevent concurrent reads if the database engine
486 requires that in order to block concurrent writes.
487 for_temp_tables : `bool`, optional
488 If `True`, this transaction may involve creating temporary tables.
490 Returns
491 -------
492 context : `AbstractContextManager` [ `None` ]
493 A context manager that commits the transaction when it is exited
494 without error and rolls back the transactoin when it is exited via
495 an exception.
497 Notes
498 -----
499 All transactions on a connection managed by one or more `Database`
500 instances _must_ go through this method, or transaction state will not
501 be correctly managed.
502 """
503 with self._transaction(
504 interrupting=interrupting, savepoint=savepoint, lock=lock, for_temp_tables=for_temp_tables
505 ):
506 yield
    @contextmanager
    def temporary_table(
        self, spec: ddl.TableSpec, name: str | None = None
    ) -> Iterator[sqlalchemy.schema.Table]:
        """Return a context manager that creates and then drops a temporary
        table.

        Parameters
        ----------
        spec : `ddl.TableSpec`
            Specification for the columns. Unique and foreign key constraints
            may be ignored.
        name : `str`, optional
            If provided, the name of the SQL construct. If not provided, an
            opaque but unique identifier is generated.

        Returns
        -------
        context : `AbstractContextManager` [ `sqlalchemy.schema.Table` ]
            A context manager that returns a SQLAlchemy representation of the
            temporary table when entered.

        Notes
        -----
        Temporary tables may be created, dropped, and written to even in
        read-only databases - at least according to the Python-level
        protections in the `Database` classes. Server permissions may say
        otherwise, but in that case they probably need to be modified to
        support the full range of expected read-only butler behavior.
        """
        # The table is created and dropped on the same session connection.
        with self._session() as connection:
            table = self._make_temporary_table(connection, spec=spec, name=name)
            # Record the table key; `isTableWriteable` consults this set.
            self._temp_tables.add(table.key)
            try:
                yield table
            finally:
                # Drop the table inside a transaction whether or not the
                # caller's block raised, then stop tracking it.
                with self._transaction():
                    table.drop(connection)
                self._temp_tables.remove(table.key)
    @contextmanager
    def _session(self) -> Iterator[sqlalchemy.engine.Connection]:
        """Protected implementation for `session` that actually returns the
        connection.

        This method is for internal `Database` calls that need the actual
        SQLAlchemy connection object. It should be overridden by subclasses
        instead of `session` itself.

        Returns
        -------
        context : `AbstractContextManager` [ `sqlalchemy.engine.Connection` ]
            A context manager that returns a SQLAlchemy connection when
            entered.

        Notes
        -----
        Sessions nest: if a session is already open, its connection is
        reused and left open on exit. Only the outermost session opens the
        connection and closes it again.
        """
        if self._session_connection is not None:
            # session already started, just reuse that
            yield self._session_connection
        else:
            try:
                # open new connection and close it when done
                self._session_connection = self._engine.connect()
                yield self._session_connection
            finally:
                # The connection is still `None` here if `connect()` itself
                # raised, in which case there is nothing to clean up.
                if self._session_connection is not None:
                    self._session_connection.close()
                    self._session_connection = None
                    # Temporary tables only live within session
                    self._temp_tables = set()
    @contextmanager
    def _transaction(
        self,
        *,
        interrupting: bool = False,
        savepoint: bool = False,
        lock: Iterable[sqlalchemy.schema.Table] = (),
        for_temp_tables: bool = False,
    ) -> Iterator[tuple[bool, sqlalchemy.engine.Connection]]:
        """Protected implementation for `transaction` that actually returns the
        connection and whether this is a new outermost transaction.

        This method is for internal `Database` calls that need the actual
        SQLAlchemy connection object. It should be overridden by subclasses
        instead of `transaction` itself.

        Parameters
        ----------
        interrupting : `bool`, optional
            If `True` (`False` is default), this transaction block may not be
            nested without an outer one, and attempting to do so is a logic
            (i.e. assertion) error.
        savepoint : `bool`, optional
            If `True` (`False` is default), create a `SAVEPOINT`, allowing
            exceptions raised by the database (e.g. due to constraint
            violations) during this transaction's context to be caught outside
            it without also rolling back all operations in an outer transaction
            block. If `False`, transactions may still be nested, but a
            rollback may be generated at any level and affects all levels, and
            commits are deferred until the outermost block completes. If any
            outer transaction block was created with ``savepoint=True``, all
            inner blocks will be as well (regardless of the actual value
            passed). This has no effect if this is the outermost transaction.
        lock : `~collections.abc.Iterable` [ `sqlalchemy.schema.Table` ], \
                optional
            A list of tables to lock for the duration of this transaction.
            These locks are guaranteed to prevent concurrent writes and allow
            this transaction (only) to acquire the same locks (others should
            block), but only prevent concurrent reads if the database engine
            requires that in order to block concurrent writes.
        for_temp_tables : `bool`, optional
            If `True`, this transaction may involve creating temporary tables.
            This base implementation does not use the value.

        Returns
        -------
        context : `AbstractContextManager` [ `tuple` [ `bool`,
                `sqlalchemy.engine.Connection` ] ]
            A context manager that commits the transaction when it is exited
            without error and rolls back the transaction when it is exited via
            an exception. When entered, it returns a tuple of:

            - ``is_new`` (`bool`): whether this is a new (outermost)
              transaction;
            - ``connection`` (`sqlalchemy.engine.Connection`): the connection.
        """
        with self._session() as connection:
            already_in_transaction = connection.in_transaction()
            assert not (interrupting and already_in_transaction), (
                "Logic error in transaction nesting: an operation that would "
                "interrupt the active transaction context has been requested."
            )
            # Once any enclosing block uses a savepoint, inner blocks do too.
            savepoint = savepoint or connection.in_nested_transaction()
            # `trans` stays `None` when this block neither begins a
            # transaction nor creates a savepoint of its own.
            trans: sqlalchemy.engine.Transaction | None
            if already_in_transaction:
                if savepoint:
                    trans = connection.begin_nested()
                else:
                    # Nested non-savepoint transactions don't do anything.
                    trans = None
            else:
                # Use a regular (non-savepoint) transaction always for the
                # outermost context.
                trans = connection.begin()
            # Locks are requested only after a transaction is active.
            self._lockTables(connection, lock)
            try:
                yield not already_in_transaction, connection
                if trans is not None:
                    trans.commit()
            except BaseException:
                # Catch everything, including KeyboardInterrupt, so that the
                # rollback happens before the exception is re-raised.
                if trans is not None:
                    trans.rollback()
                raise
662 @abstractmethod
663 def _lockTables(
664 self, connection: sqlalchemy.engine.Connection, tables: Iterable[sqlalchemy.schema.Table] = ()
665 ) -> None:
666 """Acquire locks on the given tables.
668 This is an implementation hook for subclasses, called by `transaction`.
669 It should not be called directly by other code.
671 Parameters
672 ----------
673 connection : `sqlalchemy.engine.Connection`
674 Database connection object. It is guaranteed that transaction is
675 already in a progress for this connection.
676 tables : `~collections.abc.Iterable` [ `sqlalchemy.schema.Table` ], \
677 optional
678 A list of tables to lock for the duration of this transaction.
679 These locks are guaranteed to prevent concurrent writes and allow
680 this transaction (only) to acquire the same locks (others should
681 block), but only prevent concurrent reads if the database engine
682 requires that in order to block concurrent writes.
683 """
684 raise NotImplementedError()
686 def isTableWriteable(self, table: sqlalchemy.schema.Table) -> bool:
687 """Check whether a table is writeable, either because the database
688 connection is read-write or the table is a temporary table.
690 Parameters
691 ----------
692 table : `sqlalchemy.schema.Table`
693 SQLAlchemy table object to check.
695 Returns
696 -------
697 writeable : `bool`
698 Whether this table is writeable.
699 """
700 return self.isWriteable() or table.key in self._temp_tables
702 def assertTableWriteable(self, table: sqlalchemy.schema.Table, msg: str) -> None:
703 """Raise if the given table is not writeable, either because the
704 database connection is read-write or the table is a temporary table.
706 Parameters
707 ----------
708 table : `sqlalchemy.schema.Table`
709 SQLAlchemy table object to check.
710 msg : `str`, optional
711 If provided, raise `ReadOnlyDatabaseError` instead of returning
712 `False`, with this message.
713 """
714 if not self.isTableWriteable(table):
715 raise ReadOnlyDatabaseError(msg)
    @contextmanager
    def declareStaticTables(self, *, create: bool) -> Iterator[StaticTablesContext]:
        """Return a context manager in which the database's static DDL schema
        can be declared.

        Parameters
        ----------
        create : `bool`
            If `True`, attempt to create all tables at the end of the context.
            If `False`, they will be assumed to already exist.

        Returns
        -------
        schema : `StaticTablesContext`
            A helper object that is used to add new tables.

        Raises
        ------
        ReadOnlyDatabaseError
            Raised if ``create`` is `True` and `Database.isWriteable` is
            `False`.
        SchemaAlreadyDefinedError
            Raised if ``create`` is `True` and the namespace already
            contains tables.

        Examples
        --------
        Given a `Database` instance ``db``::

            with db.declareStaticTables(create=True) as schema:
                schema.addTable("table1", TableSpec(...))
                schema.addTable("table2", TableSpec(...))

        Notes
        -----
        A database's static DDL schema must be declared before any dynamic
        tables are managed via calls to `ensureTableExists` or
        `getExistingTable`. The order in which static schema tables are added
        inside the context block is unimportant; they will automatically be
        sorted and added in an order consistent with their foreign key
        relationships.
        """
        if create and not self.isWriteable():
            raise ReadOnlyDatabaseError(f"Cannot create tables in read-only database {self}.")
        # Start from fresh metadata; it is reset to `None` below if anything
        # in this context fails.
        self._metadata = sqlalchemy.MetaData(schema=self.namespace)
        try:
            with self._transaction() as (_, connection):
                context = StaticTablesContext(self, connection)
                if create and context._tableNames:
                    # Looks like database is already initialized, to avoid
                    # danger of modifying/destroying valid schema we refuse to
                    # do anything in this case
                    raise SchemaAlreadyDefinedError(f"Cannot create tables in non-empty database {self}.")
                yield context
                # Attach the foreign keys only now, once the caller has
                # declared every table.
                for table, foreignKey in context._foreignKeys:
                    table.append_constraint(foreignKey)
                if create:
                    # Create the namespace first if it does not exist yet.
                    if (
                        self.namespace is not None
                        and self.namespace not in context._inspector.get_schema_names()
                    ):
                        connection.execute(sqlalchemy.schema.CreateSchema(self.namespace))
                    # In our tables we have columns that make use of sqlalchemy
                    # Sequence objects. There is currently a bug in sqlalchemy
                    # that causes a deprecation warning to be thrown on a
                    # property of the Sequence object when the repr for the
                    # sequence is created. Here a filter is used to catch these
                    # deprecation warnings when tables are created.
                    with warnings.catch_warnings():
                        warnings.simplefilter("ignore", category=sqlalchemy.exc.SADeprecationWarning)
                        self._metadata.create_all(connection)
                    # call all initializer methods sequentially
                    for init in context._initializers:
                        init(self)
        except BaseException:
            # Do not keep metadata for a schema that failed to be declared.
            self._metadata = None
            raise
792 @abstractmethod
793 def isWriteable(self) -> bool:
794 """Return `True` if this database can be modified by this client."""
795 raise NotImplementedError()
797 @abstractmethod
798 def __str__(self) -> str:
799 """Return a human-readable identifier for this `Database`, including
800 any namespace or schema that identifies its names within a `Registry`.
801 """
802 raise NotImplementedError()
804 @property
805 def dialect(self) -> sqlalchemy.engine.Dialect:
806 """The SQLAlchemy dialect for this database engine
807 (`sqlalchemy.engine.Dialect`).
808 """
809 return self._engine.dialect
811 def shrinkDatabaseEntityName(self, original: str) -> str:
812 """Return a version of the given name that fits within this database
813 engine's length limits for table, constraint, indexes, and sequence
814 names.
816 Implementations should not assume that simple truncation is safe,
817 because multiple long names often begin with the same prefix.
819 The default implementation simply returns the given name.
821 Parameters
822 ----------
823 original : `str`
824 The original name.
826 Returns
827 -------
828 shrunk : `str`
829 The new, possibly shortened name.
830 """
831 return original
833 def expandDatabaseEntityName(self, shrunk: str) -> str:
834 """Retrieve the original name for a database entity that was too long
835 to fit within the database engine's limits.
837 Parameters
838 ----------
839 shrunk : `str`
840 The original name.
842 Returns
843 -------
844 shrunk : `str`
845 The new, possibly shortened name.
846 """
847 return shrunk
849 def _mangleTableName(self, name: str) -> str:
850 """Map a logical, user-visible table name to the true table name used
851 in the database.
853 The default implementation returns the given name unchanged.
855 Parameters
856 ----------
857 name : `str`
858 Input table name. Should not include a namespace (i.e. schema)
859 prefix.
861 Returns
862 -------
863 mangled : `str`
864 Mangled version of the table name (still with no namespace prefix).
866 Notes
867 -----
868 Reimplementations of this method must be idempotent - mangling an
869 already-mangled name must have no effect.
870 """
871 return name
873 def _makeColumnConstraints(self, table: str, spec: ddl.FieldSpec) -> list[sqlalchemy.CheckConstraint]:
874 """Create constraints based on this spec.
876 Parameters
877 ----------
878 table : `str`
879 Name of the table this column is being added to.
880 spec : `FieldSpec`
881 Specification for the field to be added.
883 Returns
884 -------
885 constraint : `list` of `sqlalchemy.CheckConstraint`
886 Constraint added for this column.
887 """
888 # By default we return no additional constraints
889 return []
def _convertFieldSpec(
    self, table: str, spec: ddl.FieldSpec, metadata: sqlalchemy.MetaData, **kwargs: Any
) -> sqlalchemy.schema.Column:
    """Translate a `FieldSpec` into a `sqlalchemy.schema.Column`.

    Parameters
    ----------
    table : `str`
        Name of the table this column is being added to.
    spec : `FieldSpec`
        Specification for the field to be added.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema this field's table is
        being added to.
    **kwargs
        Extra keyword arguments passed through to the
        `sqlalchemy.schema.Column` constructor, so that derived classes
        can delegate to ``super()`` while changing only small details.

    Returns
    -------
    column : `sqlalchemy.schema.Column`
        SQLAlchemy representation of the field.
    """
    positional = []
    if spec.autoincrement:
        # Attach an explicit sequence for databases that cannot
        # autoincrement natively; sqlalchemy ignores it for databases that
        # can.
        sequence_name = self.shrinkDatabaseEntityName(f"{table}_seq_{spec.name}")
        positional.append(sqlalchemy.Sequence(sequence_name, metadata=metadata))
    doc = spec.doc
    assert doc is None or isinstance(doc, str), f"Bad doc for {table}.{spec.name}."
    column = sqlalchemy.schema.Column(
        spec.name,
        spec.getSizedColumnType(),
        *positional,
        nullable=spec.nullable,
        primary_key=spec.primaryKey,
        comment=doc,
        server_default=spec.default,
        **kwargs,
    )
    return column
def _convertForeignKeySpec(
    self, table: str, spec: ddl.ForeignKeySpec, metadata: sqlalchemy.MetaData, **kwargs: Any
) -> sqlalchemy.schema.ForeignKeyConstraint:
    """Convert a `ForeignKeySpec` to a
    `sqlalchemy.schema.ForeignKeyConstraint`.

    Parameters
    ----------
    table : `str`
        Name of the table this foreign key is being added to.
    spec : `ForeignKeySpec`
        Specification for the foreign key to be added.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema this constraint is
        being added to.
    **kwargs
        Additional keyword arguments to forward to the
        `sqlalchemy.schema.ForeignKeyConstraint` constructor. This is
        provided to make it easier for derived classes to delegate to
        ``super()`` while making only minor changes.

    Returns
    -------
    constraint : `sqlalchemy.schema.ForeignKeyConstraint`
        SQLAlchemy representation of the constraint.
    """
    # Mangle the referenced table name once; it is needed both for the
    # constraint name and for the fully-qualified target column names.
    target_table = self._mangleTableName(spec.table)
    name = self.shrinkDatabaseEntityName(
        "_".join(["fkey", table, target_table] + list(spec.target) + list(spec.source))
    )
    return sqlalchemy.schema.ForeignKeyConstraint(
        spec.source,
        [f"{target_table}.{col}" for col in spec.target],
        name=name,
        ondelete=spec.onDelete,
        # Forward caller-supplied options as documented; they used to be
        # accepted and then silently discarded.
        **kwargs,
    )
976 def _convertExclusionConstraintSpec(
977 self,
978 table: str,
979 spec: tuple[str | type[TimespanDatabaseRepresentation], ...],
980 metadata: sqlalchemy.MetaData,
981 ) -> sqlalchemy.schema.Constraint:
982 """Convert a `tuple` from `ddl.TableSpec.exclusion` into a SQLAlchemy
983 constraint representation.
985 Parameters
986 ----------
987 table : `str`
988 Name of the table this constraint is being added to.
989 spec : `tuple` [ `str` or `type` ]
990 A tuple of `str` column names and the `type` object returned by
991 `getTimespanRepresentation` (which must appear exactly once),
992 indicating the order of the columns in the index used to back the
993 constraint.
994 metadata : `sqlalchemy.MetaData`
995 SQLAlchemy representation of the DDL schema this constraint is
996 being added to.
998 Returns
999 -------
1000 constraint : `sqlalchemy.schema.Constraint`
1001 SQLAlchemy representation of the constraint.
1003 Raises
1004 ------
1005 NotImplementedError
1006 Raised if this database does not support exclusion constraints.
1007 """
1008 raise NotImplementedError(f"Database {self} does not support exclusion constraints.")
def _convertTableSpec(
    self, name: str, spec: ddl.TableSpec, metadata: sqlalchemy.MetaData, **kwargs: Any
) -> sqlalchemy.schema.Table:
    """Translate a `TableSpec` into a `sqlalchemy.schema.Table`.

    Parameters
    ----------
    name : `str`
        The name of the table.
    spec : `TableSpec`
        Specification for the table to be created.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema this table is being
        added to.
    **kwargs
        Extra keyword arguments passed through to the
        `sqlalchemy.schema.Table` constructor, so that derived classes can
        delegate to ``super()`` while changing only small details.

    Returns
    -------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy representation of the table.

    Notes
    -----
    This method does not handle ``spec.foreignKeys`` at all, in order to
    avoid circular dependencies. These are added by higher-level logic in
    `ensureTableExists`, `getExistingTable`, and `declareStaticTables`.
    """
    name = self._mangleTableName(name)

    # Columns first, then any per-column constraints.
    items: list[sqlalchemy.schema.SchemaItem] = []
    for fieldSpec in spec.fields:
        items.append(self._convertFieldSpec(name, fieldSpec, metadata))
    for fieldSpec in spec.fields:
        items.extend(self._makeColumnConstraints(name, fieldSpec))

    # Remember which column tuples already get an index from the primary
    # key or a unique constraint, so that explicit and foreign key indexes
    # never duplicate them.
    primary_key = tuple(fieldSpec.name for fieldSpec in spec.fields if fieldSpec.primaryKey)
    covered = {primary_key}

    for columns in spec.unique:
        unique_name = self.shrinkDatabaseEntityName("_".join([name, "unq"] + list(columns)))
        items.append(sqlalchemy.schema.UniqueConstraint(*columns, name=unique_name))
    covered.update(spec.unique)

    for index in spec.indexes:
        if index.columns in covered:
            continue
        index_name = self.shrinkDatabaseEntityName("_".join([name, "idx"] + list(index.columns)))
        items.append(
            sqlalchemy.schema.Index(
                index_name,
                *index.columns,
                unique=(index.columns in spec.unique),
                **index.kwargs,
            )
        )
    # Only mark explicit indexes as covered once all of them are processed.
    covered.update(index.columns for index in spec.indexes)

    for fk in spec.foreignKeys:
        if not fk.addIndex or fk.source in covered:
            continue
        fk_index_name = self.shrinkDatabaseEntityName("_".join((name, "fkidx") + fk.source))
        items.append(sqlalchemy.schema.Index(fk_index_name, *fk.source))

    for excl in spec.exclusion:
        items.append(self._convertExclusionConstraintSpec(name, excl, metadata))

    assert spec.doc is None or isinstance(spec.doc, str), f"Bad doc for {name}."
    return sqlalchemy.schema.Table(name, metadata, *items, comment=spec.doc, info={"spec": spec}, **kwargs)
def ensureTableExists(self, name: str, spec: ddl.TableSpec) -> sqlalchemy.schema.Table:
    """Ensure that a table with the given name and specification exists,
    creating it if necessary.

    Parameters
    ----------
    name : `str`
        Name of the table (not including namespace qualifiers).
    spec : `TableSpec`
        Specification for the table. This will be used when creating the
        table, and *may* be used when obtaining an existing table to check
        for consistency, but no such check is guaranteed.

    Returns
    -------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy representation of the table.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False`, and the table does not
        already exist.
    DatabaseConflictError
        Raised if the table exists but ``spec`` is inconsistent with its
        definition.

    Notes
    -----
    This method may not be called within transactions. It may be called on
    read-only databases if and only if the table does in fact already
    exist.

    Subclasses may override this method, but usually should not need to.
    """
    # TODO: if _engine is used to make a table then it uses separate
    # connection and should not interfere with current transaction
    assert (
        self._session_connection is None or not self._session_connection.in_transaction()
    ), "Table creation interrupts transactions."
    assert self._metadata is not None, "Static tables must be declared before dynamic tables."
    table = self.getExistingTable(name, spec)
    if table is not None:
        return table
    if not self.isWriteable():
        raise ReadOnlyDatabaseError(
            f"Table {name} does not exist, and cannot be created because database {self} is read-only."
        )
    table = self._convertTableSpec(name, spec, self._metadata)
    for foreignKeySpec in spec.foreignKeys:
        table.append_constraint(self._convertForeignKeySpec(name, foreignKeySpec, self._metadata))
    try:
        with self._transaction() as (_, connection):
            table.create(connection)
    except sqlalchemy.exc.DatabaseError:
        # Some other process could have created the table meanwhile, which
        # usually causes OperationalError or ProgrammingError. We cannot
        # use IF NOT EXISTS clause in this case due to PostgreSQL race
        # condition on server side which causes IntegrityError. Instead we
        # catch these exceptions (they all inherit DatabaseError) and
        # re-check whether table is now there.
        #
        # The table object built above is already registered in the
        # in-memory metadata, and getExistingTable consults that first.
        # Drop it so the re-check has to look at the actual database;
        # otherwise every creation failure would be silently swallowed.
        self._metadata.remove(table)
        table = self.getExistingTable(name, spec)
        if table is None:
            raise
    return table
def getExistingTable(self, name: str, spec: ddl.TableSpec) -> sqlalchemy.schema.Table | None:
    """Look up an existing table with the given name and specification.

    Parameters
    ----------
    name : `str`
        Name of the table (not including namespace qualifiers).
    spec : `TableSpec`
        Specification for the table. This will be used when creating the
        SQLAlchemy representation of the table, and it is used to
        check that the actual table in the database is consistent.

    Returns
    -------
    table : `sqlalchemy.schema.Table` or `None`
        SQLAlchemy representation of the table, or `None` if it does not
        exist.

    Raises
    ------
    DatabaseConflictError
        Raised if the table exists but ``spec`` is inconsistent with its
        definition.

    Notes
    -----
    This method can be called within transactions and never modifies the
    database.

    Subclasses may override this method, but usually should not need to.
    """
    assert self._metadata is not None, "Static tables must be declared before dynamic tables."
    name = self._mangleTableName(name)
    key = name if self.namespace is None else f"{self.namespace}.{name}"
    known = self._metadata.tables.get(key)
    if known is not None:
        # Already defined in memory: only the column names are compared.
        if spec.fields.names != set(known.columns.keys()):
            raise DatabaseConflictError(
                f"Table '{name}' has already been defined differently; the new "
                f"specification has columns {list(spec.fields.names)}, while "
                f"the previous definition has {list(known.columns.keys())}."
            )
        return known

    # Not defined in memory yet; ask the database, reusing the session
    # connection when there is one.
    bind = self._engine if self._session_connection is None else self._session_connection
    inspector = sqlalchemy.inspect(bind, raiseerr=True)
    if name not in inspector.get_table_names(schema=self.namespace):
        return None
    _checkExistingTableDefinition(name, spec, inspector.get_columns(name, schema=self.namespace))
    table = self._convertTableSpec(name, spec, self._metadata)
    for foreignKeySpec in spec.foreignKeys:
        table.append_constraint(self._convertForeignKeySpec(name, foreignKeySpec, self._metadata))
    return table
def _make_temporary_table(
    self,
    connection: sqlalchemy.engine.Connection,
    spec: ddl.TableSpec,
    name: str | None = None,
    **kwargs: Any,
) -> sqlalchemy.schema.Table:
    """Create a temporary table.

    Parameters
    ----------
    connection : `sqlalchemy.engine.Connection`
        Connection to use when creating the table.
    spec : `TableSpec`
        Specification for the table.
    name : `str`, optional
        A unique (within this session/connection) name for the table.
        Subclasses may override to modify the actual name used. If not
        provided, a unique name will be generated.
    **kwargs
        Extra keyword arguments passed through to the
        `sqlalchemy.schema.Table` constructor, so that derived classes can
        delegate to ``super()`` while changing only small details.

    Returns
    -------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy representation of the table.

    Raises
    ------
    RuntimeError
        Raised if the static schema has not been defined yet.
    ValueError
        Raised if the transformed table name is already registered as a
        temporary table and differs from the requested name.
    """
    if name is None:
        # Random hex keeps generated names unique.
        name = f"tmp_{uuid.uuid4().hex}"
    metadata = self._metadata
    if metadata is None:
        raise RuntimeError("Cannot create temporary table before static schema is defined.")
    table_options = dict(prefixes=["TEMPORARY"], schema=sqlalchemy.schema.BLANK_SCHEMA, **kwargs)
    table = self._convertTableSpec(name, spec, metadata, **table_options)
    key = table.key
    if key in self._temp_tables and key != name:
        raise ValueError(
            f"A temporary table with name {name} (transformed to {key} by "
            "Database) already exists."
        )
    for foreignKeySpec in spec.foreignKeys:
        constraint = self._convertForeignKeySpec(name, foreignKeySpec, metadata)
        table.append_constraint(constraint)
    with self._transaction():
        table.create(connection)
    return table
@classmethod
def getTimespanRepresentation(cls) -> type[TimespanDatabaseRepresentation]:
    """Return the `type` that describes how `Timespan` objects are stored
    in this database.

    `Database` does not automatically use the return type of this method
    anywhere else; calling code is responsible for making sure that DDL
    and queries are consistent with it.

    Returns
    -------
    TimespanReprClass : `type` (`TimespanDatabaseRepresentation` subclass)
        A type that encapsulates the way `Timespan` objects should be
        stored in this database.

    Notes
    -----
    Timespan-mangling logic is deliberately kept outside the `Database`
    implementations, even though the choice of representation is
    ultimately up to a `Database` implementation, for two reasons:

    - Timespans appear in relatively few tables and queries in our
      typical usage, and the code that operates on them is already aware
      that it is working with timespans. In contrast, a
      timespan-representation-aware implementation of, say, `insert`,
      would need extra logic to identify when timespan-mangling needed
      to occur, which would usually be useless overhead.

    - SQLAlchemy's rich SELECT query expression system has no way to wrap
      multiple columns in a single expression object (the ORM does, but
      we are not using the ORM). So we would have to wrap _much_ more of
      that code in our own interfaces to encapsulate timespan
      representations there.
    """
    # Default representation used unless a subclass overrides this method.
    representation = TimespanDatabaseRepresentation.Compound
    return representation
def sync(
    self,
    table: sqlalchemy.schema.Table,
    *,
    keys: dict[str, Any],
    compared: dict[str, Any] | None = None,
    extra: dict[str, Any] | None = None,
    returning: Sequence[str] | None = None,
    update: bool = False,
) -> tuple[dict[str, Any] | None, bool | dict[str, Any]]:
    """Insert into a table as necessary to ensure database contains
    values equivalent to the given ones.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table to be queried and possibly inserted into.
    keys : `dict`
        Column name-value pairs used to search for an existing row; must
        be a combination that can be used to select a single row if one
        exists. If such a row does not exist, these values are used in
        the insert.
    compared : `dict`, optional
        Column name-value pairs that are compared to those in any existing
        row. If such a row does not exist, these values are used in the
        insert.
    extra : `dict`, optional
        Column name-value pairs that are ignored if a matching row exists,
        but used in an insert if one is necessary.
    returning : `~collections.abc.Sequence` of `str`, optional
        The names of columns whose values should be returned.
    update : `bool`, optional
        If `True` (`False` is default), update the existing row with the
        values in ``compared`` instead of raising `DatabaseConflictError`.

    Returns
    -------
    row : `dict`, optional
        The value of the fields indicated by ``returning``, or `None` if
        ``returning`` is `None`.
    inserted_or_updated : `bool` or `dict`
        If `True`, a new row was inserted; if `False`, a matching row
        already existed. If a `dict` (only possible if ``update=True``),
        then an existing row was updated, and the dict maps the names of
        the updated columns to their *old* values (new values can be
        obtained from ``compared``).

    Raises
    ------
    DatabaseConflictError
        Raised if the values in ``compared`` do not match the values in the
        database.
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False`, and no matching record
        already exists.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    It may be called on read-only databases if and only if the matching row
    does in fact already exist.
    """

    def check() -> tuple[int, dict[str, Any] | None, list | None]:
        """Query for a row that matches the ``keys`` argument, and compare
        to what was given by the caller.

        Returns
        -------
        n : `int`
            Number of matching rows. ``n != 1`` is always an error, but
            it's a different kind of error depending on where `check` is
            being called.
        bad : `dict` or `None`
            The subset of the keys of ``compared`` for which the existing
            values did not match the given one, mapped to the existing
            values in the database. A non-empty ``bad`` is a conflict;
            how it is handled depends on the caller's context. `None` if
            ``n != 1``.
        result : `list` or `None`
            Results in the database that correspond to the columns given
            in ``returning``, or `None` if ``returning is None`` or
            ``n != 1``.
        """
        toSelect: set[str] = set()
        if compared is not None:
            toSelect.update(compared.keys())
        if returning is not None:
            toSelect.update(returning)
        if not toSelect:
            # Need to select some column, even if we just want to see
            # how many rows we get back.
            toSelect.add(next(iter(keys.keys())))
        selectSql = (
            sqlalchemy.sql.select(*[table.columns[k].label(k) for k in toSelect])
            .select_from(table)
            .where(sqlalchemy.sql.and_(*[table.columns[k] == v for k, v in keys.items()]))
        )
        with self._transaction() as (_, connection):
            fetched = list(connection.execute(selectSql).mappings())
        if len(fetched) != 1:
            return len(fetched), None, None
        existing = fetched[0]
        if compared is not None:

            def safeNotEqual(a: Any, b: Any) -> bool:
                """Return `True` if ``a`` and ``b`` differ, comparing
                `astropy.time.Time` values via
                `time_utils.TimeConverter.times_equal` instead of ``!=``.
                """
                if isinstance(a, astropy.time.Time):
                    return not time_utils.TimeConverter().times_equal(a, b)
                return a != b

            # Map each mismatching compared column to the value currently
            # stored in the database.
            inconsistencies = {
                k: existing[k] for k, v in compared.items() if safeNotEqual(existing[k], v)
            }
        else:
            inconsistencies = {}
        if returning is not None:
            toReturn: list | None = [existing[k] for k in returning]
        else:
            toReturn = None
        return 1, inconsistencies, toReturn

    def _format_bad(inconsistencies: dict[str, Any]) -> str:
        """Format the 'bad' dictionary of existing values returned by
        ``check`` into a string suitable for an error message.
        """
        assert compared is not None, "Should not be able to get inconsistencies without comparing."
        return ", ".join(f"{k}: {v!r} != {compared[k]!r}" for k, v in inconsistencies.items())

    if self.isTableWriteable(table):
        # Try an insert first, but allow it to fail (in only specific
        # ways).
        row = keys.copy()
        if compared is not None:
            row.update(compared)
        if extra is not None:
            row.update(extra)
        with self.transaction():
            # `ensure` returns the number of rows actually inserted.
            inserted = bool(self.ensure(table, row))
            inserted_or_updated: bool | dict[str, Any]
            # Need to perform check() for this branch inside the
            # transaction, so we roll back an insert that didn't do
            # what we expected.  That limits the extent to which we
            # can reduce duplication between this block and the other
            # ones that perform similar logic.
            n, bad, result = check()
            if n < 1:
                raise ConflictingDefinitionError(
                    f"Attempted to ensure {row} exists by inserting it with ON CONFLICT IGNORE, "
                    f"but a post-insert query on {keys} returned no results. "
                    f"Insert was {'' if inserted else 'not '}reported as successful. "
                    "This can occur if the insert violated a database constraint other than the "
                    "unique constraint or primary key used to identify the row in this call."
                )
            elif n > 1:
                raise RuntimeError(
                    f"Keys passed to sync {keys.keys()} do not comprise a "
                    f"unique constraint for table {table.name}."
                )
            elif bad:
                assert (
                    compared is not None
                ), "Should not be able to get inconsistencies without comparing."
                if inserted:
                    raise RuntimeError(
                        f"Conflict ({bad}) in sync after successful insert; this is "
                        "possible if the same table is being updated by a concurrent "
                        "process that isn't using sync, but it may also be a bug in "
                        "daf_butler."
                    )
                elif update:
                    # Overwrite only the columns that differed; `bad`
                    # (the old values) is what gets reported back.
                    with self._transaction() as (_, connection):
                        connection.execute(
                            table.update()
                            .where(sqlalchemy.sql.and_(*[table.columns[k] == v for k, v in keys.items()]))
                            .values(**{k: compared[k] for k in bad})
                        )
                    inserted_or_updated = bad
                else:
                    raise DatabaseConflictError(
                        f"Conflict in sync for table {table.name} on column(s) {_format_bad(bad)}."
                    )
            else:
                inserted_or_updated = inserted
    else:
        # Database is not writeable; just see if the row exists.
        n, bad, result = check()
        if n < 1:
            raise ReadOnlyDatabaseError("sync needs to insert, but database is read-only.")
        elif n > 1:
            raise RuntimeError("Keys passed to sync do not comprise a unique constraint.")
        elif bad:
            if update:
                raise ReadOnlyDatabaseError("sync needs to update, but database is read-only.")
            else:
                raise DatabaseConflictError(
                    f"Conflict in sync for table {table.name} on column(s) {_format_bad(bad)}."
                )
        inserted_or_updated = False
    if returning is None:
        return None, inserted_or_updated
    else:
        assert result is not None
        # Pair each requested column name with the value fetched for it.
        return dict(zip(returning, result, strict=True)), inserted_or_updated
def insert(
    self,
    table: sqlalchemy.schema.Table,
    *rows: dict,
    returnIds: bool = False,
    select: sqlalchemy.sql.expression.SelectBase | None = None,
    names: Iterable[str] | None = None,
) -> list[int] | None:
    """Insert one or more rows into a table, optionally returning
    autoincrement primary key values.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    *rows : `dict`
        Positional arguments are the rows to be inserted, as dictionaries
        mapping column name to value. The keys in all dictionaries must
        be the same.
    returnIds : `bool`, optional
        If `True` (`False` is default), return the values of the table's
        autoincrement primary key field (which must exist).
    select : `sqlalchemy.sql.SelectBase`, optional
        A SELECT query expression to insert rows from. Cannot be provided
        with either ``rows`` or ``returnIds=True``.
    names : `~collections.abc.Iterable` [ `str` ], optional
        Names of columns in ``table`` to be populated, ordered to match the
        columns returned by ``select``. Ignored if ``select`` is `None`.
        If not provided, the columns returned by ``select`` must be named
        to match the desired columns of ``table``.

    Returns
    -------
    ids : `None`, or `list` of `int`
        If ``returnIds`` is `True`, a `list` containing the inserted
        values for the table's autoincrement primary key.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.
    TypeError
        Raised if ``select`` is combined with ``rows`` or
        ``returnIds=True``.

    Notes
    -----
    The default implementation uses bulk insert syntax when ``returnIds``
    is `False`, and a loop over single-row insert operations when it is
    `True`.

    Derived classes should reimplement when they can provide a more
    efficient implementation (especially for the latter case).

    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.
    """
    self.assertTableWriteable(table, f"Cannot insert into read-only table {table}.")
    if select is not None and (rows or returnIds):
        raise TypeError("'select' is incompatible with passing value rows or returnIds=True.")
    if not rows and select is None:
        # Nothing to insert at all.
        return [] if returnIds else None
    with self._transaction() as (_, connection):
        if returnIds:
            # One statement per row, so each generated key can be read.
            statement = table.insert()
            return [connection.execute(statement, row).inserted_primary_key[0] for row in rows]
        if select is None:
            connection.execute(table.insert(), rows)
            return None
        if names is None:
            # columns() is deprecated since 1.4, but
            # selected_columns() method did not exist in 1.3.
            if hasattr(select, "selected_columns"):
                selected = select.selected_columns
            else:
                selected = select.columns
            names = selected.keys()
        connection.execute(table.insert().from_select(list(names), select))
        return None
@abstractmethod
def replace(self, table: sqlalchemy.schema.Table, *rows: dict) -> None:
    """Insert rows into a table, overwriting any existing rows whose
    primary key would otherwise conflict with a new row.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    *rows
        Positional arguments are the rows to be inserted, as dictionaries
        mapping column name to value. The keys in all dictionaries must
        be the same.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    Implementations should raise a `sqlalchemy.exc.IntegrityError`
    exception when a constraint other than the primary key would be
    violated.

    Implementations are not required to support `replace` on tables
    with autoincrement keys.
    """
    # Abstract: concrete databases must supply the implementation.
    raise NotImplementedError()
@abstractmethod
def ensure(self, table: sqlalchemy.schema.Table, *rows: dict, primary_key_only: bool = False) -> int:
    """Insert rows into a table, silently skipping any row whose insertion
    would violate a unique constraint.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    *rows
        Positional arguments are the rows to be inserted, as dictionaries
        mapping column name to value. The keys in all dictionaries must
        be the same.
    primary_key_only : `bool`, optional
        If `True` (`False` is default), only skip rows that violate the
        primary key constraint, and raise an exception (and rollback
        transactions) for other constraint violations.

    Returns
    -------
    count : `int`
        The number of rows actually inserted.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.
        This is raised even if the operation would do nothing even on a
        writeable database.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    Implementations are not required to support `ensure` on tables
    with autoincrement keys.
    """
    # Abstract: concrete databases must supply the implementation.
    raise NotImplementedError()
def delete(self, table: sqlalchemy.schema.Table, columns: Iterable[str], *rows: dict) -> int:
    """Delete one or more rows from a table.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table that rows should be deleted from.
    columns : `~collections.abc.Iterable` of `str`
        The names of columns that will be used to constrain the rows to
        be deleted; these will be combined via ``AND`` to form the
        ``WHERE`` clause of the delete query. If empty, every row in the
        table is deleted.
    *rows
        Positional arguments are the keys of rows to be deleted, as
        dictionaries mapping column name to value. The keys in all
        dictionaries must be exactly the names in ``columns``.

    Returns
    -------
    count : `int`
        Number of rows deleted.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    The default implementation should be sufficient for most derived
    classes.
    """
    self.assertTableWriteable(table, f"Cannot delete from read-only table {table}.")
    # Materialize before testing emptiness: an iterator or generator is
    # always truthy, so an empty one would otherwise be mistaken for
    # "columns given" and short-circuit the delete-everything case.
    columns = list(columns)
    if columns and not rows:
        # If there are no columns, this operation is supposed to delete
        # everything (so we proceed as usual). But if there are columns,
        # but no rows, it was a constrained bulk operation where the
        # constraint is that no rows match, and we should short-circuit
        # while reporting that no rows were affected.
        return 0
    sql = table.delete()

    # More efficient to use IN operator if there is only one
    # variable changing across all rows.
    content: dict[str, set] = defaultdict(set)
    if len(columns) == 1:
        # Nothing to calculate since we can always use IN
        column = columns[0]
        changing_columns = [column]
        content[column] = {row[column] for row in rows}
    else:
        for row in rows:
            for k, v in row.items():
                content[k].add(v)
        changing_columns = [col for col, values in content.items() if len(values) > 1]

    if len(changing_columns) != 1:
        # More than one column changes each time so do explicit bind
        # parameters and have each row processed separately.
        whereTerms = [table.columns[name] == sqlalchemy.sql.bindparam(name) for name in columns]
        if whereTerms:
            sql = sql.where(sqlalchemy.sql.and_(*whereTerms))
        with self._transaction() as (_, connection):
            return connection.execute(sql, rows).rowcount
    else:
        # One of the columns has changing values but any others are
        # fixed. In this case we can use an IN operator and be more
        # efficient.
        name = changing_columns.pop()

        # Simple where clause for the unchanging columns
        clauses = []
        for k, v in content.items():
            if k == name:
                continue
            column = table.columns[k]
            # The set only has one element
            clauses.append(column == v.pop())

        # The IN operator will not work for "infinite" numbers of
        # rows so must batch it up into distinct calls.
        in_content = list(content[name])
        n_elements = len(in_content)

        rowcount = 0
        n_per_loop = 1_000  # Controls how many items to put in IN clause
        with self._transaction() as (_, connection):
            for iposn in range(0, n_elements, n_per_loop):
                endpos = iposn + n_per_loop
                in_clause = table.columns[name].in_(in_content[iposn:endpos])

                newsql = sql.where(sqlalchemy.sql.and_(*clauses, in_clause))
                rowcount += connection.execute(newsql).rowcount
        return rowcount
def deleteWhere(self, table: sqlalchemy.schema.Table, where: sqlalchemy.sql.ColumnElement) -> int:
    """Delete the rows of a table selected by a pre-constructed WHERE
    clause.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table that rows should be deleted from.
    where : `sqlalchemy.sql.ColumnElement`
        Expression used as the ``WHERE`` clause of the delete query; it
        is applied as given.

    Returns
    -------
    count : `int`
        Number of rows deleted.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    The default implementation should be sufficient for most derived
    classes.
    """
    self.assertTableWriteable(table, f"Cannot delete from read-only table {table}.")

    statement = table.delete().where(where)
    with self._transaction() as (_, connection):
        outcome = connection.execute(statement)
        return outcome.rowcount
def update(self, table: sqlalchemy.schema.Table, where: dict[str, str], *rows: dict) -> int:
    """Update one or more rows in a table.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table containing the rows to be updated.
    where : `dict` [`str`, `str`]
        A mapping from the names of columns that will be used to search for
        existing rows to the keys that will hold these values in the
        ``rows`` dictionaries. Note that these may not be the same due to
        SQLAlchemy limitations.
    *rows
        Positional arguments are the rows to be updated. The keys in all
        dictionaries must be the same, and may correspond to either a
        value in the ``where`` dictionary or the name of a column to be
        updated.

    Returns
    -------
    count : `int`
        Number of rows matched (regardless of whether the update actually
        modified them).

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    The default implementation should be sufficient for most derived
    classes.
    """
    self.assertTableWriteable(table, f"Cannot update read-only table {table}.")
    if not rows:
        # No rows given, so nothing can match.
        return 0
    # One equality term per search column, bound to the key under which
    # each row dictionary carries the value for it.
    criteria = [table.columns[column] == sqlalchemy.sql.bindparam(key) for column, key in where.items()]
    statement = table.update().where(sqlalchemy.sql.and_(*criteria))
    with self._transaction() as (_, connection):
        outcome = connection.execute(statement, rows)
        return outcome.rowcount
@contextmanager
def query(
    self,
    sql: sqlalchemy.sql.expression.Executable | sqlalchemy.sql.expression.SelectBase,
    *args: Any,
    **kwargs: Any,
) -> Iterator[sqlalchemy.engine.CursorResult]:
    """Run a SELECT query against the database.

    Parameters
    ----------
    sql : `sqlalchemy.sql.expression.SelectBase`
        A SQLAlchemy representation of a ``SELECT`` query.
    *args
        Additional positional arguments are forwarded to
        `sqlalchemy.engine.Connection.execute`.
    **kwargs
        Additional keyword arguments are forwarded to
        `sqlalchemy.engine.Connection.execute`.

    Returns
    -------
    result_context : `sqlalchemy.engine.CursorResult`
        Context manager that returns the query result object when entered.
        These results are invalidated when the context is exited.

    Notes
    -----
    If a session connection is active it is used and left open. Otherwise
    a new connection is obtained from the engine for this call only and
    is closed when the context exits, including when executing the query
    itself raises.
    """
    # Decide ownership once, up front, so that the cleanup below does not
    # depend on the state of ``self._session_connection`` at exit time.
    owns_connection = self._session_connection is None
    connection = self._engine.connect() if owns_connection else self._session_connection
    try:
        # Executing inside the ``try`` guarantees that a connection opened
        # above is released even if the statement fails.
        # TODO: SelectBase is not good for execute(), but it used everywhere,
        # e.g. in daf_relation. We should switch to Executable at some point.
        # The cast target is quoted because it only matters to type checkers.
        yield connection.execute(cast("sqlalchemy.sql.expression.Executable", sql), *args, **kwargs)
    finally:
        if owns_connection:
            connection.close()
@abstractmethod
def constant_rows(
    self,
    fields: NamedValueAbstractSet[ddl.FieldSpec],
    *rows: dict,
    name: str | None = None,
) -> sqlalchemy.sql.FromClause:
    """Build a SQLAlchemy object representing a handful of literal rows.

    Parameters
    ----------
    fields : `NamedValueAbstractSet` [ `ddl.FieldSpec` ]
        The columns of the rows. Unique and foreign key constraints are
        ignored.
    *rows : `dict`
        Values for the rows, keyed by field name.
    name : `str`, optional
        If provided, the name of the SQL construct. If not provided, an
        opaque but unique identifier is generated.

    Returns
    -------
    from_clause : `sqlalchemy.sql.FromClause`
        SQLAlchemy object representing the given rows. This is guaranteed
        to be something that can be directly joined into a ``SELECT``
        query's ``FROM`` clause, and will not involve a temporary table
        that needs to be cleaned up later.

    Notes
    -----
    The default implementation uses the SQL-standard ``VALUES`` construct,
    but support for that construct is varied enough across popular RDBMSs
    that the method is still marked abstract to force explicit opt-in via
    delegation to `super`.
    """
    construct_name = f"tmp_{uuid.uuid4().hex}" if name is None else name
    columns = [sqlalchemy.Column(spec.name, spec.getSizedColumnType()) for spec in fields]
    values_construct = sqlalchemy.sql.values(*columns, name=construct_name)
    # Each row becomes a tuple ordered like ``fields`` so that values line
    # up with the column definitions above.
    literal_rows = [tuple(row[key] for key in fields.names) for row in rows]
    return values_construct.data(literal_rows)
def get_constant_rows_max(self) -> int:
    """Report the largest number of rows that callers should hand to
    `constant_rows` for this backend.

    Returns
    -------
    max : `int`
        Maximum number of rows.

    Notes
    -----
    This should reflect typical performance profiles (or a guess at these),
    not just hard database engine limits.
    """
    # Default used unless a backend overrides this method.
    return 100
@property
@abstractmethod
def has_distinct_on(self) -> bool:
    """`True` if this database supports the ``DISTINCT ON`` SQL construct
    (`bool`).
    """
    raise NotImplementedError
@property
@abstractmethod
def has_any_aggregate(self) -> bool:
    """`True` if this database supports the ``ANY_VALUE`` aggregate
    function or something equivalent (`bool`).
    """
    raise NotImplementedError
@abstractmethod
def apply_any_aggregate(self, column: sqlalchemy.ColumnElement[Any]) -> sqlalchemy.ColumnElement[Any]:
    """Wrap a SQLAlchemy column in the ``ANY_VALUE`` aggregate function or
    its backend-specific equivalent.

    Parameters
    ----------
    column : `sqlalchemy.ColumnElement`
        Original column to wrap.

    Returns
    -------
    wrapped : `sqlalchemy.ColumnElement`
        A column element of the same SQL type that can appear in the
        ``SELECT`` clause even when this column does not appear in the
        ``GROUP BY`` clause.

    Notes
    -----
    This method's behavior is unspecified when `has_any_aggregate` is
    `False`; the caller is responsible for checking that property first.
    """
    raise NotImplementedError
# The two attributes below are declared by annotation only; no value is
# assigned at this point in the class body.
# NOTE(review): presumably they are set by the constructor or by concrete
# subclasses -- the assignment is not visible in this part of the file.
origin: int
"""An integer ID that should be used as the default for any datasets,
quanta, or other entities that use a (autoincrement, origin) compound
primary key (`int`).
"""

namespace: str | None
"""The schema or namespace this database instance is associated with
(`str` or `None`).
"""