Coverage for python/lsst/daf/butler/registry/interfaces/_database.py: 23%
411 statements
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-16 10:44 +0000
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-16 10:44 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
27from __future__ import annotations
29from ... import ddl, time_utils
# Names exported by ``from <module> import *``; this list is the module's
# declared public API.
__all__ = [
    "Database",
    "ReadOnlyDatabaseError",
    "DatabaseConflictError",
    "DatabaseInsertMode",
    "SchemaAlreadyDefinedError",
    "StaticTablesContext",
]
40import enum
41import uuid
42import warnings
43from abc import ABC, abstractmethod
44from collections import defaultdict
45from collections.abc import Callable, Iterable, Iterator, Sequence
46from contextlib import contextmanager
47from typing import Any, cast, final
49import astropy.time
50import sqlalchemy
52from ..._named import NamedValueAbstractSet
53from ...timespan_database_representation import TimespanDatabaseRepresentation
54from .._exceptions import ConflictingDefinitionError
class DatabaseInsertMode(enum.Enum):
    """Strategies that can be requested when inserting records into a
    database table.
    """

    INSERT = enum.auto()
    """Plain insert; it is an error if a record already exists."""

    REPLACE = enum.auto()
    """Insert, overwriting any record that already exists."""

    ENSURE = enum.auto()
    """Insert only the records that do not exist yet, skipping the rest."""
70# TODO: method is called with list[ReflectedColumn] in SA 2, and
71# ReflectedColumn does not exist in 1.4.
def _checkExistingTableDefinition(name: str, spec: ddl.TableSpec, inspection: list) -> None:
    """Check that a table specification and the reflected definition of the
    same table in the database declare the same set of column names.

    Parameters
    ----------
    name : `str`
        Name of the table (only used in error messages).
    spec : `ddl.TableSpec`
        Specification of the table.
    inspection : `list`
        Column descriptions as returned by
        `sqlalchemy.engine.reflection.Inspector.get_columns`; only the
        ``"name"`` entry of each description is consulted.

    Raises
    ------
    DatabaseConflictError
        Raised if the definitions are inconsistent.
    """
    # Keep the reflected names as a list so the error message reports them
    # in the order the database returned them.
    reflected = [column["name"] for column in inspection]
    definitionsDiffer = spec.fields.names != set(reflected)
    if definitionsDiffer:
        raise DatabaseConflictError(
            f"Table '{name}' exists but is defined differently in the database; "
            f"specification has columns {list(spec.fields.names)}, while the "
            f"table in the database has {reflected}."
        )
class ReadOnlyDatabaseError(RuntimeError):
    """Error signalling that a write operation was attempted on a
    `Database` that is read-only.
    """
class DatabaseConflictError(ConflictingDefinitionError):
    """Error signalling that what is stored in the database (row values or
    schema entities) does not agree with what this client expects.
    """
class SchemaAlreadyDefinedError(RuntimeError):
    """Error signalling an attempt to initialize a database schema when
    some tables are already present.
    """
118class StaticTablesContext:
119 """Helper class used to declare the static schema for a registry layer
120 in a database.
122 An instance of this class is returned by `Database.declareStaticTables`,
123 which should be the only way it should be constructed.
125 Parameters
126 ----------
127 db : `Database`
128 The database.
129 connection : `sqlalchemy.engine.Connection`
130 The connection object.
131 """
133 def __init__(self, db: Database, connection: sqlalchemy.engine.Connection):
134 self._db = db
135 self._foreignKeys: list[tuple[sqlalchemy.schema.Table, sqlalchemy.schema.ForeignKeyConstraint]] = []
136 self._inspector = sqlalchemy.inspect(connection)
137 self._tableNames = frozenset(self._inspector.get_table_names(schema=self._db.namespace))
138 self._initializers: list[Callable[[Database], None]] = []
140 def addTable(self, name: str, spec: ddl.TableSpec) -> sqlalchemy.schema.Table:
141 """Add a new table to the schema, returning its sqlalchemy
142 representation.
144 Parameters
145 ----------
146 name : `str`
147 The name of the table.
148 spec : `ddl.TableSpec`
149 The specification of the table.
151 Returns
152 -------
153 table : `sqlalchemy.schema.Table`
154 The created table.
156 Notes
157 -----
158 The new table may not actually be created until the end of the
159 context created by `Database.declareStaticTables`, allowing tables
160 to be declared in any order even in the presence of foreign key
161 relationships.
162 """
163 name = self._db._mangleTableName(name)
164 metadata = self._db._metadata
165 assert metadata is not None, "Guaranteed by context manager that returns this object."
166 table = self._db._convertTableSpec(name, spec, metadata)
167 for foreignKeySpec in spec.foreignKeys:
168 self._foreignKeys.append((table, self._db._convertForeignKeySpec(name, foreignKeySpec, metadata)))
169 return table
171 def addTableTuple(self, specs: tuple[ddl.TableSpec, ...]) -> tuple[sqlalchemy.schema.Table, ...]:
172 """Add a named tuple of tables to the schema, returning their
173 SQLAlchemy representations in a named tuple of the same type.
175 The new tables may not actually be created until the end of the
176 context created by `Database.declareStaticTables`, allowing tables
177 to be declared in any order even in the presence of foreign key
178 relationships.
180 Parameters
181 ----------
182 specs : `tuple` of `ddl.TableSpec`
183 Specifications of multiple tables.
185 Returns
186 -------
187 tables : `tuple` of `sqlalchemy.schema.Table`
188 All the tables created.
190 Notes
191 -----
192 ``specs`` *must* be an instance of a type created by
193 `collections.namedtuple`, not just regular tuple, and the returned
194 object is guaranteed to be the same. Because `~collections.namedtuple`
195 is just a factory for `type` objects, not an actual type itself,
196 we cannot represent this with type annotations.
197 """
198 return specs._make( # type: ignore
199 self.addTable(name, spec) for name, spec in zip(specs._fields, specs, strict=True) # type: ignore
200 )
202 def addInitializer(self, initializer: Callable[[Database], None]) -> None:
203 """Add a method that does one-time initialization of a database.
205 Initialization can mean anything that changes state of a database
206 and needs to be done exactly once after database schema was created.
207 An example for that could be population of schema attributes.
209 Parameters
210 ----------
211 initializer : `~collections.abc.Callable`
212 Method of a single argument which is a `Database` instance.
213 """
214 self._initializers.append(initializer)
217class Database(ABC):
218 """An abstract interface that represents a particular database engine's
219 representation of a single schema/namespace/database.
221 Parameters
222 ----------
223 origin : `int`
224 An integer ID that should be used as the default for any datasets,
225 quanta, or other entities that use a (autoincrement, origin) compound
226 primary key.
227 engine : `sqlalchemy.engine.Engine`
228 The SQLAlchemy engine for this `Database`.
229 namespace : `str`, optional
230 Name of the schema or namespace this instance is associated with.
231 This is passed as the ``schema`` argument when constructing a
232 `sqlalchemy.schema.MetaData` instance. We use ``namespace`` instead to
233 avoid confusion between "schema means namespace" and "schema means
234 table definitions".
236 Notes
237 -----
238 `Database` requires all write operations to go through its special named
239 methods. Our write patterns are sufficiently simple that we don't really
240 need the full flexibility of SQL insert/update/delete syntax, and we need
241 non-standard (but common) functionality in these operations sufficiently
242 often that it seems worthwhile to provide our own generic API.
244 In contrast, `Database.query` allows arbitrary ``SELECT`` queries (via
245 their SQLAlchemy representation) to be run, as we expect these to require
246 significantly more sophistication while still being limited to standard
247 SQL.
249 `Database` itself has several underscore-prefixed attributes:
251 - ``_engine``: SQLAlchemy object representing its engine.
252 - ``_connection``: method returning a context manager for
253 `sqlalchemy.engine.Connection` object.
254 - ``_metadata``: the `sqlalchemy.schema.MetaData` object representing
255 the tables and other schema entities.
257 These are considered protected (derived classes may access them, but other
258 code should not), and read-only, aside from executing SQL via
259 ``_connection``.
260 """
262 def __init__(self, *, origin: int, engine: sqlalchemy.engine.Engine, namespace: str | None = None):
263 self.origin = origin
264 self.namespace = namespace
265 self._engine = engine
266 self._session_connection: sqlalchemy.engine.Connection | None = None
267 self._metadata: sqlalchemy.schema.MetaData | None = None
268 self._temp_tables: set[str] = set()
270 def __repr__(self) -> str:
271 # Rather than try to reproduce all the parameters used to create
272 # the object, instead report the more useful information of the
273 # connection URL.
274 if self._engine.url.password is not None:
275 uri = str(self._engine.url.set(password="***"))
276 else:
277 uri = str(self._engine.url)
278 if self.namespace:
279 uri += f"#{self.namespace}"
280 return f'{type(self).__name__}("{uri}")'
282 @classmethod
283 def makeDefaultUri(cls, root: str) -> str | None:
284 """Create a default connection URI appropriate for the given root
285 directory, or `None` if there can be no such default.
287 Parameters
288 ----------
289 root : `str`
290 Root string to use to build connection URI.
292 Returns
293 -------
294 uri : `str` or `None`
295 The URI string or `None`.
296 """
297 return None
299 @classmethod
300 def fromUri(
301 cls,
302 uri: str | sqlalchemy.engine.URL,
303 *,
304 origin: int,
305 namespace: str | None = None,
306 writeable: bool = True,
307 ) -> Database:
308 """Construct a database from a SQLAlchemy URI.
310 Parameters
311 ----------
312 uri : `str` or `sqlalchemy.engine.URL`
313 A SQLAlchemy URI connection string.
314 origin : `int`
315 An integer ID that should be used as the default for any datasets,
316 quanta, or other entities that use a (autoincrement, origin)
317 compound primary key.
318 namespace : `str`, optional
319 A database namespace (i.e. schema) the new instance should be
320 associated with. If `None` (default), the namespace (if any) is
321 inferred from the URI.
322 writeable : `bool`, optional
323 If `True`, allow write operations on the database, including
324 ``CREATE TABLE``.
326 Returns
327 -------
328 db : `Database`
329 A new `Database` instance.
330 """
331 return cls.fromEngine(
332 cls.makeEngine(uri, writeable=writeable), origin=origin, namespace=namespace, writeable=writeable
333 )
335 @classmethod
336 @abstractmethod
337 def makeEngine(
338 cls, uri: str | sqlalchemy.engine.URL, *, writeable: bool = True
339 ) -> sqlalchemy.engine.Engine:
340 """Create a `sqlalchemy.engine.Engine` from a SQLAlchemy URI.
342 Parameters
343 ----------
344 uri : `str` or `sqlalchemy.engine.URL`
345 A SQLAlchemy URI connection string.
346 writeable : `bool`, optional
347 If `True`, allow write operations on the database, including
348 ``CREATE TABLE``.
350 Returns
351 -------
352 engine : `sqlalchemy.engine.Engine`
353 A database engine.
355 Notes
356 -----
357 Subclasses that support other ways to connect to a database are
358 encouraged to add optional arguments to their implementation of this
359 method, as long as they maintain compatibility with the base class
360 call signature.
361 """
362 raise NotImplementedError()
364 @classmethod
365 @abstractmethod
366 def fromEngine(
367 cls,
368 engine: sqlalchemy.engine.Engine,
369 *,
370 origin: int,
371 namespace: str | None = None,
372 writeable: bool = True,
373 ) -> Database:
374 """Create a new `Database` from an existing `sqlalchemy.engine.Engine`.
376 Parameters
377 ----------
378 engine : `sqlalchemy.engine.Engine`
379 The engine for the database. May be shared between `Database`
380 instances.
381 origin : `int`
382 An integer ID that should be used as the default for any datasets,
383 quanta, or other entities that use a (autoincrement, origin)
384 compound primary key.
385 namespace : `str`, optional
386 A different database namespace (i.e. schema) the new instance
387 should be associated with. If `None` (default), the namespace
388 (if any) is inferred from the connection.
389 writeable : `bool`, optional
390 If `True`, allow write operations on the database, including
391 ``CREATE TABLE``.
393 Returns
394 -------
395 db : `Database`
396 A new `Database` instance.
398 Notes
399 -----
400 This method allows different `Database` instances to share the same
401 engine, which is desirable when they represent different namespaces
402 can be queried together.
403 """
404 raise NotImplementedError()
@final
@contextmanager
def session(self) -> Iterator[None]:
    """Open a session (a persistent connection to the database) for the
    duration of a ``with`` block.

    Returns
    -------
    context : `AbstractContextManager` [ `None` ]
        A context manager that does not return a value when entered.

    Notes
    -----
    This method should be used when a sequence of read-only SQL operations
    will be performed in rapid succession *without* a requirement that they
    yield consistent results in the presence of concurrent writes (or, more
    rarely, when conflicting concurrent writes are rare/impossible and the
    session will be open long enough that a transaction is inadvisable).
    """
    # Delegate to the protected implementation, hiding the connection
    # object it yields from the caller.
    with self._session() as _connection:
        yield None
@final
@contextmanager
def transaction(
    self,
    *,
    interrupting: bool = False,
    savepoint: bool = False,
    lock: Iterable[sqlalchemy.schema.Table] = (),
    for_temp_tables: bool = False,
) -> Iterator[None]:
    """Run the body of a ``with`` block inside a database transaction.

    Parameters
    ----------
    interrupting : `bool`, optional
        If `True` (`False` is default), this transaction block may not be
        nested without an outer one, and attempting to do so is a logic
        (i.e. assertion) error.
    savepoint : `bool`, optional
        If `True` (`False` is default), create a `SAVEPOINT`, allowing
        exceptions raised by the database (e.g. due to constraint
        violations) during this transaction's context to be caught outside
        it without also rolling back all operations in an outer transaction
        block. If `False`, transactions may still be nested, but a
        rollback may be generated at any level and affects all levels, and
        commits are deferred until the outermost block completes. If any
        outer transaction block was created with ``savepoint=True``, all
        inner blocks will be as well (regardless of the actual value
        passed). This has no effect if this is the outermost transaction.
    lock : `~collections.abc.Iterable` [ `sqlalchemy.schema.Table` ], \
            optional
        A list of tables to lock for the duration of this transaction.
        These locks are guaranteed to prevent concurrent writes and allow
        this transaction (only) to acquire the same locks (others should
        block), but only prevent concurrent reads if the database engine
        requires that in order to block concurrent writes.
    for_temp_tables : `bool`, optional
        If `True`, this transaction may involve creating temporary tables.

    Returns
    -------
    context : `AbstractContextManager` [ `None` ]
        A context manager that commits the transaction when it is exited
        without error and rolls back the transaction when it is exited via
        an exception.

    Notes
    -----
    All transactions on a connection managed by one or more `Database`
    instances _must_ go through this method, or transaction state will not
    be correctly managed.
    """
    # Delegate to the protected implementation, hiding the
    # (is_new, connection) tuple it yields from the caller.
    implementation = self._transaction(
        interrupting=interrupting,
        savepoint=savepoint,
        lock=lock,
        for_temp_tables=for_temp_tables,
    )
    with implementation:
        yield None
@contextmanager
def temporary_table(
    self, spec: ddl.TableSpec, name: str | None = None
) -> Iterator[sqlalchemy.schema.Table]:
    """Create a temporary table for the duration of a ``with`` block and
    drop it afterwards.

    Parameters
    ----------
    spec : `ddl.TableSpec`
        Specification for the columns. Unique and foreign key constraints
        may be ignored.
    name : `str`, optional
        If provided, the name of the SQL construct. If not provided, an
        opaque but unique identifier is generated.

    Returns
    -------
    context : `AbstractContextManager` [ `sqlalchemy.schema.Table` ]
        A context manager that returns a SQLAlchemy representation of the
        temporary table when entered.

    Notes
    -----
    Temporary tables may be created, dropped, and written to even in
    read-only databases - at least according to the Python-level
    protections in the `Database` classes. Server permissions may say
    otherwise, but in that case they probably need to be modified to
    support the full range of expected read-only butler behavior.
    """
    with self._session() as session_connection:
        temp_table = self._make_temporary_table(session_connection, spec=spec, name=name)
        # Registering the key is what makes isTableWriteable accept this
        # table on a read-only database.
        self._temp_tables.add(temp_table.key)
        try:
            yield temp_table
        finally:
            # Drop inside a transaction, then forget the key.
            with self._transaction():
                temp_table.drop(session_connection)
            self._temp_tables.remove(temp_table.key)
525 @contextmanager
526 def _session(self) -> Iterator[sqlalchemy.engine.Connection]:
527 """Protected implementation for `session` that actually returns the
528 connection.
530 This method is for internal `Database` calls that need the actual
531 SQLAlchemy connection object. It should be overridden by subclasses
532 instead of `session` itself.
534 Returns
535 -------
536 context : `AbstractContextManager` [ `sqlalchemy.engine.Connection` ]
537 A context manager that returns a SQLALchemy connection when
538 entered.
540 """
541 if self._session_connection is not None:
542 # session already started, just reuse that
543 yield self._session_connection
544 else:
545 try:
546 # open new connection and close it when done
547 self._session_connection = self._engine.connect()
548 yield self._session_connection
549 finally:
550 if self._session_connection is not None:
551 self._session_connection.close()
552 self._session_connection = None
553 # Temporary tables only live within session
554 self._temp_tables = set()
@contextmanager
def _transaction(
    self,
    *,
    interrupting: bool = False,
    savepoint: bool = False,
    lock: Iterable[sqlalchemy.schema.Table] = (),
    for_temp_tables: bool = False,
) -> Iterator[tuple[bool, sqlalchemy.engine.Connection]]:
    """Protected implementation for `transaction` that actually returns the
    connection and whether this is a new outermost transaction.

    This method is for internal `Database` calls that need the actual
    SQLAlchemy connection object. It should be overridden by subclasses
    instead of `transaction` itself.

    Parameters
    ----------
    interrupting : `bool`, optional
        If `True` (`False` is default), this transaction block may not be
        nested without an outer one, and attempting to do so is a logic
        (i.e. assertion) error.
    savepoint : `bool`, optional
        If `True` (`False` is default), create a `SAVEPOINT`, allowing
        exceptions raised by the database (e.g. due to constraint
        violations) during this transaction's context to be caught outside
        it without also rolling back all operations in an outer transaction
        block. If `False`, transactions may still be nested, but a
        rollback may be generated at any level and affects all levels, and
        commits are deferred until the outermost block completes. If any
        outer transaction block was created with ``savepoint=True``, all
        inner blocks will be as well (regardless of the actual value
        passed). This has no effect if this is the outermost transaction.
    lock : `~collections.abc.Iterable` [ `sqlalchemy.schema.Table` ], \
            optional
        A list of tables to lock for the duration of this transaction.
        These locks are guaranteed to prevent concurrent writes and allow
        this transaction (only) to acquire the same locks (others should
        block), but only prevent concurrent reads if the database engine
        requires that in order to block concurrent writes.
    for_temp_tables : `bool`, optional
        If `True`, this transaction may involve creating temporary tables.
        This implementation does not use the value.

    Returns
    -------
    context : `AbstractContextManager` [ `tuple` [ `bool`,
            `sqlalchemy.engine.Connection` ] ]
        A context manager that commits the transaction when it is exited
        without error and rolls back the transaction when it is exited via
        an exception. When entered, it returns a tuple of:

        - ``is_new`` (`bool`): whether this is a new (outermost)
          transaction;
        - ``connection`` (`sqlalchemy.engine.Connection`): the connection.
    """
    with self._session() as connection:
        already_in_transaction = connection.in_transaction()
        assert not (interrupting and already_in_transaction), (
            "Logic error in transaction nesting: an operation that would "
            "interrupt the active transaction context has been requested."
        )
        # Once an enclosing block uses a savepoint, every nested block
        # does too, whatever the caller asked for.
        savepoint = savepoint or connection.in_nested_transaction()
        # ``trans`` is the transaction object this block is responsible
        # for committing or rolling back; None means an enclosing block
        # owns that decision.
        trans: sqlalchemy.engine.Transaction | None
        if already_in_transaction:
            if savepoint:
                trans = connection.begin_nested()
            else:
                # Nested non-savepoint transactions don't do anything.
                trans = None
        else:
            # Use a regular (non-savepoint) transaction always for the
            # outermost context.
            trans = connection.begin()
        # Subclass hook for acquiring the requested table locks.
        self._lockTables(connection, lock)
        try:
            # The first element tells the caller whether this block started
            # the outermost transaction.
            yield not already_in_transaction, connection
            if trans is not None:
                trans.commit()
        except BaseException:
            # Catch everything (including KeyboardInterrupt) so that the
            # transaction owned by this block is rolled back before the
            # exception is re-raised.
            if trans is not None:
                trans.rollback()
            raise
639 @abstractmethod
640 def _lockTables(
641 self, connection: sqlalchemy.engine.Connection, tables: Iterable[sqlalchemy.schema.Table] = ()
642 ) -> None:
643 """Acquire locks on the given tables.
645 This is an implementation hook for subclasses, called by `transaction`.
646 It should not be called directly by other code.
648 Parameters
649 ----------
650 connection : `sqlalchemy.engine.Connection`
651 Database connection object. It is guaranteed that transaction is
652 already in a progress for this connection.
653 tables : `~collections.abc.Iterable` [ `sqlalchemy.schema.Table` ], \
654 optional
655 A list of tables to lock for the duration of this transaction.
656 These locks are guaranteed to prevent concurrent writes and allow
657 this transaction (only) to acquire the same locks (others should
658 block), but only prevent concurrent reads if the database engine
659 requires that in order to block concurrent writes.
660 """
661 raise NotImplementedError()
def isTableWriteable(self, table: sqlalchemy.schema.Table) -> bool:
    """Report whether a table can be written to, either because the
    database connection is read-write or because the table is a temporary
    table.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy table object to check.

    Returns
    -------
    writeable : `bool`
        Whether this table is writeable.
    """
    databaseWriteable = self.isWriteable()
    if databaseWriteable:
        return databaseWriteable
    # Read-only database: only registered temporary tables may be written.
    return table.key in self._temp_tables
def assertTableWriteable(self, table: sqlalchemy.schema.Table, msg: str) -> None:
    """Raise unless the given table is writeable, i.e. unless the database
    connection is read-write or the table is a temporary table.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy table object to check.
    msg : `str`
        Message for the exception raised when the table is not writeable.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isTableWriteable` returns `False` for ``table``.
    """
    if self.isTableWriteable(table):
        return
    raise ReadOnlyDatabaseError(msg)
@contextmanager
def declareStaticTables(self, *, create: bool) -> Iterator[StaticTablesContext]:
    """Return a context manager in which the database's static DDL schema
    can be declared.

    Parameters
    ----------
    create : `bool`
        If `True`, attempt to create all tables at the end of the context.
        If `False`, they will be assumed to already exist.

    Returns
    -------
    schema : `StaticTablesContext`
        A helper object that is used to add new tables.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if ``create`` is `True`, `Database.isWriteable` is `False`,
        and one or more declared tables do not already exist.
    SchemaAlreadyDefinedError
        Raised if ``create`` is `True` and the database namespace already
        contains tables.

    Examples
    --------
    Given a `Database` instance ``db``::

        with db.declareStaticTables(create=True) as schema:
            schema.addTable("table1", TableSpec(...))
            schema.addTable("table2", TableSpec(...))

    Notes
    -----
    A database's static DDL schema must be declared before any dynamic
    tables are managed via calls to `ensureTableExists` or
    `getExistingTable`. The order in which static schema tables are added
    inside the context block is unimportant; they will automatically be
    sorted and added in an order consistent with their foreign key
    relationships.
    """
    if create and not self.isWriteable():
        raise ReadOnlyDatabaseError(f"Cannot create tables in read-only database {self}.")
    # Start from a fresh MetaData object; it is reset to None below if
    # anything in the declaration fails.
    self._metadata = sqlalchemy.MetaData(schema=self.namespace)
    try:
        with self._transaction() as (_, connection):
            context = StaticTablesContext(self, connection)
            if create and context._tableNames:
                # Looks like database is already initialized, to avoid
                # danger of modifying/destroying valid schema we refuse to
                # do anything in this case
                raise SchemaAlreadyDefinedError(f"Cannot create tables in non-empty database {self}.")
            # Hand control to the caller's ``with`` body, which declares
            # the tables.
            yield context
            # Attach the foreign keys only now that every table has been
            # declared; this is what makes the declaration order irrelevant.
            for table, foreignKey in context._foreignKeys:
                table.append_constraint(foreignKey)
            if create:
                # Create the namespace itself first if it is missing.
                if (
                    self.namespace is not None
                    and self.namespace not in context._inspector.get_schema_names()
                ):
                    connection.execute(sqlalchemy.schema.CreateSchema(self.namespace))
                # In our tables we have columns that make use of sqlalchemy
                # Sequence objects. There is currently a bug in sqlalchemy
                # that causes a deprecation warning to be thrown on a
                # property of the Sequence object when the repr for the
                # sequence is created. Here a filter is used to catch these
                # deprecation warnings when tables are created.
                with warnings.catch_warnings():
                    warnings.simplefilter("ignore", category=sqlalchemy.exc.SADeprecationWarning)
                    self._metadata.create_all(connection)
                # call all initializer methods sequentially
                for init in context._initializers:
                    init(self)
    except BaseException:
        # Leave no half-declared schema behind on any failure.
        self._metadata = None
        raise
@abstractmethod
def isWriteable(self) -> bool:
    """Report whether this client is able to modify the database.

    Returns
    -------
    writeable : `bool`
        `True` if this database can be modified by this client.
    """
    raise NotImplementedError()
774 @abstractmethod
775 def __str__(self) -> str:
776 """Return a human-readable identifier for this `Database`, including
777 any namespace or schema that identifies its names within a `Registry`.
778 """
779 raise NotImplementedError()
@property
def dialect(self) -> sqlalchemy.engine.Dialect:
    """The SQLAlchemy dialect of the engine backing this database
    (`sqlalchemy.engine.Dialect`).
    """
    engine = self._engine
    return engine.dialect
def shrinkDatabaseEntityName(self, original: str) -> str:
    """Shorten a name so that it fits within this database engine's length
    limits for table, constraint, index, and sequence names.

    Implementations should not assume that simple truncation is safe,
    because multiple long names often begin with the same prefix.

    The default implementation simply returns the given name.

    Parameters
    ----------
    original : `str`
        The original name.

    Returns
    -------
    shrunk : `str`
        The new, possibly shortened name.
    """
    return original
def expandDatabaseEntityName(self, shrunk: str) -> str:
    """Recover the original name of a database entity whose name was too
    long to fit within the database engine's limits.

    The default implementation simply returns the given name.

    Parameters
    ----------
    shrunk : `str`
        The possibly shortened name.

    Returns
    -------
    original : `str`
        The original name.
    """
    return shrunk
826 def _mangleTableName(self, name: str) -> str:
827 """Map a logical, user-visible table name to the true table name used
828 in the database.
830 The default implementation returns the given name unchanged.
832 Parameters
833 ----------
834 name : `str`
835 Input table name. Should not include a namespace (i.e. schema)
836 prefix.
838 Returns
839 -------
840 mangled : `str`
841 Mangled version of the table name (still with no namespace prefix).
843 Notes
844 -----
845 Reimplementations of this method must be idempotent - mangling an
846 already-mangled name must have no effect.
847 """
848 return name
850 def _makeColumnConstraints(self, table: str, spec: ddl.FieldSpec) -> list[sqlalchemy.CheckConstraint]:
851 """Create constraints based on this spec.
853 Parameters
854 ----------
855 table : `str`
856 Name of the table this column is being added to.
857 spec : `FieldSpec`
858 Specification for the field to be added.
860 Returns
861 -------
862 constraint : `list` of `sqlalchemy.CheckConstraint`
863 Constraint added for this column.
864 """
865 # By default we return no additional constraints
866 return []
def _convertFieldSpec(
    self, table: str, spec: ddl.FieldSpec, metadata: sqlalchemy.MetaData, **kwargs: Any
) -> sqlalchemy.schema.Column:
    """Build the `sqlalchemy.schema.Column` that corresponds to a
    `FieldSpec`.

    Parameters
    ----------
    table : `str`
        Name of the table this column is being added to.
    spec : `FieldSpec`
        Specification for the field to be added.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema this field's table is
        being added to.
    **kwargs
        Additional keyword arguments to forward to the
        `sqlalchemy.schema.Column` constructor. This is provided to make
        it easier for derived classes to delegate to ``super()`` while
        making only minor changes.

    Returns
    -------
    column : `sqlalchemy.schema.Column`
        SQLAlchemy representation of the field.
    """
    # Generate a sequence to use for auto incrementing for databases that
    # do not support it natively. This will be ignored by sqlalchemy for
    # databases that do support it.
    sequences = (
        [
            sqlalchemy.Sequence(
                self.shrinkDatabaseEntityName(f"{table}_seq_{spec.name}"), metadata=metadata
            )
        ]
        if spec.autoincrement
        else []
    )
    assert spec.doc is None or isinstance(spec.doc, str), f"Bad doc for {table}.{spec.name}."
    return sqlalchemy.schema.Column(
        spec.name,
        spec.getSizedColumnType(),
        *sequences,
        nullable=spec.nullable,
        primary_key=spec.primaryKey,
        comment=spec.doc,
        server_default=spec.default,
        **kwargs,
    )
def _convertForeignKeySpec(
    self, table: str, spec: ddl.ForeignKeySpec, metadata: sqlalchemy.MetaData, **kwargs: Any
) -> sqlalchemy.schema.ForeignKeyConstraint:
    """Convert a `ForeignKeySpec` to a
    `sqlalchemy.schema.ForeignKeyConstraint`.

    Parameters
    ----------
    table : `str`
        Name of the table this foreign key is being added to.
    spec : `ForeignKeySpec`
        Specification for the foreign key to be added.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema this constraint is
        being added to.
    **kwargs
        Additional keyword arguments to forward to the
        `sqlalchemy.schema.ForeignKeyConstraint` constructor. This is
        provided to make it easier for derived classes to delegate to
        ``super()`` while making only minor changes.

    Returns
    -------
    constraint : `sqlalchemy.schema.ForeignKeyConstraint`
        SQLAlchemy representation of the constraint.
    """
    # The referenced table name is needed both for the constraint name and
    # for the fully-qualified target columns.
    referenced = self._mangleTableName(spec.table)
    name = self.shrinkDatabaseEntityName(
        "_".join(["fkey", table, referenced] + list(spec.target) + list(spec.source))
    )
    return sqlalchemy.schema.ForeignKeyConstraint(
        spec.source,
        [f"{referenced}.{col}" for col in spec.target],
        name=name,
        ondelete=spec.onDelete,
        # Forward caller-supplied options as documented; previously these
        # were accepted but silently discarded.
        **kwargs,
    )
953 def _convertExclusionConstraintSpec(
954 self,
955 table: str,
956 spec: tuple[str | type[TimespanDatabaseRepresentation], ...],
957 metadata: sqlalchemy.MetaData,
958 ) -> sqlalchemy.schema.Constraint:
959 """Convert a `tuple` from `ddl.TableSpec.exclusion` into a SQLAlchemy
960 constraint representation.
962 Parameters
963 ----------
964 table : `str`
965 Name of the table this constraint is being added to.
966 spec : `tuple` [ `str` or `type` ]
967 A tuple of `str` column names and the `type` object returned by
968 `getTimespanRepresentation` (which must appear exactly once),
969 indicating the order of the columns in the index used to back the
970 constraint.
971 metadata : `sqlalchemy.MetaData`
972 SQLAlchemy representation of the DDL schema this constraint is
973 being added to.
975 Returns
976 -------
977 constraint : `sqlalchemy.schema.Constraint`
978 SQLAlchemy representation of the constraint.
980 Raises
981 ------
982 NotImplementedError
983 Raised if this database does not support exclusion constraints.
984 """
985 raise NotImplementedError(f"Database {self} does not support exclusion constraints.")
def _convertTableSpec(
    self, name: str, spec: ddl.TableSpec, metadata: sqlalchemy.MetaData, **kwargs: Any
) -> sqlalchemy.schema.Table:
    """Translate a `TableSpec` into a `sqlalchemy.schema.Table`.

    Parameters
    ----------
    name : `str`
        The name of the table.
    spec : `TableSpec`
        Specification for the table to be added.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema this table is being
        added to.
    **kwargs
        Additional keyword arguments to forward to the
        `sqlalchemy.schema.Table` constructor, so that derived classes can
        delegate to ``super()`` while making only minor changes.

    Returns
    -------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy representation of the table.

    Notes
    -----
    Foreign key *constraints* in ``spec.foreignKeys`` are deliberately not
    created here, in order to avoid circular dependencies; higher-level
    logic in `ensureTableExists`, `getExistingTable`, and
    `declareStaticTables` adds them. Indexes backing those foreign keys
    are created here when requested by the spec.
    """
    name = self._mangleTableName(name)

    # Columns first, then whatever per-column constraints are wanted.
    args: list[sqlalchemy.schema.SchemaItem] = []
    for fieldSpec in spec.fields:
        args.append(self._convertFieldSpec(name, fieldSpec, metadata))
    for fieldSpec in spec.fields:
        args.extend(self._makeColumnConstraints(name, fieldSpec))

    # Column tuples that are already indexed (primary key, unique
    # constraints, explicit indexes), so that no duplicate explicit or
    # foreign key index is added for them.
    primary_key_columns = tuple(fieldSpec.name for fieldSpec in spec.fields if fieldSpec.primaryKey)
    covered = {primary_key_columns}

    for unique_columns in spec.unique:
        unique_name = self.shrinkDatabaseEntityName("_".join([name, "unq"] + list(unique_columns)))
        args.append(sqlalchemy.schema.UniqueConstraint(*unique_columns, name=unique_name))
    covered.update(spec.unique)

    for index in spec.indexes:
        if index.columns in covered:
            continue
        index_name = self.shrinkDatabaseEntityName("_".join([name, "idx"] + list(index.columns)))
        args.append(
            sqlalchemy.schema.Index(
                index_name,
                *index.columns,
                unique=(index.columns in spec.unique),
                **index.kwargs,
            )
        )
    # Only mark explicit indexes as covered once all of them are created.
    covered.update(index.columns for index in spec.indexes)

    for fk in spec.foreignKeys:
        if not fk.addIndex or fk.source in covered:
            continue
        fk_index_name = self.shrinkDatabaseEntityName("_".join((name, "fkidx") + fk.source))
        args.append(sqlalchemy.schema.Index(fk_index_name, *fk.source))

    for excl in spec.exclusion:
        args.append(self._convertExclusionConstraintSpec(name, excl, metadata))

    assert spec.doc is None or isinstance(spec.doc, str), f"Bad doc for {name}."
    return sqlalchemy.schema.Table(name, metadata, *args, comment=spec.doc, info={"spec": spec}, **kwargs)
def ensureTableExists(self, name: str, spec: ddl.TableSpec) -> sqlalchemy.schema.Table:
    """Return the table with the given name and specification, creating it
    first if it does not exist yet.

    Parameters
    ----------
    name : `str`
        Name of the table (not including namespace qualifiers).
    spec : `TableSpec`
        Specification for the table. This will be used when creating the
        table, and *may* be used when obtaining an existing table to check
        for consistency, but no such check is guaranteed.

    Returns
    -------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy representation of the table.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False`, and the table does not
        already exist.
    DatabaseConflictError
        Raised if the table exists but ``spec`` is inconsistent with its
        definition.

    Notes
    -----
    This method may not be called within transactions. It may be called on
    read-only databases if and only if the table does in fact already
    exist.

    Subclasses may override this method, but usually should not need to.
    """
    # TODO: if _engine is used to make a table then it uses separate
    # connection and should not interfere with current transaction
    session = self._session_connection
    assert not (session is not None and session.in_transaction()), "Table creation interrupts transactions."
    assert self._metadata is not None, "Static tables must be declared before dynamic tables."

    if (existing := self.getExistingTable(name, spec)) is not None:
        return existing
    if not self.isWriteable():
        raise ReadOnlyDatabaseError(
            f"Table {name} does not exist, and cannot be created because database {self} is read-only."
        )

    table = self._convertTableSpec(name, spec, self._metadata)
    for foreignKeySpec in spec.foreignKeys:
        constraint = self._convertForeignKeySpec(name, foreignKeySpec, self._metadata)
        table.append_constraint(constraint)
    try:
        with self._transaction() as (_, connection):
            table.create(connection)
    except sqlalchemy.exc.DatabaseError:
        # Another process may have created the table in the meantime, which
        # usually shows up as OperationalError or ProgrammingError. An
        # IF NOT EXISTS clause cannot be used here because of a PostgreSQL
        # server-side race condition that causes IntegrityError. All of
        # these inherit from DatabaseError, so catch that and re-check
        # whether the table is there now.
        table = self.getExistingTable(name, spec)
        if table is None:
            raise
    return table
def getExistingTable(self, name: str, spec: ddl.TableSpec) -> sqlalchemy.schema.Table | None:
    """Look up an existing table with the given name and specification.

    Parameters
    ----------
    name : `str`
        Name of the table (not including namespace qualifiers).
    spec : `TableSpec`
        Specification for the table. This will be used when creating the
        SQLAlchemy representation of the table, and it is used to
        check that the actual table in the database is consistent.

    Returns
    -------
    table : `sqlalchemy.schema.Table` or `None`
        SQLAlchemy representation of the table, or `None` if it does not
        exist.

    Raises
    ------
    DatabaseConflictError
        Raised if the table exists but ``spec`` is inconsistent with its
        definition.

    Notes
    -----
    This method can be called within transactions and never modifies the
    database.

    Subclasses may override this method, but usually should not need to.
    """
    assert self._metadata is not None, "Static tables must be declared before dynamic tables."
    name = self._mangleTableName(name)
    if self.namespace is None:
        metadata_key = name
    else:
        metadata_key = f"{self.namespace}.{name}"

    # Case 1: the table is already known to our in-memory metadata.
    known = self._metadata.tables.get(metadata_key)
    if known is not None:
        if spec.fields.names != set(known.columns.keys()):
            raise DatabaseConflictError(
                f"Table '{name}' has already been defined differently; the new "
                f"specification has columns {list(spec.fields.names)}, while "
                f"the previous definition has {list(known.columns.keys())}."
            )
        return known

    # Case 2: not in the metadata; ask the database itself, reusing the
    # session connection when there is one.
    if self._session_connection is None:
        bind = self._engine
    else:
        bind = self._session_connection
    inspector = sqlalchemy.inspect(bind, raiseerr=True)
    if name not in inspector.get_table_names(schema=self.namespace):
        return None

    _checkExistingTableDefinition(name, spec, inspector.get_columns(name, schema=self.namespace))
    table = self._convertTableSpec(name, spec, self._metadata)
    for foreignKeySpec in spec.foreignKeys:
        constraint = self._convertForeignKeySpec(name, foreignKeySpec, self._metadata)
        table.append_constraint(constraint)
    return table
def _make_temporary_table(
    self,
    connection: sqlalchemy.engine.Connection,
    spec: ddl.TableSpec,
    name: str | None = None,
    **kwargs: Any,
) -> sqlalchemy.schema.Table:
    """Create a temporary table on the given connection.

    Parameters
    ----------
    connection : `sqlalchemy.engine.Connection`
        Connection to use when creating the table.
    spec : `TableSpec`
        Specification for the table.
    name : `str`, optional
        A unique (within this session/connection) name for the table.
        Subclasses may override to modify the actual name used. If not
        provided, a unique name will be generated.
    **kwargs
        Additional keyword arguments to forward to the
        `sqlalchemy.schema.Table` constructor, so that derived classes can
        delegate to ``super()`` while making only minor changes.

    Returns
    -------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy representation of the table.

    Raises
    ------
    RuntimeError
        Raised if the static schema has not been defined yet.
    ValueError
        Raised if the (transformed) table name collides with an existing
        temporary table.
    """
    name = f"tmp_{uuid.uuid4().hex}" if name is None else name
    metadata = self._metadata
    if metadata is None:
        raise RuntimeError("Cannot create temporary table before static schema is defined.")
    table_options = dict(prefixes=["TEMPORARY"], schema=sqlalchemy.schema.BLANK_SCHEMA)
    table = self._convertTableSpec(name, spec, metadata, **table_options, **kwargs)
    name_was_transformed = table.key != name
    if name_was_transformed and table.key in self._temp_tables:
        raise ValueError(
            f"A temporary table with name {name} (transformed to {table.key} by "
            "Database) already exists."
        )
    for foreignKeySpec in spec.foreignKeys:
        constraint = self._convertForeignKeySpec(name, foreignKeySpec, metadata)
        table.append_constraint(constraint)
    with self._transaction():
        table.create(connection)
    return table
@classmethod
def getTimespanRepresentation(cls) -> type[TimespanDatabaseRepresentation]:
    """Return the `type` that describes how `Timespan` objects are stored
    in this database.

    `Database` does not automatically use the return type of this method
    anywhere else; calling code is responsible for making sure that DDL
    and queries are consistent with it.

    Returns
    -------
    TimespanReprClass : `type` (`TimespanDatabaseRepresentation` subclass)
        A type that encapsulates the way `Timespan` objects should be
        stored in this database.

    Notes
    -----
    The choice of representation belongs to the `Database` implementation,
    but the timespan-mangling logic itself is kept outside `Database` for
    two reasons:

    - Timespans appear in relatively few tables and queries in our
      typical usage, and the code that operates on them already knows it
      is working with timespans. A timespan-representation-aware
      implementation of, say, `insert`, would need extra logic to
      identify when timespan-mangling had to occur, which would usually
      be useless overhead.

    - SQLAlchemy's rich SELECT query expression system has no way to wrap
      multiple columns in a single expression object (the ORM does, but
      we are not using the ORM). So we would have to wrap _much_ more of
      that code in our own interfaces to encapsulate timespan
      representations there.
    """
    representation: type[TimespanDatabaseRepresentation] = TimespanDatabaseRepresentation.Compound
    return representation
def sync(
    self,
    table: sqlalchemy.schema.Table,
    *,
    keys: dict[str, Any],
    compared: dict[str, Any] | None = None,
    extra: dict[str, Any] | None = None,
    returning: Sequence[str] | None = None,
    update: bool = False,
) -> tuple[dict[str, Any] | None, bool | dict[str, Any]]:
    """Make sure the table holds a row equivalent to the given values,
    inserting (or optionally updating) as necessary.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table to be queried and possibly inserted into.
    keys : `dict`
        Column name-value pairs used to search for an existing row; must
        be a combination that can be used to select a single row if one
        exists. If such a row does not exist, these values are used in
        the insert.
    compared : `dict`, optional
        Column name-value pairs that are compared to those in any existing
        row. If such a row does not exist, these values are used in the
        insert.
    extra : `dict`, optional
        Column name-value pairs that are ignored if a matching row exists,
        but used in an insert if one is necessary.
    returning : `~collections.abc.Sequence` of `str`, optional
        The names of columns whose values should be returned.
    update : `bool`, optional
        If `True` (`False` is default), update the existing row with the
        values in ``compared`` instead of raising `DatabaseConflictError`.

    Returns
    -------
    row : `dict`, optional
        The value of the fields indicated by ``returning``, or `None` if
        ``returning`` is `None`.
    inserted_or_updated : `bool` or `dict`
        If `True`, a new row was inserted; if `False`, a matching row
        already existed. If a `dict` (only possible if ``update=True``),
        then an existing row was updated, and the dict maps the names of
        the updated columns to their *old* values (new values can be
        obtained from ``compared``).

    Raises
    ------
    DatabaseConflictError
        Raised if the values in ``compared`` do not match the values in the
        database.
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False`, and no matching record
        already exists.
    ConflictingDefinitionError
        Raised if no row matches ``keys`` even after the insert attempt.
    RuntimeError
        Raised if ``keys`` matches more than one row, or if a conflict is
        found right after a successful insert.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    It may be called on read-only databases if and only if the matching row
    does in fact already exist.
    """

    def check() -> tuple[int, dict[str, Any] | None, list | None]:
        """Query for the row matching ``keys`` and compare it with what the
        caller supplied.

        Returns
        -------
        n : `int`
            Number of matching rows. ``n != 1`` is always an error, but
            which kind of error depends on where `check` is called.
        bad : `dict` or `None`
            The subset of the keys of ``compared`` for which the existing
            values did not match the given one, mapped to the existing
            values in the database. A non-empty ``bad`` is a conflict,
            handled differently depending on context. `None` if
            ``n != 1``.
        result : `list` or `None`
            Results in the database that correspond to the columns given
            in ``returning``, or `None` if ``returning is None``.
        """
        wanted: set[str] = set()
        if compared is not None:
            wanted.update(compared.keys())
        if returning is not None:
            wanted.update(returning)
        if not wanted:
            # Some column has to be selected, even when all we care about
            # is how many rows come back.
            wanted.add(next(iter(keys.keys())))
        selected_columns = [table.columns[k].label(k) for k in wanted]
        key_terms = [table.columns[k] == v for k, v in keys.items()]
        selectSql = (
            sqlalchemy.sql.select(*selected_columns)
            .select_from(table)
            .where(sqlalchemy.sql.and_(*key_terms))
        )
        with self._transaction() as (_, connection):
            fetched = list(connection.execute(selectSql).mappings())
        n_found = len(fetched)
        if n_found != 1:
            return n_found, None, None
        existing = fetched[0]

        def differs(stored: Any, given: Any) -> bool:
            # astropy times get a dedicated equality test instead of !=.
            if isinstance(stored, astropy.time.Time):
                return not time_utils.TimeConverter().times_equal(stored, given)
            return stored != given

        inconsistencies: dict[str, Any] = {}
        if compared is not None:
            for k, v in compared.items():
                if differs(existing[k], v):
                    inconsistencies[k] = existing[k]
        toReturn: list | None = None
        if returning is not None:
            toReturn = [existing[k] for k in returning]
        return 1, inconsistencies, toReturn

    def _format_bad(inconsistencies: dict[str, Any]) -> str:
        """Render the 'bad' dictionary of existing values returned by
        ``check`` as a string suitable for an error message.
        """
        assert compared is not None, "Should not be able to get inconsistencies without comparing."
        parts = [f"{k}: {v!r} != {compared[k]!r}" for k, v in inconsistencies.items()]
        return ", ".join(parts)

    inserted_or_updated: bool | dict[str, Any]
    if self.isTableWriteable(table):
        # Attempt the insert first, tolerating only specific kinds of
        # failure.
        row = dict(keys)
        for more_values in (compared, extra):
            if more_values is not None:
                row.update(more_values)
        with self.transaction():
            inserted = bool(self.ensure(table, row))
            # check() has to run inside the transaction for this branch,
            # so that an insert that did not do what we expected gets
            # rolled back. That limits how much of this logic can be
            # shared with the read-only branch below.
            n, bad, result = check()
            if n < 1:
                raise ConflictingDefinitionError(
                    f"Attempted to ensure {row} exists by inserting it with ON CONFLICT IGNORE, "
                    f"but a post-insert query on {keys} returned no results. "
                    f"Insert was {'' if inserted else 'not '}reported as successful. "
                    "This can occur if the insert violated a database constraint other than the "
                    "unique constraint or primary key used to identify the row in this call."
                )
            if n > 1:
                raise RuntimeError(
                    f"Keys passed to sync {keys.keys()} do not comprise a "
                    f"unique constraint for table {table.name}."
                )
            if not bad:
                inserted_or_updated = inserted
            else:
                assert (
                    compared is not None
                ), "Should not be able to get inconsistencies without comparing."
                if inserted:
                    raise RuntimeError(
                        f"Conflict ({bad}) in sync after successful insert; this is "
                        "possible if the same table is being updated by a concurrent "
                        "process that isn't using sync, but it may also be a bug in "
                        "daf_butler."
                    )
                if not update:
                    raise DatabaseConflictError(
                        f"Conflict in sync for table {table.name} on column(s) {_format_bad(bad)}."
                    )
                # Bring the mismatched columns in line with ``compared``.
                with self._transaction() as (_, connection):
                    connection.execute(
                        table.update()
                        .where(sqlalchemy.sql.and_(*[table.columns[k] == v for k, v in keys.items()]))
                        .values(**{k: compared[k] for k in bad})
                    )
                inserted_or_updated = bad
    else:
        # The table cannot be written to; the row must already be there.
        n, bad, result = check()
        if n < 1:
            raise ReadOnlyDatabaseError("sync needs to insert, but database is read-only.")
        if n > 1:
            raise RuntimeError("Keys passed to sync do not comprise a unique constraint.")
        if bad:
            if update:
                raise ReadOnlyDatabaseError("sync needs to update, but database is read-only.")
            raise DatabaseConflictError(
                f"Conflict in sync for table {table.name} on column(s) {_format_bad(bad)}."
            )
        inserted_or_updated = False

    if returning is None:
        return None, inserted_or_updated
    assert result is not None
    return dict(zip(returning, result, strict=True)), inserted_or_updated
def insert(
    self,
    table: sqlalchemy.schema.Table,
    *rows: dict,
    returnIds: bool = False,
    select: sqlalchemy.sql.expression.SelectBase | None = None,
    names: Iterable[str] | None = None,
) -> list[int] | None:
    """Insert rows into a table, optionally reporting the autoincrement
    primary key values that were generated.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    *rows : `dict`
        Positional arguments are the rows to be inserted, as dictionaries
        mapping column name to value. The keys in all dictionaries must
        be the same.
    returnIds : `bool`, optional
        If `True` (`False` is default), return the values of the table's
        autoincrement primary key field (which must exist).
    select : `sqlalchemy.sql.SelectBase`, optional
        A SELECT query expression to insert rows from. Cannot be provided
        with either ``rows`` or ``returnIds=True``.
    names : `~collections.abc.Iterable` [ `str` ], optional
        Names of columns in ``table`` to be populated, ordered to match the
        columns returned by ``select``. Ignored if ``select`` is `None`.
        If not provided, the columns returned by ``select`` must be named
        to match the desired columns of ``table``.

    Returns
    -------
    ids : `None`, or `list` of `int`
        If ``returnIds`` is `True`, a `list` containing the inserted
        values for the table's autoincrement primary key.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.
    TypeError
        Raised if ``select`` is combined with ``rows`` or
        ``returnIds=True``.

    Notes
    -----
    The default implementation uses bulk insert syntax when ``returnIds``
    is `False`, and a loop over single-row insert operations when it is
    `True`.

    Derived classes should reimplement when they can provide a more
    efficient implementation (especially for the latter case).

    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.
    """
    self.assertTableWriteable(table, f"Cannot insert into read-only table {table}.")
    if select is not None and (rows or returnIds):
        raise TypeError("'select' is incompatible with passing value rows or returnIds=True.")
    if select is None and not rows:
        # Nothing to insert at all.
        return [] if returnIds else None
    with self._transaction() as (_, connection):
        if returnIds:
            # One statement per row, so each generated key can be read back.
            sql = table.insert()
            new_ids = []
            for row in rows:
                new_ids.append(connection.execute(sql, row).inserted_primary_key[0])
            return new_ids
        if select is None:
            connection.execute(table.insert(), rows)
            return None
        if names is None:
            # columns() is deprecated since 1.4, but
            # selected_columns() method did not exist in 1.3.
            if hasattr(select, "selected_columns"):
                names = select.selected_columns.keys()
            else:
                names = select.columns.keys()
        connection.execute(table.insert().from_select(list(names), select))
        return None
@abstractmethod
def replace(self, table: sqlalchemy.schema.Table, *rows: dict) -> None:
    """Insert rows into a table, overwriting any existing rows whose
    primary key would otherwise collide with a new row.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    *rows
        Positional arguments are the rows to be inserted, as dictionaries
        mapping column name to value. The keys in all dictionaries must
        be the same.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    Implementations should raise a `sqlalchemy.exc.IntegrityError`
    exception when a constraint other than the primary key would be
    violated.

    Implementations are not required to support `replace` on tables
    with autoincrement keys.
    """
    # Abstract: concrete databases must supply the implementation.
    raise NotImplementedError()
@abstractmethod
def ensure(self, table: sqlalchemy.schema.Table, *rows: dict, primary_key_only: bool = False) -> int:
    """Insert rows into a table, silently skipping any row whose insertion
    would violate a unique constraint.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    *rows
        Positional arguments are the rows to be inserted, as dictionaries
        mapping column name to value. The keys in all dictionaries must
        be the same.
    primary_key_only : `bool`, optional
        If `True` (`False` is default), only skip rows that violate the
        primary key constraint, and raise an exception (and rollback
        transactions) for other constraint violations.

    Returns
    -------
    count : `int`
        The number of rows actually inserted.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.
        This is raised even if the operation would do nothing even on a
        writeable database.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    Implementations are not required to support `ensure` on tables
    with autoincrement keys.
    """
    # Abstract: concrete databases must supply the implementation.
    raise NotImplementedError()
def delete(self, table: sqlalchemy.schema.Table, columns: Iterable[str], *rows: dict) -> int:
    """Delete one or more rows from a table.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table that rows should be deleted from.
    columns : `~collections.abc.Iterable` of `str`
        The names of columns that will be used to constrain the rows to
        be deleted; these will be combined via ``AND`` to form the
        ``WHERE`` clause of the delete query. May be any iterable,
        including a one-shot iterator.
    *rows
        Positional arguments are the keys of rows to be deleted, as
        dictionaries mapping column name to value. The keys in all
        dictionaries must be exactly the names in ``columns``.

    Returns
    -------
    count : `int`
        Number of rows deleted.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    The default implementation should be sufficient for most derived
    classes.
    """
    self.assertTableWriteable(table, f"Cannot delete from read-only table {table}.")
    # Materialize before testing for emptiness: iterators and generators
    # are always truthy, so an empty one would otherwise be mistaken for a
    # constrained delete and short-circuit below.
    columns = list(columns)
    if columns and not rows:
        # If there are no columns, this operation is supposed to delete
        # everything (so we proceed as usual). But if there are columns,
        # but no rows, it was a constrained bulk operation where the
        # constraint is that no rows match, and we should short-circuit
        # while reporting that no rows were affected.
        return 0
    sql = table.delete()

    # More efficient to use IN operator if there is only one
    # variable changing across all rows.
    content: dict[str, set] = defaultdict(set)
    if len(columns) == 1:
        # Nothing to calculate since we can always use IN
        column = columns[0]
        changing_columns = [column]
        content[column] = {row[column] for row in rows}
    else:
        for row in rows:
            for k, v in row.items():
                content[k].add(v)
        changing_columns = [col for col, values in content.items() if len(values) > 1]

    if len(changing_columns) != 1:
        # More than one column changes each time so do explicit bind
        # parameters and have each row processed separately.
        whereTerms = [table.columns[name] == sqlalchemy.sql.bindparam(name) for name in columns]
        if whereTerms:
            sql = sql.where(sqlalchemy.sql.and_(*whereTerms))
        with self._transaction() as (_, connection):
            return connection.execute(sql, rows).rowcount

    # One of the columns has changing values but any others are
    # fixed. In this case we can use an IN operator and be more
    # efficient.
    name = changing_columns.pop()

    # Simple where clause for the unchanging columns; each of their value
    # sets holds exactly one element.
    clauses = [table.columns[k] == v.pop() for k, v in content.items() if k != name]

    # The IN operator will not work for "infinite" numbers of
    # rows so must batch it up into distinct calls.
    in_content = list(content[name])
    n_elements = len(in_content)

    rowcount = 0
    n_per_loop = 1_000  # Controls how many items to put in IN clause
    with self._transaction() as (_, connection):
        for iposn in range(0, n_elements, n_per_loop):
            endpos = iposn + n_per_loop
            in_clause = table.columns[name].in_(in_content[iposn:endpos])

            newsql = sql.where(sqlalchemy.sql.and_(*clauses, in_clause))
            rowcount += connection.execute(newsql).rowcount
    return rowcount
def deleteWhere(self, table: sqlalchemy.schema.Table, where: sqlalchemy.sql.ColumnElement) -> int:
    """Delete the rows of a table selected by a pre-constructed WHERE
    clause.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table that rows should be deleted from.
    where : `sqlalchemy.sql.ClauseElement`
        Expression used as the ``WHERE`` clause of the delete query; it is
        passed to the statement as given.

    Returns
    -------
    count : `int`
        Number of rows deleted.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    The default implementation should be sufficient for most derived
    classes.
    """
    self.assertTableWriteable(table, f"Cannot delete from read-only table {table}.")

    statement = table.delete().where(where)
    with self._transaction() as (_, connection):
        outcome = connection.execute(statement)
        return outcome.rowcount
def update(self, table: sqlalchemy.schema.Table, where: dict[str, str], *rows: dict) -> int:
    """Update rows that already exist in a table.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table containing the rows to be updated.
    where : `dict` [`str`, `str`]
        A mapping from the names of columns that will be used to search for
        existing rows to the keys that will hold these values in the
        ``rows`` dictionaries. Note that these may not be the same due to
        SQLAlchemy limitations.
    *rows
        Positional arguments are the rows to be updated. The keys in all
        dictionaries must be the same, and may correspond to either a
        value in the ``where`` dictionary or the name of a column to be
        updated.

    Returns
    -------
    count : `int`
        Number of rows matched (regardless of whether the update actually
        modified them).

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    The default implementation should be sufficient for most derived
    classes.
    """
    self.assertTableWriteable(table, f"Cannot update read-only table {table}.")
    if len(rows) == 0:
        return 0
    # Each searched column is matched against a bind parameter whose value
    # is taken from the corresponding key of every row dictionary.
    conditions = []
    for column_name, row_key in where.items():
        conditions.append(table.columns[column_name] == sqlalchemy.sql.bindparam(row_key))
    statement = table.update().where(sqlalchemy.sql.and_(*conditions))
    with self._transaction() as (_, connection):
        outcome = connection.execute(statement, rows)
        return outcome.rowcount
@contextmanager
def query(
    self,
    sql: sqlalchemy.sql.expression.Executable | sqlalchemy.sql.expression.SelectBase,
    *args: Any,
    **kwargs: Any,
) -> Iterator[sqlalchemy.engine.CursorResult]:
    """Run a SELECT query against the database.

    Parameters
    ----------
    sql : `sqlalchemy.sql.expression.SelectBase`
        A SQLAlchemy representation of a ``SELECT`` query.
    *args
        Additional positional arguments are forwarded to
        `sqlalchemy.engine.Connection.execute`.
    **kwargs
        Additional keyword arguments are forwarded to
        `sqlalchemy.engine.Connection.execute`.

    Returns
    -------
    result_context : `sqlalchemy.engine.CursorResult`
        Context manager that returns the query result object when entered.
        These results are invalidated when the context is exited.

    Notes
    -----
    If a session connection is active it is used and left open; otherwise
    a new connection is obtained from the engine and closed when the
    context exits, including when executing the query itself raises.
    """
    if self._session_connection is None:
        connection = self._engine.connect()
    else:
        connection = self._session_connection
    try:
        # The execute call is inside the ``try`` so that a connection opened
        # just for this query is closed even if execution fails.
        #
        # TODO: SelectBase is not good for execute(), but it used everywhere,
        # e.g. in daf_relation. We should switch to Executable at some point.
        # The cast target is a string: `typing.cast` never evaluates it.
        result = connection.execute(cast("sqlalchemy.sql.expression.Executable", sql), *args, **kwargs)
        yield result
    finally:
        if connection is not self._session_connection:
            connection.close()
@abstractmethod
def constant_rows(
    self,
    fields: NamedValueAbstractSet[ddl.FieldSpec],
    *rows: dict,
    name: str | None = None,
) -> sqlalchemy.sql.FromClause:
    """Build a SQLAlchemy object holding a small set of constant-valued
    rows.

    Parameters
    ----------
    fields : `NamedValueAbstractSet` [ `ddl.FieldSpec` ]
        The columns of the rows. Unique and foreign key constraints are
        ignored.
    *rows : `dict`
        Values for the rows.
    name : `str`, optional
        If provided, the name of the SQL construct. If not provided, an
        opaque but unique identifier is generated.

    Returns
    -------
    from_clause : `sqlalchemy.sql.FromClause`
        SQLAlchemy object representing the given rows. This is guaranteed
        to be something that can be directly joined into a ``SELECT``
        query's ``FROM`` clause, and will not involve a temporary table
        that needs to be cleaned up later.

    Notes
    -----
    The default implementation uses the SQL-standard ``VALUES`` construct,
    but support for that construct is varied enough across popular RDBMSs
    that the method is still marked abstract to force explicit opt-in via
    delegation to `super`.
    """
    # Fall back to a random, collision-resistant identifier.
    construct_name = f"tmp_{uuid.uuid4().hex}" if name is None else name
    column_defs = [sqlalchemy.Column(spec.name, spec.getSizedColumnType()) for spec in fields]
    values_clause = sqlalchemy.sql.values(*column_defs, name=construct_name)
    # Emit each row as a tuple ordered like ``fields`` so values line up
    # with the column definitions above.
    return values_clause.data([tuple(row[column] for column in fields.names) for row in rows])
def get_constant_rows_max(self) -> int:
    """Report the largest number of rows that callers should hand to
    `constant_rows` for this backend.

    Returns
    -------
    max : `int`
        Maximum number of rows.

    Notes
    -----
    This should reflect typical performance profiles (or a guess at these),
    not just hard database engine limits.
    """
    # Default limit for backends that do not override this method.
    return 100
# Only the type is declared here; no value is assigned in this span.
# NOTE(review): presumably set during construction or by concrete
# subclasses — confirm.
origin: int
"""An integer ID that should be used as the default for any datasets,
quanta, or other entities that use a (autoincrement, origin) compound
primary key (`int`).
"""

# Only the type is declared here; `None` is an allowed value per the
# annotation. NOTE(review): where this is assigned is not visible in this
# span — confirm.
namespace: str | None
"""The schema or namespace this database instance is associated with
(`str` or `None`).
"""