Coverage for python/lsst/daf/butler/registry/interfaces/_database.py: 22%
413 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-10-27 09:44 +0000
« prev ^ index » next coverage.py v7.3.2, created at 2023-10-27 09:44 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
27from __future__ import annotations
29from ... import ddl, time_utils
31__all__ = [
32 "Database",
33 "ReadOnlyDatabaseError",
34 "DatabaseConflictError",
35 "DatabaseInsertMode",
36 "SchemaAlreadyDefinedError",
37 "StaticTablesContext",
38]
40import enum
41import uuid
42import warnings
43from abc import ABC, abstractmethod
44from collections import defaultdict
45from collections.abc import Callable, Iterable, Iterator, Sequence
46from contextlib import contextmanager
47from typing import Any, cast, final
49import astropy.time
50import sqlalchemy
52from ..._named import NamedValueAbstractSet
53from ..._timespan import TimespanDatabaseRepresentation
54from .._exceptions import ConflictingDefinitionError
class DatabaseInsertMode(enum.Enum):
    """Strategies for handling pre-existing rows when inserting records."""

    INSERT = enum.auto()
    """Plain insert; it is an error if a record already exists."""

    REPLACE = enum.auto()
    """Insert, overwriting any record that already exists."""

    ENSURE = enum.auto()
    """Insert, skipping any record that already exists."""
# TODO: method is called with list[ReflectedColumn] in SA 2, and
# ReflectedColumn does not exist in 1.4.
def _checkExistingTableDefinition(name: str, spec: ddl.TableSpec, inspection: list) -> None:
    """Verify that a reflected table has exactly the columns its
    specification declares.

    Only the sets of column names are compared; column order is ignored.

    Parameters
    ----------
    name : `str`
        Name of the table; it only appears in the error message.
    spec : `ddl.TableSpec`
        Expected definition of the table.
    inspection : `list`
        Column descriptions as returned by
        `sqlalchemy.engine.reflection.Inspector.get_columns`; each entry
        must provide a ``"name"`` item.

    Raises
    ------
    DatabaseConflictError
        Raised if the column names in the database differ from those in the
        specification.
    """
    reflected = [column["name"] for column in inspection]
    mismatch = spec.fields.names != set(reflected)
    if mismatch:
        raise DatabaseConflictError(
            f"Table '{name}' exists but is defined differently in the database; "
            f"specification has columns {list(spec.fields.names)}, while the "
            f"table in the database has {reflected}."
        )
class ReadOnlyDatabaseError(RuntimeError):
    """Error signalling that a write operation was attempted on a
    read-only `Database`.
    """
class DatabaseConflictError(ConflictingDefinitionError):
    """Error signalling that what is stored in the database (row values or
    schema entities) does not agree with what this client expects.
    """
class SchemaAlreadyDefinedError(RuntimeError):
    """Error signalling an attempt to initialize a database schema while
    some tables already exist.
    """
118class StaticTablesContext:
119 """Helper class used to declare the static schema for a registry layer
120 in a database.
122 An instance of this class is returned by `Database.declareStaticTables`,
123 which should be the only way it should be constructed.
124 """
126 def __init__(self, db: Database, connection: sqlalchemy.engine.Connection):
127 self._db = db
128 self._foreignKeys: list[tuple[sqlalchemy.schema.Table, sqlalchemy.schema.ForeignKeyConstraint]] = []
129 self._inspector = sqlalchemy.inspect(connection)
130 self._tableNames = frozenset(self._inspector.get_table_names(schema=self._db.namespace))
131 self._initializers: list[Callable[[Database], None]] = []
133 def addTable(self, name: str, spec: ddl.TableSpec) -> sqlalchemy.schema.Table:
134 """Add a new table to the schema, returning its sqlalchemy
135 representation.
137 The new table may not actually be created until the end of the
138 context created by `Database.declareStaticTables`, allowing tables
139 to be declared in any order even in the presence of foreign key
140 relationships.
141 """
142 name = self._db._mangleTableName(name)
143 if name in self._tableNames:
144 _checkExistingTableDefinition(
145 name, spec, self._inspector.get_columns(name, schema=self._db.namespace)
146 )
147 metadata = self._db._metadata
148 assert metadata is not None, "Guaranteed by context manager that returns this object."
149 table = self._db._convertTableSpec(name, spec, metadata)
150 for foreignKeySpec in spec.foreignKeys:
151 self._foreignKeys.append((table, self._db._convertForeignKeySpec(name, foreignKeySpec, metadata)))
152 return table
154 def addTableTuple(self, specs: tuple[ddl.TableSpec, ...]) -> tuple[sqlalchemy.schema.Table, ...]:
155 """Add a named tuple of tables to the schema, returning their
156 SQLAlchemy representations in a named tuple of the same type.
158 The new tables may not actually be created until the end of the
159 context created by `Database.declareStaticTables`, allowing tables
160 to be declared in any order even in the presence of foreign key
161 relationships.
163 Notes
164 -----
165 ``specs`` *must* be an instance of a type created by
166 `collections.namedtuple`, not just regular tuple, and the returned
167 object is guaranteed to be the same. Because `~collections.namedtuple`
168 is just a factory for `type` objects, not an actual type itself,
169 we cannot represent this with type annotations.
170 """
171 return specs._make( # type: ignore
172 self.addTable(name, spec) for name, spec in zip(specs._fields, specs, strict=True) # type: ignore
173 )
175 def addInitializer(self, initializer: Callable[[Database], None]) -> None:
176 """Add a method that does one-time initialization of a database.
178 Initialization can mean anything that changes state of a database
179 and needs to be done exactly once after database schema was created.
180 An example for that could be population of schema attributes.
182 Parameters
183 ----------
184 initializer : callable
185 Method of a single argument which is a `Database` instance.
186 """
187 self._initializers.append(initializer)
190class Database(ABC):
191 """An abstract interface that represents a particular database engine's
192 representation of a single schema/namespace/database.
194 Parameters
195 ----------
196 origin : `int`
197 An integer ID that should be used as the default for any datasets,
198 quanta, or other entities that use a (autoincrement, origin) compound
199 primary key.
200 engine : `sqlalchemy.engine.Engine`
201 The SQLAlchemy engine for this `Database`.
202 namespace : `str`, optional
203 Name of the schema or namespace this instance is associated with.
204 This is passed as the ``schema`` argument when constructing a
205 `sqlalchemy.schema.MetaData` instance. We use ``namespace`` instead to
206 avoid confusion between "schema means namespace" and "schema means
207 table definitions".
209 Notes
210 -----
211 `Database` requires all write operations to go through its special named
212 methods. Our write patterns are sufficiently simple that we don't really
213 need the full flexibility of SQL insert/update/delete syntax, and we need
214 non-standard (but common) functionality in these operations sufficiently
215 often that it seems worthwhile to provide our own generic API.
217 In contrast, `Database.query` allows arbitrary ``SELECT`` queries (via
218 their SQLAlchemy representation) to be run, as we expect these to require
219 significantly more sophistication while still being limited to standard
220 SQL.
222 `Database` itself has several underscore-prefixed attributes:
224 - ``_engine``: SQLAlchemy object representing its engine.
225 - ``_connection``: method returning a context manager for
226 `sqlalchemy.engine.Connection` object.
227 - ``_metadata``: the `sqlalchemy.schema.MetaData` object representing
228 the tables and other schema entities.
230 These are considered protected (derived classes may access them, but other
231 code should not), and read-only, aside from executing SQL via
232 ``_connection``.
233 """
def __init__(self, *, origin: int, engine: sqlalchemy.engine.Engine, namespace: str | None = None):
    # Public identity of this database.
    self.origin = origin
    self.namespace = namespace
    # Protected state; see the class notes.
    self._engine = engine
    # No schema declared yet and no session open.
    self._metadata: sqlalchemy.schema.MetaData | None = None
    self._session_connection: sqlalchemy.engine.Connection | None = None
    # Keys of temporary tables created through `temporary_table`.
    self._temp_tables: set[str] = set()
def __repr__(self) -> str:
    # Report the connection URL (with any password masked) rather than
    # trying to reproduce all constructor arguments.
    url = self._engine.url
    if url.password is not None:
        url = url.set(password="***")
    uri = str(url)
    if self.namespace:
        uri += f"#{self.namespace}"
    return f'{type(self).__name__}("{uri}")'
@classmethod
def makeDefaultUri(cls, root: str) -> str | None:
    """Return the default connection URI for a root directory.

    Parameters
    ----------
    root : `str`
        Root directory the default should be appropriate for.

    Returns
    -------
    uri : `str` or `None`
        Default connection URI, or `None` if there can be no such default.
        This implementation always returns `None`.
    """
    return None
@classmethod
def fromUri(
    cls,
    uri: str | sqlalchemy.engine.URL,
    *,
    origin: int,
    namespace: str | None = None,
    writeable: bool = True,
) -> Database:
    """Build a `Database` from a SQLAlchemy connection URI.

    The engine is created with `makeEngine` and then handed to
    `fromEngine`.

    Parameters
    ----------
    uri : `str` or `sqlalchemy.engine.URL`
        SQLAlchemy URI connection string.
    origin : `int`
        Integer ID used as the default for any datasets, quanta, or other
        entities that use a (autoincrement, origin) compound primary key.
    namespace : `str`, optional
        Database namespace (i.e. schema) to associate the new instance
        with. If `None` (default), the namespace (if any) is inferred
        from the URI.
    writeable : `bool`, optional
        If `True`, allow write operations on the database, including
        ``CREATE TABLE``.

    Returns
    -------
    db : `Database`
        A new `Database` instance.
    """
    engine = cls.makeEngine(uri, writeable=writeable)
    return cls.fromEngine(engine, origin=origin, namespace=namespace, writeable=writeable)
@classmethod
@abstractmethod
def makeEngine(
    cls, uri: str | sqlalchemy.engine.URL, *, writeable: bool = True
) -> sqlalchemy.engine.Engine:
    """Build a `sqlalchemy.engine.Engine` for a SQLAlchemy URI.

    Parameters
    ----------
    uri : `str` or `sqlalchemy.engine.URL`
        SQLAlchemy URI connection string.
    writeable : `bool`, optional
        If `True`, allow write operations on the database, including
        ``CREATE TABLE``.

    Returns
    -------
    engine : `sqlalchemy.engine.Engine`
        A database engine.

    Notes
    -----
    Subclasses that can connect to a database in other ways are encouraged
    to add optional arguments to their implementations, provided they stay
    compatible with this base-class call signature.
    """
    raise NotImplementedError()
@classmethod
@abstractmethod
def fromEngine(
    cls,
    engine: sqlalchemy.engine.Engine,
    *,
    origin: int,
    namespace: str | None = None,
    writeable: bool = True,
) -> Database:
    """Build a `Database` on top of an existing
    `sqlalchemy.engine.Engine`.

    Parameters
    ----------
    engine : `sqlalchemy.engine.Engine`
        Engine for the database. May be shared between `Database`
        instances.
    origin : `int`
        Integer ID used as the default for any datasets, quanta, or other
        entities that use a (autoincrement, origin) compound primary key.
    namespace : `str`, optional
        A different database namespace (i.e. schema) to associate the new
        instance with. If `None` (default), the namespace (if any) is
        inferred from the connection.
    writeable : `bool`, optional
        If `True`, allow write operations on the database, including
        ``CREATE TABLE``.

    Returns
    -------
    db : `Database`
        A new `Database` instance.

    Notes
    -----
    This method lets several `Database` instances share one engine, which
    is desirable when they represent different namespaces that can be
    queried together.
    """
    raise NotImplementedError()
@final
@contextmanager
def session(self) -> Iterator[None]:
    """Open a session (a persistent connection to the database) for the
    duration of a ``with`` block.

    Returns
    -------
    context : `AbstractContextManager` [ `None` ]
        A context manager that does not return a value when entered.

    Notes
    -----
    Use this when a sequence of read-only SQL operations will be run in
    rapid succession *without* needing consistent results in the presence
    of concurrent writes (or, more rarely, when conflicting concurrent
    writes are rare/impossible and the session will stay open long enough
    that a transaction is inadvisable).

    The work is delegated to `_session`; the connection it provides is
    deliberately not exposed to the caller.
    """
    with self._session():
        yield
@final
@contextmanager
def transaction(
    self,
    *,
    interrupting: bool = False,
    savepoint: bool = False,
    lock: Iterable[sqlalchemy.schema.Table] = (),
    for_temp_tables: bool = False,
) -> Iterator[None]:
    """Run a ``with`` block inside a database transaction.

    All arguments are passed unchanged to `_transaction`; the values it
    yields are not exposed to the caller.

    Parameters
    ----------
    interrupting : `bool`, optional
        If `True` (`False` is default), this transaction block may not be
        nested within an outer one, and attempting to do so is a logic
        (i.e. assertion) error.
    savepoint : `bool`, optional
        If `True` (`False` is default), create a `SAVEPOINT`, so that
        exceptions raised by the database (e.g. due to constraint
        violations) inside this block can be caught outside it without
        also rolling back all operations in an outer transaction block.
        If `False`, transactions may still be nested, but a rollback may
        be generated at any level and affects all levels, and commits are
        deferred until the outermost block completes. If any outer
        transaction block was created with ``savepoint=True``, all inner
        blocks will be as well (regardless of the value passed). This has
        no effect if this is the outermost transaction.
    lock : `~collections.abc.Iterable` [ `sqlalchemy.schema.Table` ], \
            optional
        Tables to lock for the duration of this transaction. These locks
        are guaranteed to prevent concurrent writes and allow this
        transaction (only) to acquire the same locks (others should
        block), but only prevent concurrent reads if the database engine
        requires that in order to block concurrent writes.
    for_temp_tables : `bool`, optional
        If `True`, this transaction may involve creating temporary tables.

    Returns
    -------
    context : `AbstractContextManager` [ `None` ]
        A context manager that commits the transaction when it is exited
        without error and rolls back the transaction when it is exited
        via an exception.

    Notes
    -----
    All transactions on a connection managed by one or more `Database`
    instances _must_ go through this method, or transaction state will not
    be correctly managed.
    """
    with self._transaction(
        interrupting=interrupting,
        savepoint=savepoint,
        lock=lock,
        for_temp_tables=for_temp_tables,
    ):
        yield
@contextmanager
def temporary_table(
    self, spec: ddl.TableSpec, name: str | None = None
) -> Iterator[sqlalchemy.schema.Table]:
    """Create a temporary table for the duration of a ``with`` block and
    drop it afterwards.

    Parameters
    ----------
    spec : `ddl.TableSpec`
        Specification for the columns. Unique and foreign key constraints
        may be ignored.
    name : `str`, optional
        If provided, the name of the SQL construct. If not provided, an
        opaque but unique identifier is generated.

    Returns
    -------
    context : `AbstractContextManager` [ `sqlalchemy.schema.Table` ]
        A context manager that returns a SQLAlchemy representation of the
        temporary table when entered.

    Notes
    -----
    Temporary tables may be created, dropped, and written to even in
    read-only databases - at least according to the Python-level
    protections in the `Database` classes. Server permissions may say
    otherwise, but in that case they probably need to be modified to
    support the full range of expected read-only butler behavior.
    """
    with self._session() as connection:
        temp_table = self._make_temporary_table(connection, spec=spec, name=name)
        key = temp_table.key
        # Registering the key is what makes `isTableWriteable` accept this
        # table in a read-only database.
        self._temp_tables.add(key)
        try:
            yield temp_table
        finally:
            with self._transaction():
                temp_table.drop(connection)
            self._temp_tables.remove(key)
@contextmanager
def _session(self) -> Iterator[sqlalchemy.engine.Connection]:
    """Protected implementation of `session` that exposes the connection.

    Internal `Database` code that needs the actual SQLAlchemy connection
    object should use this method. Subclasses should override it rather
    than `session` itself.

    Returns
    -------
    context : `AbstractContextManager` [ `sqlalchemy.engine.Connection` ]
        A context manager that returns a SQLAlchemy connection when
        entered.
    """
    if self._session_connection is not None:
        # A session is already open: reuse its connection and leave the
        # clean-up to whoever opened it.
        yield self._session_connection
        return
    try:
        # Outermost session: open a new connection and close it when done.
        self._session_connection = self._engine.connect()
        yield self._session_connection
    finally:
        if self._session_connection is not None:
            self._session_connection.close()
            self._session_connection = None
        # Temporary tables only live within session
        self._temp_tables = set()
@contextmanager
def _transaction(
    self,
    *,
    interrupting: bool = False,
    savepoint: bool = False,
    lock: Iterable[sqlalchemy.schema.Table] = (),
    for_temp_tables: bool = False,
) -> Iterator[tuple[bool, sqlalchemy.engine.Connection]]:
    """Protected implementation of `transaction` that exposes the
    connection and whether this is a new outermost transaction.

    Internal `Database` code that needs the actual SQLAlchemy connection
    object should use this method. Subclasses should override it rather
    than `transaction` itself.

    Parameters
    ----------
    interrupting : `bool`, optional
        If `True` (`False` is default), this transaction block may not be
        nested within an outer one, and attempting to do so is a logic
        (i.e. assertion) error.
    savepoint : `bool`, optional
        If `True` (`False` is default), create a `SAVEPOINT`, so that
        exceptions raised by the database (e.g. due to constraint
        violations) inside this block can be caught outside it without
        also rolling back all operations in an outer transaction block.
        If `False`, transactions may still be nested, but a rollback may
        be generated at any level and affects all levels, and commits are
        deferred until the outermost block completes. If any outer
        transaction block was created with ``savepoint=True``, all inner
        blocks will be as well (regardless of the value passed). This has
        no effect if this is the outermost transaction.
    lock : `~collections.abc.Iterable` [ `sqlalchemy.schema.Table` ], \
            optional
        Tables to lock for the duration of this transaction. These locks
        are guaranteed to prevent concurrent writes and allow this
        transaction (only) to acquire the same locks (others should
        block), but only prevent concurrent reads if the database engine
        requires that in order to block concurrent writes.
    for_temp_tables : `bool`, optional
        If `True`, this transaction may involve creating temporary tables.
        This implementation does not use the value.

    Returns
    -------
    context : `AbstractContextManager` [ `tuple` [ `bool`,
            `sqlalchemy.engine.Connection` ] ]
        A context manager that commits the transaction when it is exited
        without error and rolls back the transaction when it is exited
        via an exception. When entered, it returns a tuple of:

        - ``is_new`` (`bool`): whether this is a new (outermost)
          transaction;
        - ``connection`` (`sqlalchemy.engine.Connection`): the connection.
    """
    with self._session() as connection:
        already_in_transaction = connection.in_transaction()
        assert not (interrupting and already_in_transaction), (
            "Logic error in transaction nesting: an operation that would "
            "interrupt the active transaction context has been requested."
        )
        # Once an enclosing block uses a savepoint, inner blocks do too.
        savepoint = savepoint or connection.in_nested_transaction()
        trans: sqlalchemy.engine.Transaction | None
        if not already_in_transaction:
            # Use a regular (non-savepoint) transaction always for the
            # outermost context.
            trans = connection.begin()
        elif savepoint:
            trans = connection.begin_nested()
        else:
            # Nested non-savepoint transactions don't do anything.
            trans = None
        self._lockTables(connection, lock)
        is_new = not already_in_transaction
        try:
            yield is_new, connection
            if trans is not None:
                trans.commit()
        except BaseException:
            if trans is not None:
                trans.rollback()
            raise
@abstractmethod
def _lockTables(
    self, connection: sqlalchemy.engine.Connection, tables: Iterable[sqlalchemy.schema.Table] = ()
) -> None:
    """Acquire locks on the given tables.

    This is an implementation hook for subclasses that is called while a
    transaction is being set up. Other code should not call it directly.

    Parameters
    ----------
    connection : `sqlalchemy.engine.Connection`
        Database connection object. A transaction is guaranteed to be in
        progress already for this connection.
    tables : `~collections.abc.Iterable` [ `sqlalchemy.schema.Table` ], \
            optional
        Tables to lock for the duration of this transaction. These locks
        are guaranteed to prevent concurrent writes and allow this
        transaction (only) to acquire the same locks (others should
        block), but only prevent concurrent reads if the database engine
        requires that in order to block concurrent writes.
    """
    raise NotImplementedError()
def isTableWriteable(self, table: sqlalchemy.schema.Table) -> bool:
    """Report whether rows may be written to a table.

    A table is writeable if the database itself is writeable or if the
    table is one of the registered temporary tables.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy table object to check.

    Returns
    -------
    writeable : `bool`
        Whether this table is writeable.
    """
    database_writeable = self.isWriteable()
    return database_writeable or table.key in self._temp_tables
def assertTableWriteable(self, table: sqlalchemy.schema.Table, msg: str) -> None:
    """Raise if the given table is not writeable.

    A table is writeable if the database connection is read-write or the
    table is a temporary table (see `isTableWriteable`).

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy table object to check.
    msg : `str`
        Message for the exception raised when the table is not writeable.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised, with ``msg``, if the table is not writeable.
    """
    if not self.isTableWriteable(table):
        raise ReadOnlyDatabaseError(msg)
@contextmanager
def declareStaticTables(self, *, create: bool) -> Iterator[StaticTablesContext]:
    """Return a context manager in which the database's static DDL schema
    can be declared.

    Parameters
    ----------
    create : `bool`
        If `True`, attempt to create all tables at the end of the context.
        If `False`, they will be assumed to already exist.

    Returns
    -------
    schema : `StaticTablesContext`
        A helper object that is used to add new tables.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if ``create`` is `True` and `Database.isWriteable` returns
        `False`; this is checked before any table is declared.
    SchemaAlreadyDefinedError
        Raised if ``create`` is `True` and the namespace already contains
        tables.

    Examples
    --------
    Given a `Database` instance ``db``::

        with db.declareStaticTables(create=True) as schema:
            schema.addTable("table1", TableSpec(...))
            schema.addTable("table2", TableSpec(...))

    Notes
    -----
    A database's static DDL schema must be declared before any dynamic
    tables are managed via calls to `ensureTableExists` or
    `getExistingTable`. The order in which static schema tables are added
    inside the context block is unimportant; they will automatically be
    sorted and added in an order consistent with their foreign key
    relationships.

    If anything raises, including code inside the ``with`` block, the
    metadata object created here is discarded again.
    """
    if create and not self.isWriteable():
        raise ReadOnlyDatabaseError(f"Cannot create tables in read-only database {self}.")
    self._metadata = sqlalchemy.MetaData(schema=self.namespace)
    try:
        with self._transaction() as (_, connection):
            context = StaticTablesContext(self, connection)
            if create and context._tableNames:
                # Looks like database is already initialized, to avoid
                # danger of modifying/destroying valid schema we refuse to
                # do anything in this case
                raise SchemaAlreadyDefinedError(f"Cannot create tables in non-empty database {self}.")
            yield context
            # Foreign keys are attached only now, after the caller has
            # declared all tables, so declaration order does not matter.
            for table, foreignKey in context._foreignKeys:
                table.append_constraint(foreignKey)
            if create:
                if (
                    self.namespace is not None
                    and self.namespace not in context._inspector.get_schema_names()
                ):
                    connection.execute(sqlalchemy.schema.CreateSchema(self.namespace))
                # In our tables we have columns that make use of sqlalchemy
                # Sequence objects. There is currently a bug in sqlalchemy
                # that causes a deprecation warning to be thrown on a
                # property of the Sequence object when the repr for the
                # sequence is created. Here a filter is used to catch these
                # deprecation warnings when tables are created.
                with warnings.catch_warnings():
                    warnings.simplefilter("ignore", category=sqlalchemy.exc.SADeprecationWarning)
                    self._metadata.create_all(connection)
                # call all initializer methods sequentially
                for init in context._initializers:
                    init(self)
    except BaseException:
        # Leave no half-declared schema behind.
        self._metadata = None
        raise
@abstractmethod
def isWriteable(self) -> bool:
    """Report whether this client may modify the database.

    Returns
    -------
    writeable : `bool`
        `True` if this database can be modified by this client.
    """
    raise NotImplementedError()
@abstractmethod
def __str__(self) -> str:
    """Return a human-readable identifier for this `Database`.

    The identifier includes any namespace or schema that identifies its
    names within a `Registry`.
    """
    raise NotImplementedError()
@property
def dialect(self) -> sqlalchemy.engine.Dialect:
    """The SQLAlchemy dialect of the engine behind this database
    (`sqlalchemy.engine.Dialect`).
    """
    engine = self._engine
    return engine.dialect
def shrinkDatabaseEntityName(self, original: str) -> str:
    """Shorten a name so that it fits within this database engine's
    length limits for table, constraint, index, and sequence names.

    Implementations should not assume that simple truncation is safe,
    because multiple long names often begin with the same prefix.

    This default implementation returns the given name unchanged.

    Parameters
    ----------
    original : `str`
        The original name.

    Returns
    -------
    shrunk : `str`
        The new, possibly shortened name.
    """
    return original
def expandDatabaseEntityName(self, shrunk: str) -> str:
    """Retrieve the original name for a database entity that was too long
    to fit within the database engine's limits.

    This default implementation returns the given name unchanged.

    Parameters
    ----------
    shrunk : `str`
        The possibly shortened name.

    Returns
    -------
    original : `str`
        The original name.
    """
    return shrunk
def _mangleTableName(self, name: str) -> str:
    """Translate a logical, user-visible table name into the true table
    name used in the database.

    This default implementation returns the given name unchanged.

    Parameters
    ----------
    name : `str`
        Input table name. Should not include a namespace (i.e. schema)
        prefix.

    Returns
    -------
    mangled : `str`
        Mangled version of the table name (still with no namespace
        prefix).

    Notes
    -----
    Reimplementations must be idempotent: mangling an already-mangled name
    must have no effect.
    """
    return name
def _makeColumnConstraints(self, table: str, spec: ddl.FieldSpec) -> list[sqlalchemy.CheckConstraint]:
    """Build the extra constraints implied by a field specification.

    Parameters
    ----------
    table : `str`
        Name of the table this column is being added to.
    spec : `FieldSpec`
        Specification for the field to be added.

    Returns
    -------
    constraint : `list` of `sqlalchemy.CheckConstraint`
        Constraints added for this column. This default implementation
        returns an empty list.
    """
    # By default we return no additional constraints
    return []
def _convertFieldSpec(
    self, table: str, spec: ddl.FieldSpec, metadata: sqlalchemy.MetaData, **kwargs: Any
) -> sqlalchemy.schema.Column:
    """Build the `sqlalchemy.schema.Column` described by a `FieldSpec`.

    Parameters
    ----------
    table : `str`
        Name of the table this column is being added to.
    spec : `FieldSpec`
        Specification for the field to be added.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema this field's table is
        being added to.
    **kwargs
        Additional keyword arguments to forward to the
        `sqlalchemy.schema.Column` constructor. This is provided to make
        it easier for derived classes to delegate to ``super()`` while
        making only minor changes.

    Returns
    -------
    column : `sqlalchemy.schema.Column`
        SQLAlchemy representation of the field.
    """
    positional = []
    if spec.autoincrement:
        # Generate a sequence to use for auto incrementing for databases
        # that do not support it natively. This will be ignored by
        # sqlalchemy for databases that do support it.
        sequence_name = self.shrinkDatabaseEntityName(f"{table}_seq_{spec.name}")
        positional.append(sqlalchemy.Sequence(sequence_name, metadata=metadata))
    assert spec.doc is None or isinstance(spec.doc, str), f"Bad doc for {table}.{spec.name}."
    return sqlalchemy.schema.Column(
        spec.name,
        spec.getSizedColumnType(),
        *positional,
        nullable=spec.nullable,
        primary_key=spec.primaryKey,
        comment=spec.doc,
        server_default=spec.default,
        **kwargs,
    )
def _convertForeignKeySpec(
    self, table: str, spec: ddl.ForeignKeySpec, metadata: sqlalchemy.MetaData, **kwargs: Any
) -> sqlalchemy.schema.ForeignKeyConstraint:
    """Convert a `ForeignKeySpec` to a
    `sqlalchemy.schema.ForeignKeyConstraint`.

    The constraint name is built from ``"fkey"``, the source table, the
    mangled target table, and the target and source column names, and is
    then passed through `shrinkDatabaseEntityName`.

    Parameters
    ----------
    table : `str`
        Name of the table this foreign key is being added to.
    spec : `ForeignKeySpec`
        Specification for the foreign key to be added.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema this constraint is
        being added to. Not used by this implementation.
    **kwargs
        Additional keyword arguments to forward to the
        `sqlalchemy.schema.ForeignKeyConstraint` constructor. This is
        provided to make it easier for derived classes to delegate to
        ``super()`` while making only minor changes. ``name`` and
        ``ondelete`` are always set here and must not be passed.

    Returns
    -------
    constraint : `sqlalchemy.schema.ForeignKeyConstraint`
        SQLAlchemy representation of the constraint.
    """
    # Mangle the target table name once; it is needed both for the
    # constraint name and for every referenced column.
    target_table = self._mangleTableName(spec.table)
    name = self.shrinkDatabaseEntityName(
        "_".join(["fkey", table, target_table, *spec.target, *spec.source])
    )
    return sqlalchemy.schema.ForeignKeyConstraint(
        spec.source,
        [f"{target_table}.{col}" for col in spec.target],
        name=name,
        ondelete=spec.onDelete,
        **kwargs,
    )
def _convertExclusionConstraintSpec(
    self,
    table: str,
    spec: tuple[str | type[TimespanDatabaseRepresentation], ...],
    metadata: sqlalchemy.MetaData,
) -> sqlalchemy.schema.Constraint:
    """Turn one entry of `ddl.TableSpec.exclusion` into a SQLAlchemy
    constraint.

    This base implementation always raises; databases that support
    exclusion constraints are expected to override it.

    Parameters
    ----------
    table : `str`
        Name of the table the constraint belongs to.
    spec : `tuple` [ `str` or `type` ]
        Column names (`str`) plus the `type` returned by
        `getTimespanRepresentation` (which must appear exactly once), in
        the order the columns should have in the index backing the
        constraint.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema the constraint is
        being added to.

    Returns
    -------
    constraint : `sqlalchemy.schema.Constraint`
        SQLAlchemy representation of the constraint.

    Raises
    ------
    NotImplementedError
        Raised if this database does not support exclusion constraints.
    """
    message = f"Database {self} does not support exclusion constraints."
    raise NotImplementedError(message)
def _convertTableSpec(
    self, name: str, spec: ddl.TableSpec, metadata: sqlalchemy.MetaData, **kwargs: Any
) -> sqlalchemy.schema.Table:
    """Convert a `TableSpec` to a `sqlalchemy.schema.Table`.

    Parameters
    ----------
    name : `str`
        Name of the table; it is passed through `_mangleTableName` before
        being used.
    spec : `TableSpec`
        Specification for the table to be converted.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema this table is being
        added to.
    **kwargs
        Additional keyword arguments to forward to the
        `sqlalchemy.schema.Table` constructor.  This is provided to make it
        easier for derived classes to delegate to ``super()`` while making
        only minor changes.

    Returns
    -------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy representation of the table.

    Notes
    -----
    This method does not create constraints for ``spec.foreignKeys`` (it
    only adds indexes on their source columns when requested), in order to
    avoid circular dependencies.  The constraints are added by higher-level
    logic in `ensureTableExists`, `getExistingTable`, and
    `declareStaticTables`.
    """
    name = self._mangleTableName(name)
    shrink = self.shrinkDatabaseEntityName

    # Columns come first, followed by any per-column constraints.
    args: list[sqlalchemy.schema.SchemaItem] = []
    for fieldSpec in spec.fields:
        args.append(self._convertFieldSpec(name, fieldSpec, metadata))
    for fieldSpec in spec.fields:
        args.extend(self._makeColumnConstraints(name, fieldSpec))

    # Remember every column tuple that already gets an index through the
    # primary key or a unique constraint, so that explicit and foreign-key
    # indexes below do not duplicate them.
    primaryKeyColumns = tuple(fieldSpec.name for fieldSpec in spec.fields if fieldSpec.primaryKey)
    allIndexes = {primaryKeyColumns}

    for columns in spec.unique:
        constraintName = shrink("_".join([name, "unq"] + list(columns)))
        args.append(sqlalchemy.schema.UniqueConstraint(*columns, name=constraintName))
    allIndexes.update(spec.unique)

    for index in spec.indexes:
        if index.columns in allIndexes:
            continue
        indexName = shrink("_".join([name, "idx"] + list(index.columns)))
        args.append(
            sqlalchemy.schema.Index(
                indexName,
                *index.columns,
                unique=(index.columns in spec.unique),
                **index.kwargs,
            )
        )
    allIndexes.update(index.columns for index in spec.indexes)

    for fk in spec.foreignKeys:
        if fk.addIndex and fk.source not in allIndexes:
            fkIndexName = shrink("_".join((name, "fkidx") + fk.source))
            args.append(sqlalchemy.schema.Index(fkIndexName, *fk.source))

    for excl in spec.exclusion:
        args.append(self._convertExclusionConstraintSpec(name, excl, metadata))

    assert spec.doc is None or isinstance(spec.doc, str), f"Bad doc for {name}."
    return sqlalchemy.schema.Table(name, metadata, *args, comment=spec.doc, info={"spec": spec}, **kwargs)
def ensureTableExists(self, name: str, spec: ddl.TableSpec) -> sqlalchemy.schema.Table:
    """Return the table with the given name and specification, creating it
    first if it does not exist yet.

    Parameters
    ----------
    name : `str`
        Name of the table (not including namespace qualifiers).
    spec : `TableSpec`
        Specification for the table.  This will be used when creating the
        table, and *may* be used when obtaining an existing table to check
        for consistency, but no such check is guaranteed.

    Returns
    -------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy representation of the table.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False`, and the table does not
        already exist.
    DatabaseConflictError
        Raised if the table exists but ``spec`` is inconsistent with its
        definition.

    Notes
    -----
    This method may not be called within transactions.  It may be called on
    read-only databases if and only if the table does in fact already
    exist.

    Subclasses may override this method, but usually should not need to.
    """
    # TODO: if _engine is used to make a table then it uses separate
    # connection and should not interfere with current transaction
    session = self._session_connection
    assert session is None or not session.in_transaction(), "Table creation interrupts transactions."
    assert self._metadata is not None, "Static tables must be declared before dynamic tables."

    existing = self.getExistingTable(name, spec)
    if existing is not None:
        return existing

    if not self.isWriteable():
        raise ReadOnlyDatabaseError(
            f"Table {name} does not exist, and cannot be created because database {self} is read-only."
        )

    created = self._convertTableSpec(name, spec, self._metadata)
    for foreignKeySpec in spec.foreignKeys:
        created.append_constraint(self._convertForeignKeySpec(name, foreignKeySpec, self._metadata))
    try:
        with self._transaction() as (_, connection):
            created.create(connection)
    except sqlalchemy.exc.DatabaseError:
        # Some other process could have created the table meanwhile, which
        # usually causes OperationalError or ProgrammingError.  We cannot
        # use IF NOT EXISTS clause in this case due to PostgreSQL race
        # condition on server side which causes IntegrityError.  Instead we
        # catch these exceptions (they all inherit DatabaseError) and
        # re-check whether table is now there.
        rechecked = self.getExistingTable(name, spec)
        if rechecked is None:
            raise
        return rechecked
    return created
def getExistingTable(self, name: str, spec: ddl.TableSpec) -> sqlalchemy.schema.Table | None:
    """Look up an existing table with the given name and specification.

    Parameters
    ----------
    name : `str`
        Name of the table (not including namespace qualifiers).
    spec : `TableSpec`
        Specification for the table.  This will be used when creating the
        SQLAlchemy representation of the table, and it is used to
        check that the actual table in the database is consistent.

    Returns
    -------
    table : `sqlalchemy.schema.Table` or `None`
        SQLAlchemy representation of the table, or `None` if it does not
        exist.

    Raises
    ------
    DatabaseConflictError
        Raised if the table exists but ``spec`` is inconsistent with its
        definition.

    Notes
    -----
    This method can be called within transactions and never modifies the
    database.

    Subclasses may override this method, but usually should not need to.
    """
    assert self._metadata is not None, "Static tables must be declared before dynamic tables."
    name = self._mangleTableName(name)
    metadataKey = name if self.namespace is None else f"{self.namespace}.{name}"

    # First choice: a table already registered with the in-memory metadata.
    known = self._metadata.tables.get(metadataKey)
    if known is not None:
        if spec.fields.names != set(known.columns.keys()):
            raise DatabaseConflictError(
                f"Table '{name}' has already been defined differently; the new "
                f"specification has columns {list(spec.fields.names)}, while "
                f"the previous definition has {list(known.columns.keys())}."
            )
        return known

    # Otherwise ask the database itself, reusing the session connection
    # when there is one.
    bind = self._engine if self._session_connection is None else self._session_connection
    inspector = sqlalchemy.inspect(bind, raiseerr=True)
    if name not in inspector.get_table_names(schema=self.namespace):
        return None

    _checkExistingTableDefinition(name, spec, inspector.get_columns(name, schema=self.namespace))
    reflected = self._convertTableSpec(name, spec, self._metadata)
    for foreignKeySpec in spec.foreignKeys:
        reflected.append_constraint(self._convertForeignKeySpec(name, foreignKeySpec, self._metadata))
    return reflected
def _make_temporary_table(
    self,
    connection: sqlalchemy.engine.Connection,
    spec: ddl.TableSpec,
    name: str | None = None,
    **kwargs: Any,
) -> sqlalchemy.schema.Table:
    """Create a temporary table.

    Parameters
    ----------
    connection : `sqlalchemy.engine.Connection`
        Connection to use when creating the table.
    spec : `TableSpec`
        Specification for the table.
    name : `str`, optional
        A unique (within this session/connection) name for the table.
        Subclasses may override to modify the actual name used.  If not
        provided, a unique name will be generated.
    **kwargs
        Additional keyword arguments to forward to the
        `sqlalchemy.schema.Table` constructor.  This is provided to make it
        easier for derived classes to delegate to ``super()`` while making
        only minor changes.

    Returns
    -------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy representation of the table.

    Raises
    ------
    RuntimeError
        Raised if the static schema has not been defined yet.
    ValueError
        Raised if the name was transformed during conversion and the
        transformed key is already in use by another temporary table.
    """
    tableName = f"tmp_{uuid.uuid4().hex}" if name is None else name
    metadata = self._metadata
    if metadata is None:
        raise RuntimeError("Cannot create temporary table before static schema is defined.")

    tableOptions = dict(prefixes=["TEMPORARY"], schema=sqlalchemy.schema.BLANK_SCHEMA, **kwargs)
    table = self._convertTableSpec(tableName, spec, metadata, **tableOptions)

    nameWasTransformed = table.key != tableName
    if nameWasTransformed and table.key in self._temp_tables:
        raise ValueError(
            f"A temporary table with name {tableName} (transformed to {table.key} by "
            "Database) already exists."
        )

    for foreignKeySpec in spec.foreignKeys:
        table.append_constraint(self._convertForeignKeySpec(tableName, foreignKeySpec, metadata))
    with self._transaction():
        table.create(connection)
    return table
@classmethod
def getTimespanRepresentation(cls) -> type[TimespanDatabaseRepresentation]:
    """Return the `type` that encapsulates how `Timespan` objects are
    stored in this database.

    `Database` does not automatically use the return type of this method
    anywhere else; calling code is responsible for making sure that DDL
    and queries are consistent with it.

    Returns
    -------
    TimespanReprClass : `type` (`TimespanDatabaseRepresentation` subclass)
        A type that encapsulates the way `Timespan` objects should be
        stored in this database.  This base implementation returns
        `TimespanDatabaseRepresentation.Compound`.

    Notes
    -----
    There are two big reasons we've decided to keep timespan-mangling logic
    outside the `Database` implementations, even though the choice of
    representation is ultimately up to a `Database` implementation:

    - Timespans appear in relatively few tables and queries in our
      typical usage, and the code that operates on them is already aware
      that it is working with timespans.  In contrast, a
      timespan-representation-aware implementation of, say, `insert`,
      would need to have extra logic to identify when timespan-mangling
      needed to occur, which would usually be useless overhead.

    - SQLAlchemy's rich SELECT query expression system has no way to wrap
      multiple columns in a single expression object (the ORM does, but
      we are not using the ORM).  So we would have to wrap _much_ more of
      that code in our own interfaces to encapsulate timespan
      representations there.
    """
    representation = TimespanDatabaseRepresentation.Compound
    return representation
def sync(
    self,
    table: sqlalchemy.schema.Table,
    *,
    keys: dict[str, Any],
    compared: dict[str, Any] | None = None,
    extra: dict[str, Any] | None = None,
    returning: Sequence[str] | None = None,
    update: bool = False,
) -> tuple[dict[str, Any] | None, bool | dict[str, Any]]:
    """Insert into a table as necessary to ensure the database contains
    values equivalent to the given ones.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table to be queried and possibly inserted into.
    keys : `dict`
        Column name-value pairs used to search for an existing row; must
        be a combination that can be used to select a single row if one
        exists.  If such a row does not exist, these values are used in
        the insert.
    compared : `dict`, optional
        Column name-value pairs that are compared to those in any existing
        row.  If such a row does not exist, these values are used in the
        insert.
    extra : `dict`, optional
        Column name-value pairs that are ignored if a matching row exists,
        but used in an insert if one is necessary.
    returning : `~collections.abc.Sequence` of `str`, optional
        The names of columns whose values should be returned.
    update : `bool`, optional
        If `True` (`False` is default), update the existing row with the
        values in ``compared`` instead of raising `DatabaseConflictError`.

    Returns
    -------
    row : `dict`, optional
        The value of the fields indicated by ``returning``, or `None` if
        ``returning`` is `None`.
    inserted_or_updated : `bool` or `dict`
        If `True`, a new row was inserted; if `False`, a matching row
        already existed.  If a `dict` (only possible if ``update=True``),
        then an existing row was updated, and the dict maps the names of
        the updated columns to their *old* values (new values can be
        obtained from ``compared``).

    Raises
    ------
    DatabaseConflictError
        Raised if the values in ``compared`` do not match the values in the
        database.
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False`, and no matching record
        already exists.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    It may be called on read-only databases if and only if the matching row
    does in fact already exist.
    """

    def key_clause() -> Any:
        """Build the WHERE expression matching the row identified by
        ``keys``.
        """
        return sqlalchemy.sql.and_(*[table.columns[k] == v for k, v in keys.items()])

    def differs(found: Any, expected: Any) -> bool:
        """Compare a database value with a caller value, delegating to
        `time_utils.TimeConverter.times_equal` for `astropy.time.Time`.
        """
        if isinstance(found, astropy.time.Time):
            return not time_utils.TimeConverter().times_equal(found, expected)
        return found != expected

    def check() -> tuple[int, dict[str, Any] | None, list | None]:
        """Query for the row matching ``keys`` and compare it to what the
        caller provided.

        Returns
        -------
        n : `int`
            Number of matching rows.  ``n != 1`` is always an error, but
            which error depends on where `check` is called from.
        bad : `dict` or `None`
            The subset of the keys of ``compared`` whose existing values
            differ from the given ones, mapped to the existing values in
            the database.  A non-empty ``bad`` is always an error, but
            which error depends on context.  `None` if ``n != 1``.
        result : `list` or `None`
            Values in the database for the columns given in
            ``returning``, or `None` if ``returning is None`` or
            ``n != 1``.
        """
        wanted: set[str] = set()
        if compared is not None:
            wanted.update(compared.keys())
        if returning is not None:
            wanted.update(returning)
        if not wanted:
            # Need to select some column, even if we just want to see
            # how many rows we get back.
            wanted.add(next(iter(keys.keys())))
        labelled = [table.columns[k].label(k) for k in wanted]
        lookup = sqlalchemy.sql.select(*labelled).select_from(table).where(key_clause())
        with self._transaction() as (_, connection):
            matches = list(connection.execute(lookup).mappings())
        if len(matches) != 1:
            return len(matches), None, None
        (existing,) = matches
        mismatched: dict[str, Any] = {}
        if compared is not None:
            for k, v in compared.items():
                if differs(existing[k], v):
                    mismatched[k] = existing[k]
        values: list | None = None if returning is None else [existing[k] for k in returning]
        return 1, mismatched, values

    def format_bad(inconsistencies: dict[str, Any]) -> str:
        """Format the 'bad' dictionary of existing values returned by
        ``check`` into a string suitable for an error message.
        """
        assert compared is not None, "Should not be able to get inconsistencies without comparing."
        return ", ".join(f"{k}: {v!r} != {compared[k]!r}" for k, v in inconsistencies.items())

    inserted_or_updated: bool | dict[str, Any]
    if not self.isTableWriteable(table):
        # Database is not writeable; just see if the row exists.
        n, bad, result = check()
        if n < 1:
            raise ReadOnlyDatabaseError("sync needs to insert, but database is read-only.")
        if n > 1:
            raise RuntimeError("Keys passed to sync do not comprise a unique constraint.")
        if bad:
            if update:
                raise ReadOnlyDatabaseError("sync needs to update, but database is read-only.")
            raise DatabaseConflictError(
                f"Conflict in sync for table {table.name} on column(s) {format_bad(bad)}."
            )
        inserted_or_updated = False
    else:
        # Try an insert first, but allow it to fail (in only specific
        # ways).
        row = keys.copy()
        for additions in (compared, extra):
            if additions is not None:
                row.update(additions)
        with self.transaction():
            inserted = bool(self.ensure(table, row))
            # check() has to run inside this transaction so that an insert
            # that didn't do what we expected gets rolled back.
            n, bad, result = check()
            if n < 1:
                raise ConflictingDefinitionError(
                    f"Attempted to ensure {row} exists by inserting it with ON CONFLICT IGNORE, "
                    f"but a post-insert query on {keys} returned no results. "
                    f"Insert was {'' if inserted else 'not '}reported as successful. "
                    "This can occur if the insert violated a database constraint other than the "
                    "unique constraint or primary key used to identify the row in this call."
                )
            if n > 1:
                raise RuntimeError(
                    f"Keys passed to sync {keys.keys()} do not comprise a "
                    f"unique constraint for table {table.name}."
                )
            if not bad:
                inserted_or_updated = inserted
            else:
                assert (
                    compared is not None
                ), "Should not be able to get inconsistencies without comparing."
                if inserted:
                    raise RuntimeError(
                        f"Conflict ({bad}) in sync after successful insert; this is "
                        "possible if the same table is being updated by a concurrent "
                        "process that isn't using sync, but it may also be a bug in "
                        "daf_butler."
                    )
                if not update:
                    raise DatabaseConflictError(
                        f"Conflict in sync for table {table.name} on column(s) {format_bad(bad)}."
                    )
                # Overwrite only the columns that differed; the old values
                # are handed back to the caller.
                with self._transaction() as (_, connection):
                    connection.execute(
                        table.update().where(key_clause()).values(**{k: compared[k] for k in bad})
                    )
                inserted_or_updated = bad

    if returning is None:
        return None, inserted_or_updated
    assert result is not None
    return dict(zip(returning, result, strict=True)), inserted_or_updated
def insert(
    self,
    table: sqlalchemy.schema.Table,
    *rows: dict,
    returnIds: bool = False,
    select: sqlalchemy.sql.expression.SelectBase | None = None,
    names: Iterable[str] | None = None,
) -> list[int] | None:
    """Insert one or more rows into a table, optionally returning
    autoincrement primary key values.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    *rows
        Positional arguments are the rows to be inserted, as dictionaries
        mapping column name to value.  The keys in all dictionaries must
        be the same.
    returnIds : `bool`
        If `True` (`False` is default), return the values of the table's
        autoincrement primary key field (which must exist).
    select : `sqlalchemy.sql.SelectBase`, optional
        A SELECT query expression to insert rows from.  Cannot be provided
        with either ``rows`` or ``returnIds=True``.
    names : `~collections.abc.Iterable` [ `str` ], optional
        Names of columns in ``table`` to be populated, ordered to match the
        columns returned by ``select``.  Ignored if ``select`` is `None`.
        If not provided, the columns returned by ``select`` must be named
        to match the desired columns of ``table``.

    Returns
    -------
    ids : `None`, or `list` of `int`
        If ``returnIds`` is `True`, a `list` containing the inserted
        values for the table's autoincrement primary key.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.
    TypeError
        Raised if ``select`` is combined with ``rows`` or
        ``returnIds=True``.

    Notes
    -----
    The default implementation uses bulk insert syntax when ``returnIds``
    is `False`, and a loop over single-row insert operations when it is
    `True`.

    Derived classes should reimplement when they can provide a more
    efficient implementation (especially for the latter case).

    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.
    """
    self.assertTableWriteable(table, f"Cannot insert into read-only table {table}.")
    if select is not None and (rows or returnIds):
        raise TypeError("'select' is incompatible with passing value rows or returnIds=True.")
    if not rows and select is None:
        # Nothing to insert at all.
        return [] if returnIds else None

    with self._transaction() as (_, connection):
        if returnIds:
            # One statement per row so each generated key can be read back.
            statement = table.insert()
            return [connection.execute(statement, row).inserted_primary_key[0] for row in rows]

        if select is None:
            connection.execute(table.insert(), rows)
            return None

        if names is None:
            # columns() is deprecated since 1.4, but
            # selected_columns() method did not exist in 1.3.
            if hasattr(select, "selected_columns"):
                names = select.selected_columns.keys()
            else:
                names = select.columns.keys()
        connection.execute(table.insert().from_select(list(names), select))
        return None
@abstractmethod
def replace(self, table: sqlalchemy.schema.Table, *rows: dict) -> None:
    """Insert one or more rows into a table, overwriting any existing rows
    whose primary key would collide with a new row.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    *rows
        Positional arguments are the rows to be inserted, as dictionaries
        mapping column name to value.  The keys in all dictionaries must
        be the same.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    Implementations should raise a `sqlalchemy.exc.IntegrityError`
    exception when a constraint other than the primary key would be
    violated.

    Implementations are not required to support `replace` on tables
    with autoincrement keys.
    """
    # Abstract: concrete databases must provide the implementation.
    raise NotImplementedError
@abstractmethod
def ensure(self, table: sqlalchemy.schema.Table, *rows: dict, primary_key_only: bool = False) -> int:
    """Insert one or more rows into a table, skipping any row whose
    insertion would violate a unique constraint.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    *rows
        Positional arguments are the rows to be inserted, as dictionaries
        mapping column name to value.  The keys in all dictionaries must
        be the same.
    primary_key_only : `bool`, optional
        If `True` (`False` is default), only skip rows that violate the
        primary key constraint, and raise an exception (and rollback
        transactions) for other constraint violations.

    Returns
    -------
    count : `int`
        The number of rows actually inserted.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.
        This is raised even if the operation would do nothing even on a
        writeable database.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    Implementations are not required to support `ensure` on tables
    with autoincrement keys.
    """
    # Abstract: concrete databases must provide the implementation.
    raise NotImplementedError
def delete(self, table: sqlalchemy.schema.Table, columns: Iterable[str], *rows: dict) -> int:
    """Delete one or more rows from a table.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table that rows should be deleted from.
    columns : `~collections.abc.Iterable` of `str`
        The names of columns that will be used to constrain the rows to
        be deleted; these will be combined via ``AND`` to form the
        ``WHERE`` clause of the delete query.  May be any iterable,
        including a one-shot iterator; if it yields no names the delete
        is unconstrained.
    *rows
        Positional arguments are the keys of rows to be deleted, as
        dictionaries mapping column name to value.  The keys in all
        dictionaries must be exactly the names in ``columns``.

    Returns
    -------
    count : `int`
        Number of rows deleted.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    The default implementation should be sufficient for most derived
    classes.
    """
    self.assertTableWriteable(table, f"Cannot delete from read-only table {table}.")
    # Materialize before testing for emptiness: iterators and generators
    # are always truthy, so checking the raw argument would mistake an
    # empty iterator for a non-empty set of constraint columns.
    columns = list(columns)
    if columns and not rows:
        # If there are no columns, this operation is supposed to delete
        # everything (so we proceed as usual).  But if there are columns,
        # but no rows, it was a constrained bulk operation where the
        # constraint is that no rows match, and we should short-circuit
        # while reporting that no rows were affected.
        return 0
    sql = table.delete()

    # More efficient to use IN operator if there is only one
    # variable changing across all rows.
    content: dict[str, set] = defaultdict(set)
    if len(columns) == 1:
        # Nothing to calculate since we can always use IN
        column = columns[0]
        changing_columns = [column]
        content[column] = {row[column] for row in rows}
    else:
        for row in rows:
            for k, v in row.items():
                content[k].add(v)
        changing_columns = [col for col, values in content.items() if len(values) > 1]

    if len(changing_columns) != 1:
        # More than one column changes each time so do explicit bind
        # parameters and have each row processed separately.
        whereTerms = [table.columns[name] == sqlalchemy.sql.bindparam(name) for name in columns]
        if whereTerms:
            sql = sql.where(sqlalchemy.sql.and_(*whereTerms))
        with self._transaction() as (_, connection):
            return connection.execute(sql, rows).rowcount

    # One of the columns has changing values but any others are
    # fixed.  In this case we can use an IN operator and be more
    # efficient.
    name = changing_columns[0]

    # Simple equality clauses for the unchanging columns; each of their
    # value sets holds exactly one element.
    clauses = [table.columns[k] == next(iter(v)) for k, v in content.items() if k != name]

    # The IN operator will not work for "infinite" numbers of
    # rows so must batch it up into distinct calls.
    in_content = list(content[name])
    n_per_loop = 1_000  # Controls how many items to put in IN clause
    rowcount = 0
    with self._transaction() as (_, connection):
        for iposn in range(0, len(in_content), n_per_loop):
            in_clause = table.columns[name].in_(in_content[iposn : iposn + n_per_loop])
            newsql = sql.where(sqlalchemy.sql.and_(*clauses, in_clause))
            rowcount += connection.execute(newsql).rowcount
    return rowcount
def deleteWhere(self, table: sqlalchemy.schema.Table, where: sqlalchemy.sql.ColumnElement) -> int:
    """Delete rows from a table using a pre-constructed WHERE clause.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table that rows should be deleted from.
    where : `sqlalchemy.sql.ColumnElement`
        Expression used as the ``WHERE`` clause of the delete query; it is
        passed to the statement unchanged.

    Returns
    -------
    count : `int`
        Number of rows deleted.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    The default implementation should be sufficient for most derived
    classes.
    """
    self.assertTableWriteable(table, f"Cannot delete from read-only table {table}.")

    statement = table.delete().where(where)
    with self._transaction() as (_, connection):
        outcome = connection.execute(statement)
        return outcome.rowcount
def update(self, table: sqlalchemy.schema.Table, where: dict[str, str], *rows: dict) -> int:
    """Update one or more rows in a table.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table containing the rows to be updated.
    where : `dict` [`str`, `str`]
        A mapping from the names of columns that will be used to search for
        existing rows to the keys that will hold these values in the
        ``rows`` dictionaries.  Note that these may not be the same due to
        SQLAlchemy limitations.
    *rows
        Positional arguments are the rows to be updated.  The keys in all
        dictionaries must be the same, and may correspond to either a
        value in the ``where`` dictionary or the name of a column to be
        updated.

    Returns
    -------
    count : `int`
        Number of rows matched (regardless of whether the update actually
        modified them).  Zero is returned without touching the database
        when no rows are given.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    The default implementation should be sufficient for most derived
    classes.
    """
    self.assertTableWriteable(table, f"Cannot update read-only table {table}.")
    if not rows:
        return 0

    # Each search column is matched against a bind parameter named by the
    # corresponding value in ``where``; the values come from ``rows``.
    criteria = [
        table.columns[columnName] == sqlalchemy.sql.bindparam(paramName)
        for columnName, paramName in where.items()
    ]
    statement = table.update().where(sqlalchemy.sql.and_(*criteria))
    with self._transaction() as (_, connection):
        outcome = connection.execute(statement, rows)
        return outcome.rowcount
@contextmanager
def query(
    self,
    sql: sqlalchemy.sql.expression.Executable | sqlalchemy.sql.expression.SelectBase,
    *args: Any,
    **kwargs: Any,
) -> Iterator[sqlalchemy.engine.CursorResult]:
    """Run a SELECT query against the database.

    Parameters
    ----------
    sql : `sqlalchemy.sql.expression.SelectBase`
        A SQLAlchemy representation of a ``SELECT`` query.
    *args
        Additional positional arguments are forwarded to
        `sqlalchemy.engine.Connection.execute`.
    **kwargs
        Additional keyword arguments are forwarded to
        `sqlalchemy.engine.Connection.execute`.

    Returns
    -------
    result_context : `sqlalchemy.engine.CursorResult`
        Context manager that returns the query result object when entered.
        These results are invalidated when the context is exited.

    Notes
    -----
    When a session connection is active it is used and left open.
    Otherwise a new connection is obtained from the engine and closed when
    the context exits, including when executing the query itself raises.
    """
    if self._session_connection is None:
        connection = self._engine.connect()
    else:
        connection = self._session_connection
    try:
        # The query is executed inside the ``try`` so that a connection
        # opened above is still closed if execution fails.
        #
        # TODO: SelectBase is not good for execute(), but it used everywhere,
        # e.g. in daf_relation. We should switch to Executable at some point.
        #
        # The cast target is quoted: it only informs the type checker and
        # does not need to be evaluated at runtime.
        result = connection.execute(cast("sqlalchemy.sql.expression.Executable", sql), *args, **kwargs)
        yield result
    finally:
        # Only close connections created for this call; the session
        # connection is not closed here.
        if connection is not self._session_connection:
            connection.close()
@abstractmethod
def constant_rows(
    self,
    fields: NamedValueAbstractSet[ddl.FieldSpec],
    *rows: dict,
    name: str | None = None,
) -> sqlalchemy.sql.FromClause:
    """Build a SQLAlchemy construct holding a few constant-valued rows.

    Parameters
    ----------
    fields : `NamedValueAbstractSet` [ `ddl.FieldSpec` ]
        The columns of the rows.  Unique and foreign key constraints are
        ignored.
    *rows : `dict`
        Values for the rows.
    name : `str`, optional
        If provided, the name of the SQL construct.  If not provided, an
        opaque but unique identifier is generated.

    Returns
    -------
    from_clause : `sqlalchemy.sql.FromClause`
        SQLAlchemy object representing the given rows.  This is guaranteed
        to be something that can be directly joined into a ``SELECT``
        query's ``FROM`` clause, and will not involve a temporary table
        that needs to be cleaned up later.

    Notes
    -----
    The default implementation uses the SQL-standard ``VALUES`` construct,
    but support for that construct is varied enough across popular RDBMSs
    that the method is still marked abstract to force explicit opt-in via
    delegation to `super`.
    """
    construct_name = f"tmp_{uuid.uuid4().hex}" if name is None else name
    columns = [sqlalchemy.Column(spec.name, spec.getSizedColumnType()) for spec in fields]
    values_clause = sqlalchemy.sql.values(*columns, name=construct_name)
    # Each row dict is flattened into a tuple ordered by ``fields.names``.
    row_tuples = [tuple(row[key] for key in fields.names) for row in rows]
    return values_clause.data(row_tuples)
def get_constant_rows_max(self) -> int:
    """Return the largest number of rows that should be given to
    `constant_rows` for this backend.

    Returns
    -------
    max : `int`
        Maximum number of rows.

    Notes
    -----
    This should reflect typical performance profiles (or a guess at
    these), not just hard database engine limits.
    """
    recommended_limit = 100
    return recommended_limit
origin: int
"""Integer ID to use as the default origin for any datasets, quanta, or
other entities that use an (autoincrement, origin) compound primary key
(`int`).
"""

namespace: str | None
"""Schema or namespace that this database instance is associated with
(`str` or `None`).
"""