Coverage for python/lsst/daf/butler/registry/interfaces/_database.py: 23%
411 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-12-01 11:00 +0000
« prev ^ index » next coverage.py v7.3.2, created at 2023-12-01 11:00 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
27from __future__ import annotations
29from ... import ddl, time_utils
31__all__ = [
32 "Database",
33 "ReadOnlyDatabaseError",
34 "DatabaseConflictError",
35 "DatabaseInsertMode",
36 "SchemaAlreadyDefinedError",
37 "StaticTablesContext",
38]
40import enum
41import uuid
42import warnings
43from abc import ABC, abstractmethod
44from collections import defaultdict
45from collections.abc import Callable, Iterable, Iterator, Sequence
46from contextlib import contextmanager
47from typing import Any, cast, final
49import astropy.time
50import sqlalchemy
52from ..._named import NamedValueAbstractSet
53from ..._timespan import TimespanDatabaseRepresentation
54from .._exceptions import ConflictingDefinitionError
class DatabaseInsertMode(enum.Enum):
    """Enumerate the strategies available when inserting database records.

    The members differ only in how rows that already exist are handled.
    """

    INSERT = enum.auto()
    """Plain insert; the operation fails if a record already exists."""

    REPLACE = enum.auto()
    """Insert, overwriting any record that already exists."""

    ENSURE = enum.auto()
    """Insert, skipping any record that already exists."""
70# TODO: method is called with list[ReflectedColumn] in SA 2, and
71# ReflectedColumn does not exist in 1.4.
def _checkExistingTableDefinition(name: str, spec: ddl.TableSpec, inspection: list) -> None:
    """Check that a table specification agrees with database introspection.

    Only the *set* of column names is compared; column order, types and
    constraints are not examined.

    Parameters
    ----------
    name : `str`
        Name of the table (only used in error messages).
    spec : `ddl.TableSpec`
        Specification of the table.
    inspection : `list` [ `dict` ]
        Column descriptions as returned by
        `sqlalchemy.engine.reflection.Inspector.get_columns`. Each entry
        must provide a ``"name"`` key.

    Raises
    ------
    DatabaseConflictError
        Raised if the definitions are inconsistent.
    """
    # Keep the reflected names as a list so the error message reports them
    # in the order the database returned them.
    columnNames = [column["name"] for column in inspection]
    if spec.fields.names != set(columnNames):
        raise DatabaseConflictError(
            f"Table '{name}' exists but is defined differently in the database; "
            f"specification has columns {list(spec.fields.names)}, while the "
            f"table in the database has {columnNames}."
        )
class ReadOnlyDatabaseError(RuntimeError):
    """Error signalling that a write was attempted on a read-only `Database`."""
class DatabaseConflictError(ConflictingDefinitionError):
    """Error signalling that what is stored in the database (row values or
    schema entities) disagrees with what this client expects.
    """
class SchemaAlreadyDefinedError(RuntimeError):
    """Error signalling an attempt to initialize a database schema in a
    database where some tables already exist.
    """
118class StaticTablesContext:
119 """Helper class used to declare the static schema for a registry layer
120 in a database.
122 An instance of this class is returned by `Database.declareStaticTables`,
123 which should be the only way it should be constructed.
124 """
126 def __init__(self, db: Database, connection: sqlalchemy.engine.Connection):
127 self._db = db
128 self._foreignKeys: list[tuple[sqlalchemy.schema.Table, sqlalchemy.schema.ForeignKeyConstraint]] = []
129 self._inspector = sqlalchemy.inspect(connection)
130 self._tableNames = frozenset(self._inspector.get_table_names(schema=self._db.namespace))
131 self._initializers: list[Callable[[Database], None]] = []
133 def addTable(self, name: str, spec: ddl.TableSpec) -> sqlalchemy.schema.Table:
134 """Add a new table to the schema, returning its sqlalchemy
135 representation.
137 The new table may not actually be created until the end of the
138 context created by `Database.declareStaticTables`, allowing tables
139 to be declared in any order even in the presence of foreign key
140 relationships.
141 """
142 name = self._db._mangleTableName(name)
143 metadata = self._db._metadata
144 assert metadata is not None, "Guaranteed by context manager that returns this object."
145 table = self._db._convertTableSpec(name, spec, metadata)
146 for foreignKeySpec in spec.foreignKeys:
147 self._foreignKeys.append((table, self._db._convertForeignKeySpec(name, foreignKeySpec, metadata)))
148 return table
150 def addTableTuple(self, specs: tuple[ddl.TableSpec, ...]) -> tuple[sqlalchemy.schema.Table, ...]:
151 """Add a named tuple of tables to the schema, returning their
152 SQLAlchemy representations in a named tuple of the same type.
154 The new tables may not actually be created until the end of the
155 context created by `Database.declareStaticTables`, allowing tables
156 to be declared in any order even in the presence of foreign key
157 relationships.
159 Notes
160 -----
161 ``specs`` *must* be an instance of a type created by
162 `collections.namedtuple`, not just regular tuple, and the returned
163 object is guaranteed to be the same. Because `~collections.namedtuple`
164 is just a factory for `type` objects, not an actual type itself,
165 we cannot represent this with type annotations.
166 """
167 return specs._make( # type: ignore
168 self.addTable(name, spec) for name, spec in zip(specs._fields, specs, strict=True) # type: ignore
169 )
171 def addInitializer(self, initializer: Callable[[Database], None]) -> None:
172 """Add a method that does one-time initialization of a database.
174 Initialization can mean anything that changes state of a database
175 and needs to be done exactly once after database schema was created.
176 An example for that could be population of schema attributes.
178 Parameters
179 ----------
180 initializer : callable
181 Method of a single argument which is a `Database` instance.
182 """
183 self._initializers.append(initializer)
186class Database(ABC):
187 """An abstract interface that represents a particular database engine's
188 representation of a single schema/namespace/database.
190 Parameters
191 ----------
192 origin : `int`
193 An integer ID that should be used as the default for any datasets,
194 quanta, or other entities that use a (autoincrement, origin) compound
195 primary key.
196 engine : `sqlalchemy.engine.Engine`
197 The SQLAlchemy engine for this `Database`.
198 namespace : `str`, optional
199 Name of the schema or namespace this instance is associated with.
200 This is passed as the ``schema`` argument when constructing a
201 `sqlalchemy.schema.MetaData` instance. We use ``namespace`` instead to
202 avoid confusion between "schema means namespace" and "schema means
203 table definitions".
205 Notes
206 -----
207 `Database` requires all write operations to go through its special named
208 methods. Our write patterns are sufficiently simple that we don't really
209 need the full flexibility of SQL insert/update/delete syntax, and we need
210 non-standard (but common) functionality in these operations sufficiently
211 often that it seems worthwhile to provide our own generic API.
213 In contrast, `Database.query` allows arbitrary ``SELECT`` queries (via
214 their SQLAlchemy representation) to be run, as we expect these to require
215 significantly more sophistication while still being limited to standard
216 SQL.
218 `Database` itself has several underscore-prefixed attributes:
220 - ``_engine``: SQLAlchemy object representing its engine.
221 - ``_connection``: method returning a context manager for
222 `sqlalchemy.engine.Connection` object.
223 - ``_metadata``: the `sqlalchemy.schema.MetaData` object representing
224 the tables and other schema entities.
226 These are considered protected (derived classes may access them, but other
227 code should not), and read-only, aside from executing SQL via
228 ``_connection``.
229 """
def __init__(self, *, origin: int, engine: sqlalchemy.engine.Engine, namespace: str | None = None):
    # Values supplied by the caller.
    self._engine = engine
    self.namespace = namespace
    self.origin = origin
    # Lazily populated state: there is no open session connection and no
    # declared schema yet, and no temporary tables are known.
    self._metadata: sqlalchemy.schema.MetaData | None = None
    self._session_connection: sqlalchemy.engine.Connection | None = None
    self._temp_tables: set[str] = set()
def __repr__(self) -> str:
    # The connection URL is more informative than attempting to echo
    # every constructor argument, so that is what gets reported.
    url = self._engine.url
    if url.password is not None:
        # Never leak the real password into logs or tracebacks.
        url = url.set(password="***")
    uri = str(url)
    if self.namespace:
        uri = f"{uri}#{self.namespace}"
    return f'{type(self).__name__}("{uri}")'
@classmethod
def makeDefaultUri(cls, root: str) -> str | None:
    """Return the default connection URI for a root directory, if any.

    This base implementation has no default and always returns `None`.
    """
    return None
@classmethod
def fromUri(
    cls,
    uri: str | sqlalchemy.engine.URL,
    *,
    origin: int,
    namespace: str | None = None,
    writeable: bool = True,
) -> Database:
    """Build a `Database` from a SQLAlchemy connection URI.

    Parameters
    ----------
    uri : `str` or `sqlalchemy.engine.URL`
        A SQLAlchemy URI connection string.
    origin : `int`
        An integer ID that should be used as the default for any datasets,
        quanta, or other entities that use a (autoincrement, origin)
        compound primary key.
    namespace : `str`, optional
        A database namespace (i.e. schema) the new instance should be
        associated with. If `None` (default), the namespace (if any) is
        inferred from the URI.
    writeable : `bool`, optional
        If `True`, allow write operations on the database, including
        ``CREATE TABLE``.

    Returns
    -------
    db : `Database`
        A new `Database` instance.
    """
    # Two steps: first an engine for the URI, then a database wrapping it.
    engine = cls.makeEngine(uri, writeable=writeable)
    return cls.fromEngine(engine, origin=origin, namespace=namespace, writeable=writeable)
@classmethod
@abstractmethod
def makeEngine(
    cls, uri: str | sqlalchemy.engine.URL, *, writeable: bool = True
) -> sqlalchemy.engine.Engine:
    """Build a `sqlalchemy.engine.Engine` for a SQLAlchemy URI.

    Parameters
    ----------
    uri : `str` or `sqlalchemy.engine.URL`
        A SQLAlchemy URI connection string.
    writeable : `bool`, optional
        If `True`, allow write operations on the database, including
        ``CREATE TABLE``.

    Returns
    -------
    engine : `sqlalchemy.engine.Engine`
        A database engine.

    Notes
    -----
    Subclasses that can connect to a database in additional ways are
    encouraged to add optional arguments to their implementation, provided
    they remain compatible with this base-class call signature.
    """
    raise NotImplementedError()
@classmethod
@abstractmethod
def fromEngine(
    cls,
    engine: sqlalchemy.engine.Engine,
    *,
    origin: int,
    namespace: str | None = None,
    writeable: bool = True,
) -> Database:
    """Build a `Database` around an existing `sqlalchemy.engine.Engine`.

    Parameters
    ----------
    engine : `sqlalchemy.engine.Engine`
        The engine for the database. May be shared between `Database`
        instances.
    origin : `int`
        An integer ID that should be used as the default for any datasets,
        quanta, or other entities that use a (autoincrement, origin)
        compound primary key.
    namespace : `str`, optional
        A different database namespace (i.e. schema) the new instance
        should be associated with. If `None` (default), the namespace
        (if any) is inferred from the connection.
    writeable : `bool`, optional
        If `True`, allow write operations on the database, including
        ``CREATE TABLE``.

    Returns
    -------
    db : `Database`
        A new `Database` instance.

    Notes
    -----
    This method lets several `Database` instances share one engine, which
    is desirable when they represent different namespaces that can be
    queried together.
    """
    raise NotImplementedError()
@final
@contextmanager
def session(self) -> Iterator[None]:
    """Open a session, i.e. a persistent connection to the database.

    Returns
    -------
    context : `AbstractContextManager` [ `None` ]
        A context manager that does not return a value when entered.

    Notes
    -----
    Use this when a series of read-only SQL operations will be run in
    rapid succession *without* needing them to give consistent results in
    the presence of concurrent writes (or, more rarely, when conflicting
    concurrent writes are rare/impossible and the session will stay open
    long enough that a transaction is inadvisable).
    """
    # Delegate to the protected hook, but do not expose the connection it
    # yields to public callers.
    with self._session() as _connection:
        yield None
@final
@contextmanager
def transaction(
    self,
    *,
    interrupting: bool = False,
    savepoint: bool = False,
    lock: Iterable[sqlalchemy.schema.Table] = (),
    for_temp_tables: bool = False,
) -> Iterator[None]:
    """Return a context manager that represents a transaction.

    Parameters
    ----------
    interrupting : `bool`, optional
        If `True` (`False` is default), this transaction block may not be
        nested within an outer one, and attempting to do so is a logic
        (i.e. assertion) error.
    savepoint : `bool`, optional
        If `True` (`False` is default), create a `SAVEPOINT`, allowing
        exceptions raised by the database (e.g. due to constraint
        violations) during this transaction's context to be caught outside
        it without also rolling back all operations in an outer transaction
        block. If `False`, transactions may still be nested, but a
        rollback may be generated at any level and affects all levels, and
        commits are deferred until the outermost block completes. If any
        outer transaction block was created with ``savepoint=True``, all
        inner blocks will be as well (regardless of the actual value
        passed). This has no effect if this is the outermost transaction.
    lock : `~collections.abc.Iterable` [ `sqlalchemy.schema.Table` ], \
            optional
        A list of tables to lock for the duration of this transaction.
        These locks are guaranteed to prevent concurrent writes and allow
        this transaction (only) to acquire the same locks (others should
        block), but only prevent concurrent reads if the database engine
        requires that in order to block concurrent writes.
    for_temp_tables : `bool`, optional
        If `True`, this transaction may involve creating temporary tables.

    Returns
    -------
    context : `AbstractContextManager` [ `None` ]
        A context manager that commits the transaction when it is exited
        without error and rolls back the transaction when it is exited via
        an exception.

    Notes
    -----
    All transactions on a connection managed by one or more `Database`
    instances _must_ go through this method, or transaction state will not
    be correctly managed.
    """
    # The protected implementation yields ``(is_new, connection)``; public
    # callers get nothing, so the yielded value is discarded here.
    with self._transaction(
        interrupting=interrupting, savepoint=savepoint, lock=lock, for_temp_tables=for_temp_tables
    ):
        yield
@contextmanager
def temporary_table(
    self, spec: ddl.TableSpec, name: str | None = None
) -> Iterator[sqlalchemy.schema.Table]:
    """Create a temporary table for the duration of a context, then drop it.

    Parameters
    ----------
    spec : `ddl.TableSpec`
        Specification for the columns. Unique and foreign key constraints
        may be ignored.
    name : `str`, optional
        If provided, the name of the SQL construct. If not provided, an
        opaque but unique identifier is generated.

    Returns
    -------
    context : `AbstractContextManager` [ `sqlalchemy.schema.Table` ]
        A context manager that returns a SQLAlchemy representation of the
        temporary table when entered.

    Notes
    -----
    Temporary tables may be created, dropped, and written to even in
    read-only databases - at least as far as the Python-level protections
    in the `Database` classes are concerned. Server permissions may say
    otherwise, but in that case they probably need to be modified to
    support the full range of expected read-only butler behavior.
    """
    with self._session() as session_connection:
        temp_table = self._make_temporary_table(session_connection, spec=spec, name=name)
        # Record the key so write checks treat this table as writeable.
        self._temp_tables.add(temp_table.key)
        try:
            yield temp_table
        finally:
            # Drop inside a transaction, then forget the table.
            with self._transaction():
                temp_table.drop(session_connection)
            self._temp_tables.remove(temp_table.key)
@contextmanager
def _session(self) -> Iterator[sqlalchemy.engine.Connection]:
    """Protected implementation of `session` that yields the connection.

    Intended for internal `Database` calls that need the actual SQLAlchemy
    connection object. Subclasses should override this method rather than
    `session` itself.

    Returns
    -------
    context : `AbstractContextManager` [ `sqlalchemy.engine.Connection` ]
        A context manager that returns a SQLAlchemy connection when
        entered.
    """
    active = self._session_connection
    if active is not None:
        # A session is already open: reuse its connection and leave the
        # job of closing it to whoever opened it.
        yield active
        return
    try:
        # No session yet: open a connection that lives until this
        # context exits.
        self._session_connection = self._engine.connect()
        yield self._session_connection
    finally:
        if self._session_connection is not None:
            self._session_connection.close()
            self._session_connection = None
            # Temporary tables only live within a session.
            self._temp_tables = set()
@contextmanager
def _transaction(
    self,
    *,
    interrupting: bool = False,
    savepoint: bool = False,
    lock: Iterable[sqlalchemy.schema.Table] = (),
    for_temp_tables: bool = False,
) -> Iterator[tuple[bool, sqlalchemy.engine.Connection]]:
    """Protected implementation for `transaction` that actually returns the
    connection and whether this is a new outermost transaction.

    This method is for internal `Database` calls that need the actual
    SQLAlchemy connection object. It should be overridden by subclasses
    instead of `transaction` itself.

    Parameters
    ----------
    interrupting : `bool`, optional
        If `True` (`False` is default), this transaction block may not be
        nested within an outer one, and attempting to do so is a logic
        (i.e. assertion) error.
    savepoint : `bool`, optional
        If `True` (`False` is default), create a `SAVEPOINT`, allowing
        exceptions raised by the database (e.g. due to constraint
        violations) during this transaction's context to be caught outside
        it without also rolling back all operations in an outer transaction
        block. If `False`, transactions may still be nested, but a
        rollback may be generated at any level and affects all levels, and
        commits are deferred until the outermost block completes. If any
        outer transaction block was created with ``savepoint=True``, all
        inner blocks will be as well (regardless of the actual value
        passed). This has no effect if this is the outermost transaction.
    lock : `~collections.abc.Iterable` [ `sqlalchemy.schema.Table` ], \
            optional
        A list of tables to lock for the duration of this transaction.
        These locks are guaranteed to prevent concurrent writes and allow
        this transaction (only) to acquire the same locks (others should
        block), but only prevent concurrent reads if the database engine
        requires that in order to block concurrent writes.
    for_temp_tables : `bool`, optional
        If `True`, this transaction may involve creating temporary tables.
        This base implementation does not use the flag.

    Returns
    -------
    context : `AbstractContextManager` [ `tuple` [ `bool`,
            `sqlalchemy.engine.Connection` ] ]
        A context manager that commits the transaction when it is exited
        without error and rolls back the transaction when it is exited via
        an exception. When entered, it returns a tuple of:

        - ``is_new`` (`bool`): whether this is a new (outermost)
          transaction;
        - ``connection`` (`sqlalchemy.engine.Connection`): the connection.
    """
    with self._session() as connection:
        already_in_transaction = connection.in_transaction()
        assert not (interrupting and already_in_transaction), (
            "Logic error in transaction nesting: an operation that would "
            "interrupt the active transaction context has been requested."
        )
        # Once any enclosing block uses a savepoint, inner blocks do too.
        savepoint = savepoint or connection.in_nested_transaction()
        trans: sqlalchemy.engine.Transaction | None
        if already_in_transaction:
            if savepoint:
                trans = connection.begin_nested()
            else:
                # Nested non-savepoint transactions don't do anything.
                trans = None
        else:
            # Use a regular (non-savepoint) transaction always for the
            # outermost context.
            trans = connection.begin()
        self._lockTables(connection, lock)
        try:
            yield not already_in_transaction, connection
            if trans is not None:
                trans.commit()
        except BaseException:
            # Roll back on any exit via exception (including
            # KeyboardInterrupt), then let the exception propagate.
            if trans is not None:
                trans.rollback()
            raise
@abstractmethod
def _lockTables(
    self, connection: sqlalchemy.engine.Connection, tables: Iterable[sqlalchemy.schema.Table] = ()
) -> None:
    """Lock the given tables.

    Implementation hook for subclasses; it is invoked by `transaction` and
    must not be called directly by other code.

    Parameters
    ----------
    connection : `sqlalchemy.engine.Connection`
        Database connection object. A transaction is guaranteed to be in
        progress on this connection already.
    tables : `~collections.abc.Iterable` [ `sqlalchemy.schema.Table` ], \
            optional
        A list of tables to lock for the duration of this transaction.
        These locks are guaranteed to prevent concurrent writes and allow
        this transaction (only) to acquire the same locks (others should
        block), but only prevent concurrent reads if the database engine
        requires that in order to block concurrent writes.
    """
    raise NotImplementedError()
def isTableWriteable(self, table: sqlalchemy.schema.Table) -> bool:
    """Report whether a table can be written to.

    A table is writeable when the database connection is read-write, or
    when the table is one of this database's temporary tables.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy table object to check.

    Returns
    -------
    writeable : `bool`
        Whether this table is writeable.
    """
    database_writeable = self.isWriteable()
    # Only consult the temporary-table registry for read-only databases.
    return database_writeable if database_writeable else table.key in self._temp_tables
def assertTableWriteable(self, table: sqlalchemy.schema.Table, msg: str) -> None:
    """Raise if the given table is not writeable.

    A table is writeable when the database connection is read-write or
    the table is a temporary table; see `isTableWriteable`.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy table object to check.
    msg : `str`
        Message for the `ReadOnlyDatabaseError` raised when the table is
        not writeable.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if the table is not writeable.
    """
    if not self.isTableWriteable(table):
        raise ReadOnlyDatabaseError(msg)
@contextmanager
def declareStaticTables(self, *, create: bool) -> Iterator[StaticTablesContext]:
    """Open a context in which the static DDL schema can be declared.

    Parameters
    ----------
    create : `bool`
        If `True`, attempt to create all tables at the end of the context.
        If `False`, they will be assumed to already exist.

    Returns
    -------
    schema : `StaticTablesContext`
        A helper object that is used to add new tables.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if ``create`` is `True` and `Database.isWriteable` is
        `False`.
    SchemaAlreadyDefinedError
        Raised if ``create`` is `True` and the namespace already contains
        tables.

    Examples
    --------
    Given a `Database` instance ``db``::

        with db.declareStaticTables(create=True) as schema:
            schema.addTable("table1", TableSpec(...))
            schema.addTable("table2", TableSpec(...))

    Notes
    -----
    The static DDL schema must be declared before any dynamic tables are
    managed via calls to `ensureTableExists` or `getExistingTable`. Tables
    may be added inside the context block in any order; they are sorted
    automatically and created in an order consistent with their foreign
    key relationships.
    """
    if create and not self.isWriteable():
        raise ReadOnlyDatabaseError(f"Cannot create tables in read-only database {self}.")
    self._metadata = sqlalchemy.MetaData(schema=self.namespace)
    try:
        with self._transaction() as (_, connection):
            context = StaticTablesContext(self, connection)
            if create and context._tableNames:
                # The database looks initialized already; refuse to touch
                # it rather than risk modifying or destroying a valid
                # schema.
                raise SchemaAlreadyDefinedError(f"Cannot create tables in non-empty database {self}.")
            yield context
            # Every table is declared now, so foreign keys can be attached.
            for table, foreignKey in context._foreignKeys:
                table.append_constraint(foreignKey)
            if create:
                namespace = self.namespace
                if namespace is not None and namespace not in context._inspector.get_schema_names():
                    connection.execute(sqlalchemy.schema.CreateSchema(namespace))
                # Some columns use sqlalchemy Sequence objects, and a
                # sqlalchemy bug emits a deprecation warning from a
                # Sequence property when its repr is built. Silence those
                # warnings while the tables are created.
                with warnings.catch_warnings():
                    warnings.simplefilter("ignore", category=sqlalchemy.exc.SADeprecationWarning)
                    self._metadata.create_all(connection)
                # Run the registered initializers in registration order.
                for init in context._initializers:
                    init(self)
    except BaseException:
        # Leave no half-declared schema behind on failure.
        self._metadata = None
        raise
@abstractmethod
def isWriteable(self) -> bool:
    """Report whether this client is allowed to modify the database."""
    raise NotImplementedError()
@abstractmethod
def __str__(self) -> str:
    """Return a human-readable identifier for this `Database`.

    The identifier includes any namespace or schema that identifies its
    names within a `Registry`.
    """
    raise NotImplementedError()
@property
def dialect(self) -> sqlalchemy.engine.Dialect:
    """The SQLAlchemy dialect of the underlying engine
    (`sqlalchemy.engine.Dialect`).
    """
    engine = self._engine
    return engine.dialect
def shrinkDatabaseEntityName(self, original: str) -> str:
    """Shorten a name to fit the database engine's length limits.

    The limits in question apply to table, constraint, index, and sequence
    names. Implementations must not assume simple truncation is safe,
    because long names frequently share a common prefix.

    This default implementation returns the name unchanged.

    Parameters
    ----------
    original : `str`
        The original name.

    Returns
    -------
    shrunk : `str`
        The new, possibly shortened name.
    """
    return original
def expandDatabaseEntityName(self, shrunk: str) -> str:
    """Retrieve the original name for a database entity that was too long
    to fit within the database engine's limits.

    This is the inverse of `shrinkDatabaseEntityName`. The default
    implementation returns the given name unchanged.

    Parameters
    ----------
    shrunk : `str`
        The possibly shortened name, as returned by
        `shrinkDatabaseEntityName`.

    Returns
    -------
    original : `str`
        The original name.
    """
    return shrunk
def _mangleTableName(self, name: str) -> str:
    """Translate a logical, user-visible table name into the name actually
    used in the database.

    This default implementation performs no translation.

    Parameters
    ----------
    name : `str`
        Input table name. Should not include a namespace (i.e. schema)
        prefix.

    Returns
    -------
    mangled : `str`
        Mangled version of the table name (still with no namespace prefix).

    Notes
    -----
    Reimplementations must be idempotent: mangling an already-mangled name
    must leave it unchanged.
    """
    return name
def _makeColumnConstraints(self, table: str, spec: ddl.FieldSpec) -> list[sqlalchemy.CheckConstraint]:
    """Build the extra constraints implied by a field specification.

    Parameters
    ----------
    table : `str`
        Name of the table this column is being added to.
    spec : `FieldSpec`
        Specification for the field to be added.

    Returns
    -------
    constraint : `list` of `sqlalchemy.CheckConstraint`
        Constraints added for this column.
    """
    # The base class contributes no additional constraints; a fresh list
    # is returned on every call.
    no_constraints: list[sqlalchemy.CheckConstraint] = []
    return no_constraints
def _convertFieldSpec(
    self, table: str, spec: ddl.FieldSpec, metadata: sqlalchemy.MetaData, **kwargs: Any
) -> sqlalchemy.schema.Column:
    """Translate a `FieldSpec` into a `sqlalchemy.schema.Column`.

    Parameters
    ----------
    table : `str`
        Name of the table this column is being added to.
    spec : `FieldSpec`
        Specification for the field to be added.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema this field's table is
        being added to.
    **kwargs
        Additional keyword arguments to forward to the
        `sqlalchemy.schema.Column` constructor. This makes it easier for
        derived classes to delegate to ``super()`` while making only minor
        changes.

    Returns
    -------
    column : `sqlalchemy.schema.Column`
        SQLAlchemy representation of the field.
    """
    extra_args = []
    if spec.autoincrement:
        # Provide a sequence for databases without native auto-increment
        # support; sqlalchemy ignores it for databases that have it.
        sequence_name = self.shrinkDatabaseEntityName(f"{table}_seq_{spec.name}")
        extra_args.append(sqlalchemy.Sequence(sequence_name, metadata=metadata))
    assert spec.doc is None or isinstance(spec.doc, str), f"Bad doc for {table}.{spec.name}."
    return sqlalchemy.schema.Column(
        spec.name,
        spec.getSizedColumnType(),
        *extra_args,
        nullable=spec.nullable,
        primary_key=spec.primaryKey,
        comment=spec.doc,
        server_default=spec.default,
        **kwargs,
    )
def _convertForeignKeySpec(
    self, table: str, spec: ddl.ForeignKeySpec, metadata: sqlalchemy.MetaData, **kwargs: Any
) -> sqlalchemy.schema.ForeignKeyConstraint:
    """Convert a `ForeignKeySpec` to a
    `sqlalchemy.schema.ForeignKeyConstraint`.

    Parameters
    ----------
    table : `str`
        Name of the table this foreign key is being added to.
    spec : `ForeignKeySpec`
        Specification for the foreign key to be added.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema this constraint is
        being added to. Not used by this base implementation.
    **kwargs
        Additional keyword arguments to forward to the
        `sqlalchemy.schema.ForeignKeyConstraint` constructor. This is
        provided to make it easier for derived classes to delegate to
        ``super()`` while making only minor changes.

    Returns
    -------
    constraint : `sqlalchemy.schema.ForeignKeyConstraint`
        SQLAlchemy representation of the constraint.
    """
    # Mangle the referenced table name once; it is needed both for the
    # constraint name and for the target column references.
    targetTable = self._mangleTableName(spec.table)
    name = self.shrinkDatabaseEntityName(
        "_".join(["fkey", table, targetTable, *spec.target, *spec.source])
    )
    # Forward ``kwargs`` as documented; previously they were accepted but
    # silently discarded.
    return sqlalchemy.schema.ForeignKeyConstraint(
        spec.source,
        [f"{targetTable}.{col}" for col in spec.target],
        name=name,
        ondelete=spec.onDelete,
        **kwargs,
    )
912 def _convertExclusionConstraintSpec(
913 self,
914 table: str,
915 spec: tuple[str | type[TimespanDatabaseRepresentation], ...],
916 metadata: sqlalchemy.MetaData,
917 ) -> sqlalchemy.schema.Constraint:
918 """Convert a `tuple` from `ddl.TableSpec.exclusion` into a SQLAlchemy
919 constraint representation.
921 Parameters
922 ----------
923 table : `str`
924 Name of the table this constraint is being added to.
925 spec : `tuple` [ `str` or `type` ]
926 A tuple of `str` column names and the `type` object returned by
927 `getTimespanRepresentation` (which must appear exactly once),
928 indicating the order of the columns in the index used to back the
929 constraint.
930 metadata : `sqlalchemy.MetaData`
931 SQLAlchemy representation of the DDL schema this constraint is
932 being added to.
934 Returns
935 -------
936 constraint : `sqlalchemy.schema.Constraint`
937 SQLAlchemy representation of the constraint.
939 Raises
940 ------
941 NotImplementedError
942 Raised if this database does not support exclusion constraints.
943 """
944 raise NotImplementedError(f"Database {self} does not support exclusion constraints.")
def _convertTableSpec(
    self, name: str, spec: ddl.TableSpec, metadata: sqlalchemy.MetaData, **kwargs: Any
) -> sqlalchemy.schema.Table:
    """Convert a `TableSpec` to a `sqlalchemy.schema.Table`.

    Parameters
    ----------
    name : `str`
        Name of the table; it is passed through ``_mangleTableName``
        before use.
    spec : `TableSpec`
        Specification for the table to be converted.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema this table is being
        added to.
    **kwargs
        Additional keyword arguments to forward to the
        `sqlalchemy.schema.Table` constructor.  This is provided to make it
        easier for derived classes to delegate to ``super()`` while making
        only minor changes.

    Returns
    -------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy representation of the table.

    Notes
    -----
    This method does not handle ``spec.foreignKeys`` at all, in order to
    avoid circular dependencies.  These are added by higher-level logic in
    `ensureTableExists`, `getExistingTable`, and `declareStaticTables`.
    """
    name = self._mangleTableName(name)

    # Columns first, then the per-column constraints.
    items: list[sqlalchemy.schema.SchemaItem] = []
    for field in spec.fields:
        items.append(self._convertFieldSpec(name, field, metadata))
    for field in spec.fields:
        items.extend(self._makeColumnConstraints(name, field))

    # Column tuples that already get an index through the primary key or a
    # unique constraint; used to avoid emitting duplicate explicit or
    # foreign-key indexes for them.
    primary_key = tuple(field.name for field in spec.fields if field.primaryKey)
    covered = {primary_key}

    for columns in spec.unique:
        constraint_name = self.shrinkDatabaseEntityName("_".join([name, "unq", *columns]))
        items.append(sqlalchemy.schema.UniqueConstraint(*columns, name=constraint_name))
    covered.update(spec.unique)

    # Explicit indexes are filtered against what was covered *before* any
    # of them is recorded, so the bookkeeping is updated only afterwards.
    for index in spec.indexes:
        if index.columns in covered:
            continue
        index_name = self.shrinkDatabaseEntityName("_".join([name, "idx", *index.columns]))
        items.append(
            sqlalchemy.schema.Index(
                index_name,
                *index.columns,
                unique=(index.columns in spec.unique),
                **index.kwargs,
            )
        )
    covered.update(index.columns for index in spec.indexes)

    for fk in spec.foreignKeys:
        if not fk.addIndex or fk.source in covered:
            continue
        fk_index_name = self.shrinkDatabaseEntityName("_".join((name, "fkidx") + fk.source))
        items.append(sqlalchemy.schema.Index(fk_index_name, *fk.source))

    for excl in spec.exclusion:
        items.append(self._convertExclusionConstraintSpec(name, excl, metadata))

    assert spec.doc is None or isinstance(spec.doc, str), f"Bad doc for {name}."
    return sqlalchemy.schema.Table(name, metadata, *items, comment=spec.doc, info={"spec": spec}, **kwargs)
def ensureTableExists(self, name: str, spec: ddl.TableSpec) -> sqlalchemy.schema.Table:
    """Return the table with the given name, creating it first if it does
    not exist yet.

    Parameters
    ----------
    name : `str`
        Name of the table (not including namespace qualifiers).
    spec : `TableSpec`
        Specification for the table.  This will be used when creating the
        table, and *may* be used when obtaining an existing table to check
        for consistency, but no such check is guaranteed.

    Returns
    -------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy representation of the table.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False`, and the table does not
        already exist.
    DatabaseConflictError
        Raised if the table exists but ``spec`` is inconsistent with its
        definition.

    Notes
    -----
    This method may not be called within transactions.  It may be called on
    read-only databases if and only if the table does in fact already
    exist.

    Subclasses may override this method, but usually should not need to.
    """
    # TODO: if _engine is used to make a table then it uses separate
    # connection and should not interfere with current transaction
    assert (
        self._session_connection is None or not self._session_connection.in_transaction()
    ), "Table creation interrupts transactions."
    assert self._metadata is not None, "Static tables must be declared before dynamic tables."

    existing = self.getExistingTable(name, spec)
    if existing is not None:
        return existing

    if not self.isWriteable():
        raise ReadOnlyDatabaseError(
            f"Table {name} does not exist, and cannot be created because database {self} is read-only."
        )

    created = self._convertTableSpec(name, spec, self._metadata)
    for fk_spec in spec.foreignKeys:
        created.append_constraint(self._convertForeignKeySpec(name, fk_spec, self._metadata))

    try:
        with self._transaction() as (_, connection):
            created.create(connection)
    except sqlalchemy.exc.DatabaseError:
        # Some other process could have created the table meanwhile, which
        # usually causes OperationalError or ProgrammingError.  We cannot
        # use IF NOT EXISTS clause in this case due to PostgreSQL race
        # condition on server side which causes IntegrityError.  Instead we
        # catch these exceptions (they all inherit DatabaseError) and
        # re-check whether table is now there.
        rechecked = self.getExistingTable(name, spec)
        if rechecked is None:
            raise
        return rechecked
    return created
def getExistingTable(self, name: str, spec: ddl.TableSpec) -> sqlalchemy.schema.Table | None:
    """Look up an existing table by name, checking it against a
    specification.

    Parameters
    ----------
    name : `str`
        Name of the table (not including namespace qualifiers).
    spec : `TableSpec`
        Specification for the table.  This will be used when creating the
        SQLAlchemy representation of the table, and it is used to
        check that the actual table in the database is consistent.

    Returns
    -------
    table : `sqlalchemy.schema.Table` or `None`
        SQLAlchemy representation of the table, or `None` if it does not
        exist.

    Raises
    ------
    DatabaseConflictError
        Raised if the table exists but ``spec`` is inconsistent with its
        definition.

    Notes
    -----
    This method can be called within transactions and never modifies the
    database.

    Subclasses may override this method, but usually should not need to.
    """
    assert self._metadata is not None, "Static tables must be declared before dynamic tables."
    name = self._mangleTableName(name)
    metadata_key = name if self.namespace is None else f"{self.namespace}.{name}"
    known = self._metadata.tables.get(metadata_key)

    if known is not None:
        # Already present in our in-memory schema: only the column names
        # are compared.
        if spec.fields.names != set(known.columns.keys()):
            raise DatabaseConflictError(
                f"Table '{name}' has already been defined differently; the new "
                f"specification has columns {list(spec.fields.names)}, while "
                f"the previous definition has {list(known.columns.keys())}."
            )
        return known

    # Not in the in-memory schema; ask the database itself.
    inspector = sqlalchemy.inspect(
        self._engine if self._session_connection is None else self._session_connection, raiseerr=True
    )
    if name not in inspector.get_table_names(schema=self.namespace):
        return None

    _checkExistingTableDefinition(name, spec, inspector.get_columns(name, schema=self.namespace))
    reflected = self._convertTableSpec(name, spec, self._metadata)
    for fk_spec in spec.foreignKeys:
        reflected.append_constraint(self._convertForeignKeySpec(name, fk_spec, self._metadata))
    return reflected
1139 def _make_temporary_table(
1140 self,
1141 connection: sqlalchemy.engine.Connection,
1142 spec: ddl.TableSpec,
1143 name: str | None = None,
1144 **kwargs: Any,
1145 ) -> sqlalchemy.schema.Table:
1146 """Create a temporary table.
1148 Parameters
1149 ----------
1150 connection : `sqlalchemy.engine.Connection`
1151 Connection to use when creating the table.
1152 spec : `TableSpec`
1153 Specification for the table.
1154 name : `str`, optional
1155 A unique (within this session/connetion) name for the table.
1156 Subclasses may override to modify the actual name used. If not
1157 provided, a unique name will be generated.
1158 **kwargs
1159 Additional keyword arguments to forward to the
1160 `sqlalchemy.schema.Table` constructor. This is provided to make it
1161 easier for derived classes to delegate to ``super()`` while making
1162 only minor changes.
1164 Returns
1165 -------
1166 table : `sqlalchemy.schema.Table`
1167 SQLAlchemy representation of the table.
1168 """
1169 if name is None:
1170 name = f"tmp_{uuid.uuid4().hex}"
1171 metadata = self._metadata
1172 if metadata is None:
1173 raise RuntimeError("Cannot create temporary table before static schema is defined.")
1174 table = self._convertTableSpec(
1175 name, spec, metadata, prefixes=["TEMPORARY"], schema=sqlalchemy.schema.BLANK_SCHEMA, **kwargs
1176 )
1177 if table.key in self._temp_tables and table.key != name:
1178 raise ValueError(
1179 f"A temporary table with name {name} (transformed to {table.key} by "
1180 "Database) already exists."
1181 )
1182 for foreignKeySpec in spec.foreignKeys:
1183 table.append_constraint(self._convertForeignKeySpec(name, foreignKeySpec, metadata))
1184 with self._transaction():
1185 table.create(connection)
1186 return table
@classmethod
def getTimespanRepresentation(cls) -> type[TimespanDatabaseRepresentation]:
    """Return the `type` describing how `Timespan` objects are stored in
    this database.

    `Database` itself does not consult the returned type anywhere else;
    calling code is responsible for keeping its DDL and queries consistent
    with it.

    Returns
    -------
    TimespanReprClass : `type` (`TimespanDatabaseRepresentation` subclass)
        A type that encapsulates the way `Timespan` objects should be
        stored in this database.

    Notes
    -----
    The choice of representation belongs to the `Database`
    implementation, but the timespan-mangling logic is deliberately kept
    outside of it, for two reasons:

    - Timespans appear in relatively few tables and queries in our
      typical usage, and the code that operates on them already knows it
      is working with timespans.  A timespan-representation-aware
      implementation of, say, `insert` would instead need extra logic to
      detect when timespan-mangling is required, which would usually be
      useless overhead.

    - SQLAlchemy's rich SELECT query expression system has no way to wrap
      multiple columns in a single expression object (the ORM does, but
      we are not using the ORM).  Encapsulating timespan representations
      there would mean wrapping _much_ more of that code in our own
      interfaces.
    """
    representation = TimespanDatabaseRepresentation.Compound
    return representation
def sync(
    self,
    table: sqlalchemy.schema.Table,
    *,
    keys: dict[str, Any],
    compared: dict[str, Any] | None = None,
    extra: dict[str, Any] | None = None,
    returning: Sequence[str] | None = None,
    update: bool = False,
) -> tuple[dict[str, Any] | None, bool | dict[str, Any]]:
    """Insert into a table as necessary to ensure database contains
    values equivalent to the given ones.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table to be queried and possibly inserted into.
    keys : `dict`
        Column name-value pairs used to search for an existing row; must
        be a combination that can be used to select a single row if one
        exists.  If such a row does not exist, these values are used in
        the insert.
    compared : `dict`, optional
        Column name-value pairs that are compared to those in any existing
        row.  If such a row does not exist, these values are used in the
        insert.
    extra : `dict`, optional
        Column name-value pairs that are ignored if a matching row exists,
        but used in an insert if one is necessary.
    returning : `~collections.abc.Sequence` of `str`, optional
        The names of columns whose values should be returned.
    update : `bool`, optional
        If `True` (`False` is default), update the existing row with the
        values in ``compared`` instead of raising `DatabaseConflictError`.

    Returns
    -------
    row : `dict`, optional
        The value of the fields indicated by ``returning``, or `None` if
        ``returning`` is `None`.
    inserted_or_updated : `bool` or `dict`
        If `True`, a new row was inserted; if `False`, a matching row
        already existed.  If a `dict` (only possible if ``update=True``),
        then an existing row was updated, and the dict maps the names of
        the updated columns to their *old* values (new values can be
        obtained from ``compared``).

    Raises
    ------
    DatabaseConflictError
        Raised if the values in ``compared`` do not match the values in the
        database.
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False`, and no matching record
        already exists.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    It may be called on read-only databases if and only if the matching row
    does in fact already exist.
    """

    def check() -> tuple[int, dict[str, Any] | None, list | None]:
        """Query for a row that matches the ``keys`` argument, and compare
        to what was given by the caller.

        Returns
        -------
        n : `int`
            Number of matching rows.  ``n != 1`` is always an error, but
            it's a different kind of error depending on where `check` is
            being called.
        bad : `dict` or `None`
            The subset of the keys of ``compared`` for which the existing
            values did not match the given one, mapped to the existing
            values in the database.  A non-empty dict signals a conflict,
            which the caller either reports as an error or (with
            ``update=True``) resolves by updating the row.  `None` if
            ``n != 1``.
        result : `list` or `None`
            Results in the database that correspond to the columns given
            in ``returning``, or `None` if ``returning is None`` or
            ``n != 1``.
        """
        toSelect: set[str] = set()
        if compared is not None:
            toSelect.update(compared.keys())
        if returning is not None:
            toSelect.update(returning)
        if not toSelect:
            # Need to select some column, even if we just want to see
            # how many rows we get back.
            toSelect.add(next(iter(keys.keys())))
        selectSql = (
            sqlalchemy.sql.select(*[table.columns[k].label(k) for k in toSelect])
            .select_from(table)
            .where(sqlalchemy.sql.and_(*[table.columns[k] == v for k, v in keys.items()]))
        )
        with self._transaction() as (_, connection):
            fetched = list(connection.execute(selectSql).mappings())
        if len(fetched) != 1:
            return len(fetched), None, None
        existing = fetched[0]
        if compared is not None:

            def safeNotEqual(a: Any, b: Any) -> bool:
                # astropy times are compared through the TimeConverter
                # helper instead of the plain ``!=`` operator.
                if isinstance(a, astropy.time.Time):
                    return not time_utils.TimeConverter().times_equal(a, b)
                return a != b

            inconsistencies = {
                k: existing[k] for k, v in compared.items() if safeNotEqual(existing[k], v)
            }
        else:
            inconsistencies = {}
        if returning is not None:
            toReturn: list | None = [existing[k] for k in returning]
        else:
            toReturn = None
        return 1, inconsistencies, toReturn

    def format_bad(inconsistencies: dict[str, Any]) -> str:
        """Format the 'bad' dictionary of existing values returned by
        ``check`` into a string suitable for an error message.
        """
        assert compared is not None, "Should not be able to get inconsistencies without comparing."
        return ", ".join(f"{k}: {v!r} != {compared[k]!r}" for k, v in inconsistencies.items())

    if self.isTableWriteable(table):
        # Try an insert first, but allow it to fail (in only specific
        # ways).
        row = keys.copy()
        if compared is not None:
            row.update(compared)
        if extra is not None:
            row.update(extra)
        with self.transaction():
            inserted = bool(self.ensure(table, row))
            inserted_or_updated: bool | dict[str, Any]
            # Need to perform check() for this branch inside the
            # transaction, so we roll back an insert that didn't do
            # what we expected.  That limits the extent to which we
            # can reduce duplication between this block and the other
            # ones that perform similar logic.
            n, bad, result = check()
            if n < 1:
                raise ConflictingDefinitionError(
                    f"Attempted to ensure {row} exists by inserting it with ON CONFLICT IGNORE, "
                    f"but a post-insert query on {keys} returned no results. "
                    f"Insert was {'' if inserted else 'not '}reported as successful. "
                    "This can occur if the insert violated a database constraint other than the "
                    "unique constraint or primary key used to identify the row in this call."
                )
            elif n > 1:
                raise RuntimeError(
                    f"Keys passed to sync {keys.keys()} do not comprise a "
                    f"unique constraint for table {table.name}."
                )
            elif bad:
                assert (
                    compared is not None
                ), "Should not be able to get inconsistencies without comparing."
                if inserted:
                    raise RuntimeError(
                        f"Conflict ({bad}) in sync after successful insert; this is "
                        "possible if the same table is being updated by a concurrent "
                        "process that isn't using sync, but it may also be a bug in "
                        "daf_butler."
                    )
                elif update:
                    # Overwrite only the mismatching columns; ``bad`` keeps
                    # their old values and is handed back to the caller.
                    with self._transaction() as (_, connection):
                        connection.execute(
                            table.update()
                            .where(sqlalchemy.sql.and_(*[table.columns[k] == v for k, v in keys.items()]))
                            .values(**{k: compared[k] for k in bad})
                        )
                    inserted_or_updated = bad
                else:
                    raise DatabaseConflictError(
                        f"Conflict in sync for table {table.name} on column(s) {format_bad(bad)}."
                    )
            else:
                inserted_or_updated = inserted
    else:
        # Database is not writeable; just see if the row exists.
        n, bad, result = check()
        if n < 1:
            raise ReadOnlyDatabaseError("sync needs to insert, but database is read-only.")
        elif n > 1:
            raise RuntimeError("Keys passed to sync do not comprise a unique constraint.")
        elif bad:
            if update:
                raise ReadOnlyDatabaseError("sync needs to update, but database is read-only.")
            else:
                raise DatabaseConflictError(
                    f"Conflict in sync for table {table.name} on column(s) {format_bad(bad)}."
                )
        inserted_or_updated = False
    if returning is None:
        return None, inserted_or_updated
    else:
        assert result is not None
        return dict(zip(returning, result, strict=True)), inserted_or_updated
def insert(
    self,
    table: sqlalchemy.schema.Table,
    *rows: dict,
    returnIds: bool = False,
    select: sqlalchemy.sql.expression.SelectBase | None = None,
    names: Iterable[str] | None = None,
) -> list[int] | None:
    """Insert one or more rows into a table, optionally returning
    autoincrement primary key values.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    returnIds : `bool`
        If `True` (`False` is default), return the values of the table's
        autoincrement primary key field (which must exist).
    select : `sqlalchemy.sql.SelectBase`, optional
        A SELECT query expression to insert rows from.  Cannot be provided
        with either ``rows`` or ``returnIds=True``.
    names : `~collections.abc.Iterable` [ `str` ], optional
        Names of columns in ``table`` to be populated, ordered to match the
        columns returned by ``select``.  Ignored if ``select`` is `None`.
        If not provided, the columns returned by ``select`` must be named
        to match the desired columns of ``table``.
    *rows
        Positional arguments are the rows to be inserted, as dictionaries
        mapping column name to value.  The keys in all dictionaries must
        be the same.

    Returns
    -------
    ids : `None`, or `list` of `int`
        If ``returnIds`` is `True`, a `list` containing the inserted
        values for the table's autoincrement primary key.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.
    TypeError
        Raised if ``select`` is combined with ``rows`` or
        ``returnIds=True``.

    Notes
    -----
    The default implementation uses bulk insert syntax when ``returnIds``
    is `False`, and a loop over single-row insert operations when it is
    `True`.

    Derived classes should reimplement when they can provide a more
    efficient implementation (especially for the latter case).

    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.
    """
    self.assertTableWriteable(table, f"Cannot insert into read-only table {table}.")
    if select is not None and (rows or returnIds):
        raise TypeError("'select' is incompatible with passing value rows or returnIds=True.")
    if select is None and not rows:
        # Nothing to insert at all.
        return [] if returnIds else None

    with self._transaction() as (_, connection):
        if returnIds:
            # One statement per row so each generated key can be read back.
            statement = table.insert()
            return [connection.execute(statement, row).inserted_primary_key[0] for row in rows]

        if select is None:
            connection.execute(table.insert(), rows)
            return None

        if names is None:
            # columns() is deprecated since 1.4, but
            # selected_columns() method did not exist in 1.3.
            if hasattr(select, "selected_columns"):
                names = select.selected_columns.keys()
            else:
                names = select.columns.keys()
        connection.execute(table.insert().from_select(list(names), select))
        return None
@abstractmethod
def replace(self, table: sqlalchemy.schema.Table, *rows: dict) -> None:
    """Insert rows into a table, overwriting any existing row whose primary
    key would otherwise conflict with a new one.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    *rows
        Positional arguments are the rows to be inserted, as dictionaries
        mapping column name to value.  The keys in all dictionaries must
        be the same.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    Implementations should raise a `sqlalchemy.exc.IntegrityError`
    exception when a constraint other than the primary key would be
    violated.

    Implementations are not required to support `replace` on tables
    with autoincrement keys.
    """
    # Abstract: concrete databases must provide the implementation.
    raise NotImplementedError()
@abstractmethod
def ensure(self, table: sqlalchemy.schema.Table, *rows: dict, primary_key_only: bool = False) -> int:
    """Insert rows into a table, silently skipping those whose insertion
    would violate a unique constraint.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    *rows
        Positional arguments are the rows to be inserted, as dictionaries
        mapping column name to value.  The keys in all dictionaries must
        be the same.
    primary_key_only : `bool`, optional
        If `True` (`False` is default), only skip rows that violate the
        primary key constraint, and raise an exception (and rollback
        transactions) for other constraint violations.

    Returns
    -------
    count : `int`
        The number of rows actually inserted.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.
        This is raised even if the operation would do nothing even on a
        writeable database.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    Implementations are not required to support `ensure` on tables
    with autoincrement keys.
    """
    # Abstract: concrete databases must provide the implementation.
    raise NotImplementedError()
def delete(self, table: sqlalchemy.schema.Table, columns: Iterable[str], *rows: dict) -> int:
    """Delete one or more rows from a table.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table that rows should be deleted from.
    columns : `~collections.abc.Iterable` of `str`
        The names of columns that will be used to constrain the rows to
        be deleted; these will be combined via ``AND`` to form the
        ``WHERE`` clause of the delete query.  May be any iterable,
        including a one-shot iterator; if it yields no names the delete
        is unconstrained.
    *rows
        Positional arguments are the keys of rows to be deleted, as
        dictionaries mapping column name to value.  The keys in all
        dictionaries must be exactly the names in ``columns``.

    Returns
    -------
    count : `int`
        Number of rows deleted.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    The default implementation should be sufficient for most derived
    classes.
    """
    self.assertTableWriteable(table, f"Cannot delete from read-only table {table}.")
    # Materialize *before* the emptiness test below: an iterator or
    # generator is always truthy, so testing it directly would make an
    # empty one look like "columns were given" and wrongly short-circuit
    # the delete-everything case.
    columns = list(columns)
    if columns and not rows:
        # If there are no columns, this operation is supposed to delete
        # everything (so we proceed as usual).  But if there are columns,
        # but no rows, it was a constrained bulk operation where the
        # constraint is that no rows match, and we should short-circuit
        # while reporting that no rows were affected.
        return 0
    sql = table.delete()

    # More efficient to use IN operator if there is only one
    # variable changing across all rows.
    content: dict[str, set] = defaultdict(set)
    if len(columns) == 1:
        # Nothing to calculate since we can always use IN
        column = columns[0]
        changing_columns = [column]
        content[column] = {row[column] for row in rows}
    else:
        for row in rows:
            for k, v in row.items():
                content[k].add(v)
        changing_columns = [col for col, values in content.items() if len(values) > 1]

    if len(changing_columns) != 1:
        # More than one column changes each time so do explicit bind
        # parameters and have each row processed separately.
        whereTerms = [table.columns[name] == sqlalchemy.sql.bindparam(name) for name in columns]
        if whereTerms:
            sql = sql.where(sqlalchemy.sql.and_(*whereTerms))
        with self._transaction() as (_, connection):
            return connection.execute(sql, rows).rowcount

    # One of the columns has changing values but any others are
    # fixed.  In this case we can use an IN operator and be more
    # efficient.
    name = changing_columns.pop()

    # Simple equality clauses for the unchanging columns; each of their
    # value sets holds exactly one element.
    clauses = [table.columns[k] == v.pop() for k, v in content.items() if k != name]

    # The IN operator will not work for "infinite" numbers of
    # rows so must batch it up into distinct calls.
    in_content = list(content[name])
    n_elements = len(in_content)

    rowcount = 0
    n_per_loop = 1_000  # Controls how many items to put in IN clause
    with self._transaction() as (_, connection):
        for iposn in range(0, n_elements, n_per_loop):
            endpos = iposn + n_per_loop
            in_clause = table.columns[name].in_(in_content[iposn:endpos])

            newsql = sql.where(sqlalchemy.sql.and_(*clauses, in_clause))
            rowcount += connection.execute(newsql).rowcount
    return rowcount
def deleteWhere(self, table: sqlalchemy.schema.Table, where: sqlalchemy.sql.ColumnElement) -> int:
    """Delete rows from a table with pre-constructed WHERE clause.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table that rows should be deleted from.
    where : `sqlalchemy.sql.ColumnElement`
        Pre-built SQLAlchemy expression used as the ``WHERE`` clause of
        the delete query; rows matching it are deleted.

    Returns
    -------
    count : `int`
        Number of rows deleted.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    The default implementation should be sufficient for most derived
    classes.
    """
    self.assertTableWriteable(table, f"Cannot delete from read-only table {table}.")

    statement = table.delete().where(where)
    with self._transaction() as (_, connection):
        outcome = connection.execute(statement)
        return outcome.rowcount
def update(self, table: sqlalchemy.schema.Table, where: dict[str, str], *rows: dict) -> int:
    """Update one or more rows in a table.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table containing the rows to be updated.
    where : `dict` [`str`, `str`]
        A mapping from the names of columns that will be used to search for
        existing rows to the keys that will hold these values in the
        ``rows`` dictionaries.  Note that these may not be the same due to
        SQLAlchemy limitations.
    *rows
        Positional arguments are the rows to be updated.  The keys in all
        dictionaries must be the same, and may correspond to either a
        value in the ``where`` dictionary or the name of a column to be
        updated.

    Returns
    -------
    count : `int`
        Number of rows matched (regardless of whether the update actually
        modified them).

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    The default implementation should be sufficient for most derived
    classes.
    """
    self.assertTableWriteable(table, f"Cannot update read-only table {table}.")
    if not rows:
        return 0
    # Each search column is matched against a bind parameter named by the
    # corresponding ``where`` value; the per-row values come from ``rows``.
    conditions = [
        table.columns[column] == sqlalchemy.sql.bindparam(param_key)
        for column, param_key in where.items()
    ]
    statement = table.update().where(sqlalchemy.sql.and_(*conditions))
    with self._transaction() as (_, connection):
        outcome = connection.execute(statement, rows)
        return outcome.rowcount
@contextmanager
def query(
    self,
    sql: sqlalchemy.sql.expression.Executable | sqlalchemy.sql.expression.SelectBase,
    *args: Any,
    **kwargs: Any,
) -> Iterator[sqlalchemy.engine.CursorResult]:
    """Run a SELECT query against the database.

    Parameters
    ----------
    sql : `sqlalchemy.sql.expression.SelectBase`
        A SQLAlchemy representation of a ``SELECT`` query.
    *args
        Additional positional arguments are forwarded to
        `sqlalchemy.engine.Connection.execute`.
    **kwargs
        Additional keyword arguments are forwarded to
        `sqlalchemy.engine.Connection.execute`.

    Returns
    -------
    result_context : `sqlalchemy.engine.CursorResult`
        Context manager that returns the query result object when entered.
        These results are invalidated when the context is exited.

    Notes
    -----
    If a session connection is active it is used and left open; otherwise
    a connection is opened just for this query and closed when the context
    exits, including when executing the query raises.
    """
    if self._session_connection is None:
        connection = self._engine.connect()
    else:
        connection = self._session_connection
    try:
        # Execute inside the ``try`` so that a connection opened only for
        # this query is closed even if execution itself fails.
        # TODO: SelectBase is not good for execute(), but it used everywhere,
        # e.g. in daf_relation. We should switch to Executable at some point.
        result = connection.execute(cast(sqlalchemy.sql.expression.Executable, sql), *args, **kwargs)
        yield result
    finally:
        # Only close connections that do not belong to the session.
        if connection is not self._session_connection:
            connection.close()
@abstractmethod
def constant_rows(
    self,
    fields: NamedValueAbstractSet[ddl.FieldSpec],
    *rows: dict,
    name: str | None = None,
) -> sqlalchemy.sql.FromClause:
    """Build a SQLAlchemy object holding a small set of constant-valued
    rows.

    Parameters
    ----------
    fields : `NamedValueAbstractSet` [ `ddl.FieldSpec` ]
        The columns of the rows.  Unique and foreign key constraints are
        ignored.
    *rows : `dict`
        Values for the rows.
    name : `str`, optional
        Name for the SQL construct.  When omitted, an opaque but unique
        identifier is generated.

    Returns
    -------
    from_clause : `sqlalchemy.sql.FromClause`
        SQLAlchemy object representing the given rows.  This is guaranteed
        to be something that can be directly joined into a ``SELECT``
        query's ``FROM`` clause, and will not involve a temporary table
        that needs to be cleaned up later.

    Notes
    -----
    The default implementation uses the SQL-standard ``VALUES`` construct,
    but support for that construct is varied enough across popular RDBMSs
    that the method is still marked abstract to force explicit opt-in via
    delegation to `super`.
    """
    construct_name = name if name is not None else f"tmp_{uuid.uuid4().hex}"
    columns = [sqlalchemy.Column(spec.name, spec.getSizedColumnType()) for spec in fields]
    # Each row becomes a tuple ordered by the field names.
    data = [tuple(row[key] for key in fields.names) for row in rows]
    values_clause = sqlalchemy.sql.values(*columns, name=construct_name)
    return values_clause.data(data)
def get_constant_rows_max(self) -> int:
    """Report the largest number of rows that should be handed to
    `constant_rows` for this backend.

    Returns
    -------
    max : `int`
        Maximum number of rows.

    Notes
    -----
    The value is meant to reflect typical performance profiles (or a guess
    at these), not just hard database engine limits.
    """
    row_limit = 100
    return row_limit
# Annotation-only declaration: no value is assigned at this point in the
# class body.
origin: int
"""An integer ID that should be used as the default for any datasets,
quanta, or other entities that use a (autoincrement, origin) compound
primary key (`int`).
"""

# Annotation-only declaration; the annotation permits `None` as well as a
# string.
namespace: str | None
"""The schema or namespace this database instance is associated with
(`str` or `None`).
"""