Coverage for python/lsst/daf/butler/registry/interfaces/_database.py: 21%
406 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-14 19:21 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-14 19:21 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = [
24 "Database",
25 "ReadOnlyDatabaseError",
26 "DatabaseConflictError",
27 "SchemaAlreadyDefinedError",
28 "StaticTablesContext",
29]
31import uuid
32import warnings
33from abc import ABC, abstractmethod
34from collections import defaultdict
35from collections.abc import Callable, Iterable, Iterator, Sequence
36from contextlib import contextmanager
37from typing import Any, cast, final
39import astropy.time
40import sqlalchemy
42from ...core import TimespanDatabaseRepresentation, ddl, time_utils
43from ...core.named import NamedValueAbstractSet
44from .._exceptions import ConflictingDefinitionError
47# TODO: method is called with list[ReflectedColumn] in SA 2, and
48# ReflectedColumn does not exist in 1.4.
def _checkExistingTableDefinition(name: str, spec: ddl.TableSpec, inspection: list) -> None:
    """Verify that a reflected table has exactly the columns its
    specification declares.

    Only the *sets* of column names are compared; column order and any
    repeated names in the reflected list do not matter.

    Parameters
    ----------
    name : `str`
        Table name; it only appears in the error message.
    spec : `ddl.TableSpec`
        Expected definition of the table.
    inspection : `list`
        Per-column mappings (each with a ``"name"`` entry) as returned by
        `sqlalchemy.engine.reflection.Inspector.get_columns`.

    Raises
    ------
    DatabaseConflictError
        Raised if the column names of ``spec`` and ``inspection`` differ.
    """
    reflected = [column["name"] for column in inspection]
    if spec.fields.names == set(reflected):
        # Same set of column names on both sides: nothing to report.
        return
    expected = list(spec.fields.names)
    raise DatabaseConflictError(
        f"Table '{name}' exists but is defined differently in the database; "
        f"specification has columns {expected}, while the "
        f"table in the database has {reflected}."
    )
class ReadOnlyDatabaseError(RuntimeError):
    """Error signalling that a write operation was attempted through a
    `Database` that is read-only.
    """
class DatabaseConflictError(ConflictingDefinitionError):
    """Error signalling that what is stored in the database (row values or
    schema entities) does not match what this client expects.
    """
class SchemaAlreadyDefinedError(RuntimeError):
    """Error signalling an attempt to initialize a database schema in a
    database where some tables already exist.
    """
class StaticTablesContext:
    """Collector for the static schema declarations of one registry layer.

    Instances are handed out by `Database.declareStaticTables`; that method
    is intended to be the only place where this class is constructed.

    Parameters
    ----------
    db : `Database`
        Database whose static schema is being declared.
    connection : `sqlalchemy.engine.Connection`
        Connection used to inspect the tables that already exist.
    """

    def __init__(self, db: Database, connection: sqlalchemy.engine.Connection):
        self._db = db
        # Foreign keys are only recorded here; they get attached to their
        # tables by `Database.declareStaticTables` once the block is done.
        self._foreignKeys: list[tuple[sqlalchemy.schema.Table, sqlalchemy.schema.ForeignKeyConstraint]] = []
        self._inspector = sqlalchemy.inspect(connection)
        # Snapshot of the tables present when the context was created.
        existing = self._inspector.get_table_names(schema=self._db.namespace)
        self._tableNames = frozenset(existing)
        self._initializers: list[Callable[[Database], None]] = []

    def addTable(self, name: str, spec: ddl.TableSpec) -> sqlalchemy.schema.Table:
        """Declare one table and return its SQLAlchemy representation.

        Actual creation may be postponed until the context opened by
        `Database.declareStaticTables` exits, so tables may be declared in
        any order even when foreign keys link them.

        Parameters
        ----------
        name : `str`
            Logical table name; it is mangled by the database before use.
        spec : `ddl.TableSpec`
            Specification of the table.

        Returns
        -------
        table : `sqlalchemy.schema.Table`
            SQLAlchemy representation of the table.
        """
        mangled = self._db._mangleTableName(name)
        if mangled in self._tableNames:
            # The table is already there: make sure its columns agree with
            # the specification.
            _checkExistingTableDefinition(
                mangled, spec, self._inspector.get_columns(mangled, schema=self._db.namespace)
            )
        metadata = self._db._metadata
        assert metadata is not None, "Guaranteed by context manager that returns this object."
        table = self._db._convertTableSpec(mangled, spec, metadata)
        self._foreignKeys.extend(
            (table, self._db._convertForeignKeySpec(mangled, fkSpec, metadata))
            for fkSpec in spec.foreignKeys
        )
        return table

    def addTableTuple(self, specs: tuple[ddl.TableSpec, ...]) -> tuple[sqlalchemy.schema.Table, ...]:
        """Declare every table of a named tuple of specifications and return
        the SQLAlchemy tables in a named tuple of the same type.

        Each field name of the named tuple is used as the table name. As
        with `addTable`, creation may be postponed until the context opened
        by `Database.declareStaticTables` exits.

        Notes
        -----
        ``specs`` *must* be an instance of a type made by
        `collections.namedtuple` rather than a plain tuple, and the result
        is an instance of that same type. Since `~collections.namedtuple`
        is a factory of `type` objects and not a type itself, this cannot
        be expressed with type annotations.
        """
        tables = (
            self.addTable(fieldName, tableSpec)
            for fieldName, tableSpec in zip(specs._fields, specs)  # type: ignore
        )
        return specs._make(tables)  # type: ignore

    def addInitializer(self, initializer: Callable[[Database], None]) -> None:
        """Register a callable performing one-time database initialization.

        Initialization is anything that changes the state of the database
        and has to happen exactly once after the schema was created, for
        example populating schema attributes.

        Parameters
        ----------
        initializer : callable
            Callable taking a single `Database` argument.
        """
        self._initializers.append(initializer)
167class Database(ABC):
168 """An abstract interface that represents a particular database engine's
169 representation of a single schema/namespace/database.
171 Parameters
172 ----------
173 origin : `int`
174 An integer ID that should be used as the default for any datasets,
175 quanta, or other entities that use a (autoincrement, origin) compound
176 primary key.
177 engine : `sqlalchemy.engine.Engine`
178 The SQLAlchemy engine for this `Database`.
179 namespace : `str`, optional
180 Name of the schema or namespace this instance is associated with.
181 This is passed as the ``schema`` argument when constructing a
182 `sqlalchemy.schema.MetaData` instance. We use ``namespace`` instead to
183 avoid confusion between "schema means namespace" and "schema means
184 table definitions".
186 Notes
187 -----
188 `Database` requires all write operations to go through its special named
189 methods. Our write patterns are sufficiently simple that we don't really
190 need the full flexibility of SQL insert/update/delete syntax, and we need
191 non-standard (but common) functionality in these operations sufficiently
192 often that it seems worthwhile to provide our own generic API.
194 In contrast, `Database.query` allows arbitrary ``SELECT`` queries (via
195 their SQLAlchemy representation) to be run, as we expect these to require
196 significantly more sophistication while still being limited to standard
197 SQL.
199 `Database` itself has several underscore-prefixed attributes:
201 - ``_engine``: SQLAlchemy object representing its engine.
202 - ``_connection``: method returning a context manager for
203 `sqlalchemy.engine.Connection` object.
204 - ``_metadata``: the `sqlalchemy.schema.MetaData` object representing
205 the tables and other schema entities.
207 These are considered protected (derived classes may access them, but other
208 code should not), and read-only, aside from executing SQL via
209 ``_connection``.
210 """
def __init__(self, *, origin: int, engine: sqlalchemy.engine.Engine, namespace: str | None = None):
    # Public identification of this database.
    self.origin = origin
    self.namespace = namespace
    # Protected state: the engine, the connection of the currently open
    # session (if any), the declared schema (if any), and the keys of the
    # temporary tables that currently exist.
    self._engine = engine
    self._temp_tables: set[str] = set()
    self._metadata: sqlalchemy.schema.MetaData | None = None
    self._session_connection: sqlalchemy.engine.Connection | None = None
def __repr__(self) -> str:
    # The connection URL is more informative than a faithful reproduction
    # of the constructor arguments, so that is what gets reported. Any
    # password in the URL is masked.
    url = self._engine.url
    uri = str(url.set(password="***")) if url.password is not None else str(url)
    if self.namespace:
        uri = f"{uri}#{self.namespace}"
    return f'{type(self).__name__}("{uri}")'
@classmethod
def makeDefaultUri(cls, root: str) -> str | None:
    """Return the default connection URI for a root directory.

    Parameters
    ----------
    root : `str`
        Root directory the default URI should be appropriate for.

    Returns
    -------
    uri : `str` or `None`
        The default URI, or `None` if no default can exist. This base
        implementation always returns `None`.
    """
    return None
@classmethod
def fromUri(
    cls,
    uri: str | sqlalchemy.engine.URL,
    *,
    origin: int,
    namespace: str | None = None,
    writeable: bool = True,
) -> Database:
    """Build a `Database` from a SQLAlchemy connection URI.

    The engine is obtained from `makeEngine` and then handed to
    `fromEngine` together with the remaining arguments.

    Parameters
    ----------
    uri : `str` or `sqlalchemy.engine.URL`
        A SQLAlchemy URI connection string.
    origin : `int`
        Integer ID to use as the default for any datasets, quanta, or
        other entities with an (autoincrement, origin) compound primary
        key.
    namespace : `str`, optional
        Database namespace (i.e. schema) to associate the new instance
        with. If `None` (default), the namespace (if any) is inferred
        from the URI.
    writeable : `bool`, optional
        If `True`, allow write operations on the database, including
        ``CREATE TABLE``.

    Returns
    -------
    db : `Database`
        A new `Database` instance.
    """
    engine = cls.makeEngine(uri, writeable=writeable)
    return cls.fromEngine(engine, origin=origin, namespace=namespace, writeable=writeable)
@classmethod
@abstractmethod
def makeEngine(
    cls, uri: str | sqlalchemy.engine.URL, *, writeable: bool = True
) -> sqlalchemy.engine.Engine:
    """Build a `sqlalchemy.engine.Engine` for a SQLAlchemy URI.

    Parameters
    ----------
    uri : `str` or `sqlalchemy.engine.URL`
        A SQLAlchemy URI connection string.
    writeable : `bool`, optional
        If `True`, allow write operations on the database, including
        ``CREATE TABLE``.

    Returns
    -------
    engine : `sqlalchemy.engine.Engine`
        A database engine.

    Notes
    -----
    Subclasses that can connect to a database in other ways are
    encouraged to add optional arguments to their implementation, provided
    they stay compatible with the call signature of the base class.
    """
    raise NotImplementedError()
@classmethod
@abstractmethod
def fromEngine(
    cls,
    engine: sqlalchemy.engine.Engine,
    *,
    origin: int,
    namespace: str | None = None,
    writeable: bool = True,
) -> Database:
    """Build a `Database` on top of an existing `sqlalchemy.engine.Engine`.

    Parameters
    ----------
    engine : `sqlalchemy.engine.Engine`
        The engine for the database. It may be shared between `Database`
        instances.
    origin : `int`
        Integer ID to use as the default for any datasets, quanta, or
        other entities with an (autoincrement, origin) compound primary
        key.
    namespace : `str`, optional
        A different database namespace (i.e. schema) to associate the new
        instance with. If `None` (default), the namespace (if any) is
        inferred from the connection.
    writeable : `bool`, optional
        If `True`, allow write operations on the database, including
        ``CREATE TABLE``.

    Returns
    -------
    db : `Database`
        A new `Database` instance.

    Notes
    -----
    This method lets several `Database` instances share one engine, which
    is desirable when they represent different namespaces that can be
    queried together.
    """
    raise NotImplementedError()
@final
@contextmanager
def session(self) -> Iterator[None]:
    """Open a session, i.e. a persistent connection to the database, for
    the duration of a ``with`` block.

    Returns
    -------
    context : `AbstractContextManager` [ `None` ]
        A context manager that yields no value when entered.

    Notes
    -----
    Use this when a series of read-only SQL operations is executed in
    rapid succession and they are *not* required to give consistent
    results in the presence of concurrent writes (or, more rarely, when
    conflicting concurrent writes are rare/impossible and the session
    stays open long enough that a transaction is inadvisable).
    """
    # The protected implementation yields the connection; public callers
    # are deliberately not given access to it.
    session_context = self._session()
    with session_context:
        yield None
@final
@contextmanager
def transaction(
    self,
    *,
    interrupting: bool = False,
    savepoint: bool = False,
    lock: Iterable[sqlalchemy.schema.Table] = (),
    for_temp_tables: bool = False,
) -> Iterator[None]:
    """Run the body of a ``with`` block inside a transaction.

    Parameters
    ----------
    interrupting : `bool`, optional
        If `True` (`False` is default), this transaction block may not be
        nested within an outer one, and attempting to do so is a logic
        (i.e. assertion) error.
    savepoint : `bool`, optional
        If `True` (`False` is default), create a `SAVEPOINT`, so that
        exceptions raised by the database (e.g. due to constraint
        violations) inside this block can be caught outside it without
        also rolling back all operations of an outer transaction block.
        If `False`, transactions may still be nested, but a rollback may
        be generated at any level and affects all levels, and commits are
        deferred until the outermost block completes. If any outer
        transaction block was created with ``savepoint=True``, all inner
        blocks will be as well (regardless of the value passed). This has
        no effect if this is the outermost transaction.
    lock : `~collections.abc.Iterable` [ `sqlalchemy.schema.Table` ], \
            optional
        Tables to lock for the duration of this transaction. The locks
        are guaranteed to prevent concurrent writes and to allow this
        transaction (only) to acquire the same locks (others should
        block), but only prevent concurrent reads if the database engine
        requires that in order to block concurrent writes.
    for_temp_tables : `bool`, optional
        If `True`, this transaction may involve creating temporary tables.

    Returns
    -------
    context : `AbstractContextManager` [ `None` ]
        A context manager that commits the transaction when it is exited
        without error and rolls back the transaction when it is exited
        via an exception.

    Notes
    -----
    All transactions on a connection managed by one or more `Database`
    instances _must_ go through this method, or transaction state will not
    be correctly managed.
    """
    # Delegate to the protected implementation and hide the
    # ``(is_new, connection)`` tuple it yields.
    options = dict(
        interrupting=interrupting, savepoint=savepoint, lock=lock, for_temp_tables=for_temp_tables
    )
    with self._transaction(**options):
        yield None
@contextmanager
def temporary_table(
    self, spec: ddl.TableSpec, name: str | None = None
) -> Iterator[sqlalchemy.schema.Table]:
    """Create a temporary table for the duration of a ``with`` block and
    drop it afterwards.

    Parameters
    ----------
    spec : `ddl.TableSpec`
        Specification for the columns. Unique and foreign key constraints
        may be ignored.
    name : `str`, optional
        If provided, the name of the SQL construct. If not provided, an
        opaque but unique identifier is generated.

    Returns
    -------
    context : `AbstractContextManager` [ `sqlalchemy.schema.Table` ]
        A context manager that returns a SQLAlchemy representation of the
        temporary table when entered.

    Notes
    -----
    Temporary tables may be created, dropped, and written to even in
    read-only databases - at least as far as the Python-level protections
    of the `Database` classes are concerned. Server permissions may say
    otherwise, but in that case they probably need to be changed to
    support the full range of expected read-only butler behavior.
    """
    with self._session() as session_connection:
        temp_table = self._make_temporary_table(session_connection, spec=spec, name=name)
        # Record the table so that `isTableWriteable` accepts it.
        self._temp_tables.add(temp_table.key)
        try:
            yield temp_table
        finally:
            # Drop inside a transaction, then forget the table.
            with self._transaction():
                temp_table.drop(session_connection)
            self._temp_tables.remove(temp_table.key)
@contextmanager
def _session(self) -> Iterator[sqlalchemy.engine.Connection]:
    """Protected counterpart of `session` that yields the connection.

    Internal `Database` code that needs the actual SQLAlchemy connection
    object calls this method. Subclasses should override it rather than
    `session`.

    Returns
    -------
    context : `AbstractContextManager` [ `sqlalchemy.engine.Connection` ]
        A context manager that returns a SQLAlchemy connection when
        entered.
    """
    if self._session_connection is not None:
        # A session is already open: reuse its connection and leave the
        # job of closing it to whoever opened it.
        yield self._session_connection
        return
    try:
        # No session yet: open a connection and close it when done.
        self._session_connection = self._engine.connect()
        yield self._session_connection
    finally:
        if self._session_connection is not None:
            self._session_connection.close()
            self._session_connection = None
            # Temporary tables only live within a session.
            self._temp_tables = set()
@contextmanager
def _transaction(
    self,
    *,
    interrupting: bool = False,
    savepoint: bool = False,
    lock: Iterable[sqlalchemy.schema.Table] = (),
    for_temp_tables: bool = False,
) -> Iterator[tuple[bool, sqlalchemy.engine.Connection]]:
    """Protected implementation for `transaction` that actually returns the
    connection and whether this is a new outermost transaction.

    This method is for internal `Database` calls that need the actual
    SQLAlchemy connection object. It should be overridden by subclasses
    instead of `transaction` itself.

    Parameters
    ----------
    interrupting : `bool`, optional
        If `True` (`False` is default), this transaction block may not be
        nested within an outer one, and attempting to do so is a logic
        (i.e. assertion) error.
    savepoint : `bool`, optional
        If `True` (`False` is default), create a `SAVEPOINT`, allowing
        exceptions raised by the database (e.g. due to constraint
        violations) during this transaction's context to be caught outside
        it without also rolling back all operations in an outer transaction
        block. If `False`, transactions may still be nested, but a
        rollback may be generated at any level and affects all levels, and
        commits are deferred until the outermost block completes. If any
        outer transaction block was created with ``savepoint=True``, all
        inner blocks will be as well (regardless of the actual value
        passed). This has no effect if this is the outermost transaction.
    lock : `~collections.abc.Iterable` [ `sqlalchemy.schema.Table` ], \
            optional
        A list of tables to lock for the duration of this transaction.
        These locks are guaranteed to prevent concurrent writes and allow
        this transaction (only) to acquire the same locks (others should
        block), but only prevent concurrent reads if the database engine
        requires that in order to block concurrent writes.
    for_temp_tables : `bool`, optional
        If `True`, this transaction may involve creating temporary tables.
        This base implementation does not use the value.

    Returns
    -------
    context : `AbstractContextManager` [ `tuple` [ `bool`,
            `sqlalchemy.engine.Connection` ] ]
        A context manager that commits the transaction when it is exited
        without error and rolls back the transaction when it is exited via
        an exception. When entered, it returns a tuple of:

        - ``is_new`` (`bool`): whether this is a new (outermost)
          transaction;
        - ``connection`` (`sqlalchemy.engine.Connection`): the connection.
    """
    with self._session() as connection:
        already_in_transaction = connection.in_transaction()
        assert not (interrupting and already_in_transaction), (
            "Logic error in transaction nesting: an operation that would "
            "interrupt the active transaction context has been requested."
        )
        # Once any enclosing block uses a savepoint, inner blocks do too.
        savepoint = savepoint or connection.in_nested_transaction()
        trans: sqlalchemy.engine.Transaction | None
        if already_in_transaction:
            if savepoint:
                trans = connection.begin_nested()
            else:
                # Nested non-savepoint transactions don't do anything.
                trans = None
        else:
            # Use a regular (non-savepoint) transaction always for the
            # outermost context.
            trans = connection.begin()
        try:
            # Locks are acquired inside the ``try`` so that a failure to
            # lock rolls back the transaction (or savepoint) begun above
            # instead of leaving it open on a session connection that may
            # be reused afterwards.
            self._lockTables(connection, lock)
            yield not already_in_transaction, connection
            if trans is not None:
                trans.commit()
        except BaseException:
            if trans is not None:
                trans.rollback()
            raise
@abstractmethod
def _lockTables(
    self, connection: sqlalchemy.engine.Connection, tables: Iterable[sqlalchemy.schema.Table] = ()
) -> None:
    """Acquire locks on a set of tables.

    This is a hook for subclasses to implement; it is invoked by
    `transaction` and should not be called directly by other code.

    Parameters
    ----------
    connection : `sqlalchemy.engine.Connection`
        Database connection object. A transaction is guaranteed to be
        already in progress on this connection.
    tables : `~collections.abc.Iterable` [ `sqlalchemy.schema.Table` ], \
            optional
        Tables to lock for the duration of the transaction. The locks are
        guaranteed to prevent concurrent writes and to allow this
        transaction (only) to acquire the same locks (others should
        block), but only prevent concurrent reads if the database engine
        requires that in order to block concurrent writes.
    """
    raise NotImplementedError()
def isTableWriteable(self, table: sqlalchemy.schema.Table) -> bool:
    """Report whether rows may be written to a table.

    A table is writeable when the database itself is writeable or when
    the table is one of the temporary tables tracked by this instance.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy table object to check.

    Returns
    -------
    writeable : `bool`
        Whether this table is writeable.
    """
    database_writeable = self.isWriteable()
    if database_writeable:
        return database_writeable
    return table.key in self._temp_tables
def assertTableWriteable(self, table: sqlalchemy.schema.Table, msg: str) -> None:
    """Raise unless the given table is writeable according to
    `isTableWriteable`.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy table object to check.
    msg : `str`
        Message for the exception raised when the table is not writeable.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if the table is not writeable.
    """
    if self.isTableWriteable(table):
        return
    raise ReadOnlyDatabaseError(msg)
@contextmanager
def declareStaticTables(self, *, create: bool) -> Iterator[StaticTablesContext]:
    """Open a context in which the static DDL schema of the database is
    declared.

    Parameters
    ----------
    create : `bool`
        If `True`, attempt to create all tables at the end of the context.
        If `False`, they will be assumed to already exist.

    Returns
    -------
    schema : `StaticTablesContext`
        A helper object that is used to add new tables.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if ``create`` is `True` and `Database.isWriteable` is
        `False`.
    SchemaAlreadyDefinedError
        Raised if ``create`` is `True` and the database already contains
        tables.

    Examples
    --------
    Given a `Database` instance ``db``::

        with db.declareStaticTables(create=True) as schema:
            schema.addTable("table1", TableSpec(...))
            schema.addTable("table2", TableSpec(...))

    Notes
    -----
    A database's static DDL schema must be declared before any dynamic
    tables are managed via calls to `ensureTableExists` or
    `getExistingTable`. Static tables may be added in any order inside
    the context block; they are automatically sorted and created in an
    order consistent with their foreign key relationships.
    """
    if create and not self.isWriteable():
        raise ReadOnlyDatabaseError(f"Cannot create tables in read-only database {self}.")
    self._metadata = sqlalchemy.MetaData(schema=self.namespace)
    try:
        with self._transaction() as (_, connection):
            context = StaticTablesContext(self, connection)
            if create and context._tableNames:
                # The database looks initialized already; refuse to do
                # anything rather than risk modifying/destroying a valid
                # schema.
                raise SchemaAlreadyDefinedError(f"Cannot create tables in non-empty database {self}.")
            yield context
            # Foreign keys were only recorded while tables were declared;
            # attach them now that every table is known.
            for table, foreignKey in context._foreignKeys:
                table.append_constraint(foreignKey)
            if create:
                namespace = self.namespace
                if namespace is not None and namespace not in context._inspector.get_schema_names():
                    connection.execute(sqlalchemy.schema.CreateSchema(namespace))
                # Some of our columns use sqlalchemy Sequence objects. A
                # bug in sqlalchemy currently makes a property of Sequence
                # emit a deprecation warning when the repr of the sequence
                # is built, so those warnings are filtered out while the
                # tables are created.
                with warnings.catch_warnings():
                    warnings.simplefilter("ignore", category=sqlalchemy.exc.SADeprecationWarning)
                    self._metadata.create_all(connection)
                # Run all registered initializers, in registration order.
                for initializer in context._initializers:
                    initializer(self)
    except BaseException:
        # Do not keep a half-declared schema around.
        self._metadata = None
        raise
@abstractmethod
def isWriteable(self) -> bool:
    """Report whether this client is allowed to modify the database.

    Returns
    -------
    writeable : `bool`
        `True` if this database can be modified by this client.
    """
    raise NotImplementedError()
@abstractmethod
def __str__(self) -> str:
    """Return a human-readable identifier for this `Database`.

    The identifier includes any namespace or schema that identifies the
    names of this database within a `Registry`.
    """
    raise NotImplementedError()
@property
def dialect(self) -> sqlalchemy.engine.Dialect:
    """The SQLAlchemy dialect of the engine behind this database
    (`sqlalchemy.engine.Dialect`).
    """
    engine = self._engine
    return engine.dialect
def shrinkDatabaseEntityName(self, original: str) -> str:
    """Shorten a name so that it fits the length limits this database
    engine puts on table, constraint, index, and sequence names.

    Implementations should not assume that plain truncation is safe:
    several long names often start with the same prefix.

    This default implementation returns the name unchanged.

    Parameters
    ----------
    original : `str`
        The original name.

    Returns
    -------
    shrunk : `str`
        The new, possibly shortened name.
    """
    shrunk = original
    return shrunk
def expandDatabaseEntityName(self, shrunk: str) -> str:
    """Recover the original name of a database entity whose name was
    shortened to fit within the database engine's limits.

    This default implementation returns the name unchanged.

    Parameters
    ----------
    shrunk : `str`
        The possibly shortened name.

    Returns
    -------
    original : `str`
        The original name.
    """
    original = shrunk
    return original
def _mangleTableName(self, name: str) -> str:
    """Translate a logical, user-visible table name into the table name
    actually used in the database.

    This default implementation returns the name unchanged.

    Parameters
    ----------
    name : `str`
        Input table name. Should not include a namespace (i.e. schema)
        prefix.

    Returns
    -------
    mangled : `str`
        Mangled version of the table name (still with no namespace prefix).

    Notes
    -----
    Reimplementations must be idempotent: mangling a name that is already
    mangled must have no effect.
    """
    mangled = name
    return mangled
def _makeColumnConstraints(self, table: str, spec: ddl.FieldSpec) -> list[sqlalchemy.CheckConstraint]:
    """Build the extra constraints implied by a field specification.

    Parameters
    ----------
    table : `str`
        Name of the table this column is being added to.
    spec : `FieldSpec`
        Specification for the field to be added.

    Returns
    -------
    constraint : `list` of `sqlalchemy.CheckConstraint`
        Constraints added for this column. This default implementation
        adds none and returns a new empty list.
    """
    constraints: list[sqlalchemy.CheckConstraint] = []
    return constraints
def _convertFieldSpec(
    self, table: str, spec: ddl.FieldSpec, metadata: sqlalchemy.MetaData, **kwargs: Any
) -> sqlalchemy.schema.Column:
    """Build the `sqlalchemy.schema.Column` described by a `FieldSpec`.

    Parameters
    ----------
    table : `str`
        Name of the table this column is being added to.
    spec : `FieldSpec`
        Specification for the field to be added.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema this field's table is
        being added to.
    **kwargs
        Additional keyword arguments forwarded to the
        `sqlalchemy.schema.Column` constructor, so that derived classes
        can delegate to ``super()`` while making only minor changes.

    Returns
    -------
    column : `sqlalchemy.schema.Column`
        SQLAlchemy representation of the field.
    """
    positional = []
    if spec.autoincrement:
        # Provide a sequence for auto-incrementing on databases without
        # native support for it; sqlalchemy ignores it on databases that
        # do support auto-increment natively.
        sequence = sqlalchemy.Sequence(
            self.shrinkDatabaseEntityName(f"{table}_seq_{spec.name}"), metadata=metadata
        )
        positional.append(sequence)
    assert spec.doc is None or isinstance(spec.doc, str), f"Bad doc for {table}.{spec.name}."
    column = sqlalchemy.schema.Column(
        spec.name,
        spec.getSizedColumnType(),
        *positional,
        nullable=spec.nullable,
        primary_key=spec.primaryKey,
        comment=spec.doc,
        server_default=spec.default,
        **kwargs,
    )
    return column
def _convertForeignKeySpec(
    self, table: str, spec: ddl.ForeignKeySpec, metadata: sqlalchemy.MetaData, **kwargs: Any
) -> sqlalchemy.schema.ForeignKeyConstraint:
    """Convert a `ForeignKeySpec` to a
    `sqlalchemy.schema.ForeignKeyConstraint`.

    Parameters
    ----------
    table : `str`
        Name of the table this foreign key is being added to.
    spec : `ForeignKeySpec`
        Specification for the foreign key to be added.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema this constraint is
        being added to. This implementation does not use it.
    **kwargs
        Additional keyword arguments to forward to the
        `sqlalchemy.schema.ForeignKeyConstraint` constructor. This is
        provided to make it easier for derived classes to delegate to
        ``super()`` while making only minor changes.

    Returns
    -------
    constraint : `sqlalchemy.schema.ForeignKeyConstraint`
        SQLAlchemy representation of the constraint.
    """
    target_table = self._mangleTableName(spec.table)
    # The constraint name encodes both tables and all columns involved,
    # and is shrunk to fit the engine's identifier length limits.
    name = self.shrinkDatabaseEntityName(
        "_".join(["fkey", table, target_table, *spec.target, *spec.source])
    )
    return sqlalchemy.schema.ForeignKeyConstraint(
        spec.source,
        [f"{target_table}.{col}" for col in spec.target],
        name=name,
        ondelete=spec.onDelete,
        # Forward caller-supplied options as documented; they used to be
        # accepted but silently dropped.
        **kwargs,
    )
def _convertExclusionConstraintSpec(
    self,
    table: str,
    spec: tuple[str | type[TimespanDatabaseRepresentation], ...],
    metadata: sqlalchemy.MetaData,
) -> sqlalchemy.schema.Constraint:
    """Build the SQLAlchemy constraint for one `tuple` taken from
    `ddl.TableSpec.exclusion`.

    Parameters
    ----------
    table : `str`
        Name of the table this constraint is being added to.
    spec : `tuple` [ `str` or `type` ]
        A tuple of `str` column names and the `type` object returned by
        `getTimespanRepresentation` (which must appear exactly once),
        giving the order of the columns in the index that backs the
        constraint.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema this constraint is
        being added to.

    Returns
    -------
    constraint : `sqlalchemy.schema.Constraint`
        SQLAlchemy representation of the constraint.

    Raises
    ------
    NotImplementedError
        Raised if this database does not support exclusion constraints;
        this base implementation always raises it.
    """
    message = f"Database {self} does not support exclusion constraints."
    raise NotImplementedError(message)
def _convertTableSpec(
    self, name: str, spec: ddl.TableSpec, metadata: sqlalchemy.MetaData, **kwargs: Any
) -> sqlalchemy.schema.Table:
    """Convert a `TableSpec` to a `sqlalchemy.schema.Table`.

    Parameters
    ----------
    name : `str`
        Name of the table; it is passed through `_mangleTableName` before
        being used.
    spec : `TableSpec`
        Specification for the table to be converted.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema this table is being
        added to.
    **kwargs
        Additional keyword arguments to forward to the
        `sqlalchemy.schema.Table` constructor.  This is provided to make it
        easier for derived classes to delegate to ``super()`` while making
        only minor changes.

    Returns
    -------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy representation of the table.

    Notes
    -----
    Foreign key *constraints* from ``spec.foreignKeys`` are deliberately
    not created here, in order to avoid circular dependencies; they are
    added by higher-level logic in `ensureTableExists`, `getExistingTable`,
    and `declareStaticTables`.  Only the indexes requested for foreign keys
    are created here.
    """
    name = self._mangleTableName(name)

    # Columns first, then any per-column constraints.
    items: list[sqlalchemy.schema.SchemaItem] = []
    for field in spec.fields:
        items.append(self._convertFieldSpec(name, field, metadata))
    for field in spec.fields:
        items.extend(self._makeColumnConstraints(name, field))

    # Column tuples that already get an index (primary key, unique
    # constraints, explicit indexes), so that explicit and foreign key
    # indexes are not duplicated on top of them.
    covered = {tuple(field.name for field in spec.fields if field.primaryKey)}

    for columns in spec.unique:
        constraint_name = self.shrinkDatabaseEntityName("_".join([name, "unq"] + list(columns)))
        items.append(sqlalchemy.schema.UniqueConstraint(*columns, name=constraint_name))
    covered.update(spec.unique)

    for index in spec.indexes:
        if index.columns in covered:
            continue
        index_name = self.shrinkDatabaseEntityName("_".join([name, "idx"] + list(index.columns)))
        items.append(
            sqlalchemy.schema.Index(
                index_name,
                *index.columns,
                unique=(index.columns in spec.unique),
                **index.kwargs,
            )
        )
    # Explicit indexes are only recorded after all of them were processed.
    covered.update(index.columns for index in spec.indexes)

    for fk in spec.foreignKeys:
        if not fk.addIndex or fk.source in covered:
            continue
        fk_index_name = self.shrinkDatabaseEntityName("_".join((name, "fkidx") + fk.source))
        items.append(sqlalchemy.schema.Index(fk_index_name, *fk.source))

    for exclusion in spec.exclusion:
        items.append(self._convertExclusionConstraintSpec(name, exclusion, metadata))

    assert spec.doc is None or isinstance(spec.doc, str), f"Bad doc for {name}."
    return sqlalchemy.schema.Table(name, metadata, *items, comment=spec.doc, info={"spec": spec}, **kwargs)
def ensureTableExists(self, name: str, spec: ddl.TableSpec) -> sqlalchemy.schema.Table:
    """Ensure that a table with the given name and specification exists,
    creating it if necessary.

    Parameters
    ----------
    name : `str`
        Name of the table (not including namespace qualifiers).
    spec : `TableSpec`
        Specification for the table.  This will be used when creating the
        table, and *may* be used when obtaining an existing table to check
        for consistency, but no such check is guaranteed.

    Returns
    -------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy representation of the table.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False`, and the table does not
        already exist.
    DatabaseConflictError
        Raised if the table exists but ``spec`` is inconsistent with its
        definition.
    sqlalchemy.exc.DatabaseError
        Re-raised if table creation fails and the table still does not
        exist in the database afterwards.

    Notes
    -----
    This method may not be called within transactions.  It may be called on
    read-only databases if and only if the table does in fact already
    exist.

    Subclasses may override this method, but usually should not need to.
    """
    # TODO: if _engine is used to make a table then it uses separate
    # connection and should not interfere with current transaction
    assert (
        self._session_connection is None or not self._session_connection.in_transaction()
    ), "Table creation interrupts transactions."
    assert self._metadata is not None, "Static tables must be declared before dynamic tables."
    table = self.getExistingTable(name, spec)
    if table is not None:
        return table
    if not self.isWriteable():
        raise ReadOnlyDatabaseError(
            f"Table {name} does not exist, and cannot be created because database {self} is read-only."
        )
    table = self._convertTableSpec(name, spec, self._metadata)
    for foreignKeySpec in spec.foreignKeys:
        table.append_constraint(self._convertForeignKeySpec(name, foreignKeySpec, self._metadata))
    try:
        with self._transaction() as (_, connection):
            table.create(connection)
    except sqlalchemy.exc.DatabaseError:
        # Some other process could have created the table meanwhile, which
        # usually causes OperationalError or ProgrammingError.  We cannot
        # use IF NOT EXISTS clause in this case due to PostgreSQL race
        # condition on server side which causes IntegrityError.  Instead we
        # catch these exceptions (they all inherit DatabaseError) and
        # re-check whether table is now there.
        #
        # The definition built above is already registered in the in-memory
        # metadata, where getExistingTable looks first.  Drop it so that the
        # re-check consults the database itself; otherwise every creation
        # failure would be reported as success and the stale definition
        # would hide the missing table from later calls as well.
        self._metadata.remove(table)
        table = self.getExistingTable(name, spec)
        if table is None:
            raise
    return table
def getExistingTable(self, name: str, spec: ddl.TableSpec) -> sqlalchemy.schema.Table | None:
    """Look up an already-existing table by name and specification.

    Parameters
    ----------
    name : `str`
        Name of the table (not including namespace qualifiers).
    spec : `TableSpec`
        Specification for the table.  It is used to build the SQLAlchemy
        representation of the table and to check that the existing
        definition is consistent with what the caller expects.

    Returns
    -------
    table : `sqlalchemy.schema.Table` or `None`
        SQLAlchemy representation of the table, or `None` if it does not
        exist.

    Raises
    ------
    DatabaseConflictError
        Raised if the table exists but ``spec`` is inconsistent with its
        definition.

    Notes
    -----
    This method can be called within transactions and never modifies the
    database.

    Subclasses may override this method, but usually should not need to.
    """
    assert self._metadata is not None, "Static tables must be declared before dynamic tables."
    name = self._mangleTableName(name)
    key = f"{self.namespace}.{name}" if self.namespace is not None else name

    # Fast path: the table is already known to the in-memory metadata.
    known = self._metadata.tables.get(key)
    if known is not None:
        if spec.fields.names != set(known.columns.keys()):
            raise DatabaseConflictError(
                f"Table '{name}' has already been defined differently; the new "
                f"specification has columns {list(spec.fields.names)}, while "
                f"the previous definition has {list(known.columns.keys())}."
            )
        return known

    # Otherwise ask the database, through the session connection when there
    # is one and through the engine when there is not.
    bind = self._session_connection if self._session_connection is not None else self._engine
    inspector = sqlalchemy.inspect(bind, raiseerr=True)
    if name not in inspector.get_table_names(schema=self.namespace):
        return None
    _checkExistingTableDefinition(name, spec, inspector.get_columns(name, schema=self.namespace))
    table = self._convertTableSpec(name, spec, self._metadata)
    for foreignKeySpec in spec.foreignKeys:
        table.append_constraint(self._convertForeignKeySpec(name, foreignKeySpec, self._metadata))
    return table
1118 def _make_temporary_table(
1119 self,
1120 connection: sqlalchemy.engine.Connection,
1121 spec: ddl.TableSpec,
1122 name: str | None = None,
1123 **kwargs: Any,
1124 ) -> sqlalchemy.schema.Table:
1125 """Create a temporary table.
1127 Parameters
1128 ----------
1129 connection : `sqlalchemy.engine.Connection`
1130 Connection to use when creating the table.
1131 spec : `TableSpec`
1132 Specification for the table.
1133 name : `str`, optional
1134 A unique (within this session/connetion) name for the table.
1135 Subclasses may override to modify the actual name used. If not
1136 provided, a unique name will be generated.
1137 **kwargs
1138 Additional keyword arguments to forward to the
1139 `sqlalchemy.schema.Table` constructor. This is provided to make it
1140 easier for derived classes to delegate to ``super()`` while making
1141 only minor changes.
1143 Returns
1144 -------
1145 table : `sqlalchemy.schema.Table`
1146 SQLAlchemy representation of the table.
1147 """
1148 if name is None:
1149 name = f"tmp_{uuid.uuid4().hex}"
1150 metadata = self._metadata
1151 if metadata is None:
1152 raise RuntimeError("Cannot create temporary table before static schema is defined.")
1153 table = self._convertTableSpec(
1154 name, spec, metadata, prefixes=["TEMPORARY"], schema=sqlalchemy.schema.BLANK_SCHEMA, **kwargs
1155 )
1156 if table.key in self._temp_tables:
1157 if table.key != name:
1158 raise ValueError(
1159 f"A temporary table with name {name} (transformed to {table.key} by "
1160 "Database) already exists."
1161 )
1162 for foreignKeySpec in spec.foreignKeys:
1163 table.append_constraint(self._convertForeignKeySpec(name, foreignKeySpec, metadata))
1164 with self._transaction():
1165 table.create(connection)
1166 return table
@classmethod
def getTimespanRepresentation(cls) -> type[TimespanDatabaseRepresentation]:
    """Return the `type` describing how `Timespan` objects are stored in
    this database.

    `Database` itself does not use the returned type anywhere else; calling
    code is responsible for keeping DDL and queries consistent with it.

    Returns
    -------
    TimespanReprClass : `type` (`TimespanDatabaseRepresentation` subclass)
        A type that encapsulates the way `Timespan` objects should be
        stored in this database.  This base implementation returns
        `TimespanDatabaseRepresentation.Compound`.

    Notes
    -----
    Timespan-mangling logic is kept outside the `Database` implementations,
    even though the choice of representation is ultimately up to them, for
    two reasons:

    - Timespans appear in relatively few tables and queries in typical
      usage, and the code that operates on them already knows it is
      working with timespans.  A timespan-representation-aware
      implementation of, say, `insert` would need extra logic to detect
      when mangling is required, which would usually be useless overhead.

    - SQLAlchemy's SELECT query expression system has no way to wrap
      multiple columns in a single expression object (the ORM does, but
      it is not used here), so much more of that code would have to be
      wrapped in custom interfaces to encapsulate timespan
      representations.
    """
    representation = TimespanDatabaseRepresentation.Compound
    return representation
def sync(
    self,
    table: sqlalchemy.schema.Table,
    *,
    keys: dict[str, Any],
    compared: dict[str, Any] | None = None,
    extra: dict[str, Any] | None = None,
    returning: Sequence[str] | None = None,
    update: bool = False,
) -> tuple[dict[str, Any] | None, bool | dict[str, Any]]:
    """Insert into a table as necessary to ensure database contains
    values equivalent to the given ones.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table to be queried and possibly inserted into.
    keys : `dict`
        Column name-value pairs used to search for an existing row; must
        be a combination that can be used to select a single row if one
        exists.  If such a row does not exist, these values are used in
        the insert.
    compared : `dict`, optional
        Column name-value pairs that are compared to those in any existing
        row.  If such a row does not exist, these values are used in the
        insert.
    extra : `dict`, optional
        Column name-value pairs that are ignored if a matching row exists,
        but used in an insert if one is necessary.
    returning : `~collections.abc.Sequence` of `str`, optional
        The names of columns whose values should be returned.
    update : `bool`, optional
        If `True` (`False` is default), update the existing row with the
        values in ``compared`` instead of raising `DatabaseConflictError`.

    Returns
    -------
    row : `dict`, optional
        The value of the fields indicated by ``returning``, or `None` if
        ``returning`` is `None`.
    inserted_or_updated : `bool` or `dict`
        If `True`, a new row was inserted; if `False`, a matching row
        already existed.  If a `dict` (only possible if ``update=True``),
        then an existing row was updated, and the dict maps the names of
        the updated columns to their *old* values (new values can be
        obtained from ``compared``).

    Raises
    ------
    DatabaseConflictError
        Raised if the values in ``compared`` do not match the values in the
        database.
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False`, and no matching record
        already exists.
    ConflictingDefinitionError
        Raised if no row matching ``keys`` is found after the insert was
        attempted.
    RuntimeError
        Raised if ``keys`` match more than one row, or if a conflict is
        found in a row this call has just inserted.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    It may be called on read-only databases if and only if the matching row
    does in fact already exist.
    """

    def check() -> tuple[int, dict[str, Any] | None, list | None]:
        """Query for a row that matches the ``keys`` argument, and compare
        to what was given by the caller.

        Returns
        -------
        n : `int`
            Number of matching rows.  ``n != 1`` is always an error, but
            it's a different kind of error depending on where `check` is
            being called.
        bad : `dict` or `None`
            The subset of the keys of ``compared`` for which the existing
            values did not match the given one, mapped to the existing
            values in the database.  A non-empty `dict` is a conflict that
            the caller either reports as an error or resolves by updating
            the row, depending on context.  `None` if ``n != 1``.
        result : `list` or `None`
            Results in the database that correspond to the columns given
            in ``returning``, or `None` if ``returning is None`` or
            ``n != 1``.
        """
        toSelect: set[str] = set()
        if compared is not None:
            toSelect.update(compared.keys())
        if returning is not None:
            toSelect.update(returning)
        if not toSelect:
            # Need to select some column, even if we just want to see
            # how many rows we get back.
            toSelect.add(next(iter(keys.keys())))
        selectSql = (
            sqlalchemy.sql.select(*[table.columns[k].label(k) for k in toSelect])
            .select_from(table)
            .where(sqlalchemy.sql.and_(*[table.columns[k] == v for k, v in keys.items()]))
        )
        with self._transaction() as (_, connection):
            fetched = list(connection.execute(selectSql).mappings())
        if len(fetched) != 1:
            # Zero or multiple matches: nothing meaningful to compare.
            return len(fetched), None, None
        existing = fetched[0]
        if compared is not None:

            def safeNotEqual(a: Any, b: Any) -> bool:
                # astropy times are compared through the time converter
                # helper instead of plain ``!=``.
                if isinstance(a, astropy.time.Time):
                    return not time_utils.TimeConverter().times_equal(a, b)
                return a != b

            inconsistencies = {
                k: existing[k] for k, v in compared.items() if safeNotEqual(existing[k], v)
            }
        else:
            inconsistencies = {}
        if returning is not None:
            toReturn: list | None = [existing[k] for k in returning]
        else:
            toReturn = None
        return 1, inconsistencies, toReturn

    def format_bad(inconsistencies: dict[str, Any]) -> str:
        """Format the 'bad' dictionary of existing values returned by
        ``check`` into a string suitable for an error message.
        """
        assert compared is not None, "Should not be able to get inconsistencies without comparing."
        return ", ".join(f"{k}: {v!r} != {compared[k]!r}" for k, v in inconsistencies.items())

    if self.isTableWriteable(table):
        # Try an insert first, but allow it to fail (in only specific
        # ways).
        row = keys.copy()
        if compared is not None:
            row.update(compared)
        if extra is not None:
            row.update(extra)
        with self.transaction():
            # ``ensure`` returns the number of rows actually inserted.
            inserted = bool(self.ensure(table, row))
            inserted_or_updated: bool | dict[str, Any]
            # Need to perform check() for this branch inside the
            # transaction, so we roll back an insert that didn't do
            # what we expected.  That limits the extent to which we
            # can reduce duplication between this block and the other
            # ones that perform similar logic.
            n, bad, result = check()
            if n < 1:
                raise ConflictingDefinitionError(
                    f"Attempted to ensure {row} exists by inserting it with ON CONFLICT IGNORE, "
                    f"but a post-insert query on {keys} returned no results. "
                    f"Insert was {'' if inserted else 'not '}reported as successful. "
                    "This can occur if the insert violated a database constraint other than the "
                    "unique constraint or primary key used to identify the row in this call."
                )
            elif n > 1:
                raise RuntimeError(
                    f"Keys passed to sync {keys.keys()} do not comprise a "
                    f"unique constraint for table {table.name}."
                )
            elif bad:
                assert (
                    compared is not None
                ), "Should not be able to get inconsistencies without comparing."
                if inserted:
                    raise RuntimeError(
                        f"Conflict ({bad}) in sync after successful insert; this is "
                        "possible if the same table is being updated by a concurrent "
                        "process that isn't using sync, but it may also be a bug in "
                        "daf_butler."
                    )
                elif update:
                    # Overwrite only the columns that differ; the old
                    # values are handed back to the caller via ``bad``.
                    with self._transaction() as (_, connection):
                        connection.execute(
                            table.update()
                            .where(sqlalchemy.sql.and_(*[table.columns[k] == v for k, v in keys.items()]))
                            .values(**{k: compared[k] for k in bad.keys()})
                        )
                    inserted_or_updated = bad
                else:
                    raise DatabaseConflictError(
                        f"Conflict in sync for table {table.name} on column(s) {format_bad(bad)}."
                    )
            else:
                inserted_or_updated = inserted
    else:
        # Database is not writeable; just see if the row exists.
        n, bad, result = check()
        if n < 1:
            raise ReadOnlyDatabaseError("sync needs to insert, but database is read-only.")
        elif n > 1:
            raise RuntimeError("Keys passed to sync do not comprise a unique constraint.")
        elif bad:
            if update:
                raise ReadOnlyDatabaseError("sync needs to update, but database is read-only.")
            else:
                raise DatabaseConflictError(
                    f"Conflict in sync for table {table.name} on column(s) {format_bad(bad)}."
                )
        inserted_or_updated = False
    if returning is None:
        return None, inserted_or_updated
    else:
        assert result is not None
        return {k: v for k, v in zip(returning, result)}, inserted_or_updated
def insert(
    self,
    table: sqlalchemy.schema.Table,
    *rows: dict,
    returnIds: bool = False,
    select: sqlalchemy.sql.expression.SelectBase | None = None,
    names: Iterable[str] | None = None,
) -> list[int] | None:
    """Insert one or more rows into a table, optionally returning
    autoincrement primary key values.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    returnIds : `bool`
        If `True` (`False` is default), return the values of the table's
        autoincrement primary key field (which must exist).
    select : `sqlalchemy.sql.SelectBase`, optional
        A SELECT query expression to insert rows from.  Cannot be provided
        with either ``rows`` or ``returnIds=True``.
    names : `~collections.abc.Iterable` [ `str` ], optional
        Names of columns in ``table`` to be populated, ordered to match the
        columns returned by ``select``.  Ignored if ``select`` is `None`.
        If not provided, the columns returned by ``select`` must be named
        to match the desired columns of ``table``.
    *rows
        Positional arguments are the rows to be inserted, as dictionaries
        mapping column name to value.  The keys in all dictionaries must
        be the same.

    Returns
    -------
    ids : `None`, or `list` of `int`
        If ``returnIds`` is `True`, a `list` containing the inserted
        values for the table's autoincrement primary key.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.
    TypeError
        Raised if ``select`` is combined with ``rows`` or
        ``returnIds=True``.

    Notes
    -----
    The default implementation uses bulk insert syntax when ``returnIds``
    is `False`, and a loop over single-row insert operations when it is
    `True`.

    Derived classes should reimplement when they can provide a more
    efficient implementation (especially for the latter case).

    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.
    """
    self.assertTableWriteable(table, f"Cannot insert into read-only table {table}.")
    if select is not None and (rows or returnIds):
        raise TypeError("'select' is incompatible with passing value rows or returnIds=True.")
    if not rows and select is None:
        # Nothing to do; still honour the requested return shape.
        return [] if returnIds else None
    with self._transaction() as (_, connection):
        if returnIds:
            # One statement per row so each generated key can be read back.
            statement = table.insert()
            return [connection.execute(statement, row).inserted_primary_key[0] for row in rows]
        if select is None:
            connection.execute(table.insert(), rows)
            return None
        if names is None:
            # columns() is deprecated since 1.4, but
            # selected_columns() method did not exist in 1.3.
            if hasattr(select, "selected_columns"):
                source_columns = select.selected_columns
            else:
                source_columns = select.columns
            names = source_columns.keys()
        connection.execute(table.insert().from_select(list(names), select))
        return None
@abstractmethod
def replace(self, table: sqlalchemy.schema.Table, *rows: dict) -> None:
    """Insert rows into a table, replacing any existing row whose primary
    key would collide with a new one.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    *rows
        Positional arguments are the rows to be inserted, as dictionaries
        mapping column name to value.  The keys in all dictionaries must
        be the same.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    Implementations should raise a `sqlalchemy.exc.IntegrityError`
    exception when a constraint other than the primary key would be
    violated.

    Implementations are not required to support `replace` on tables
    with autoincrement keys.
    """
    # Abstract: concrete databases must supply the implementation.
    raise NotImplementedError()
@abstractmethod
def ensure(self, table: sqlalchemy.schema.Table, *rows: dict, primary_key_only: bool = False) -> int:
    """Insert rows into a table, skipping any row whose insertion would
    violate a unique constraint.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    *rows
        Positional arguments are the rows to be inserted, as dictionaries
        mapping column name to value.  The keys in all dictionaries must
        be the same.
    primary_key_only : `bool`, optional
        If `True` (`False` is default), only skip rows that violate the
        primary key constraint, and raise an exception (and rollback
        transactions) for other constraint violations.

    Returns
    -------
    count : `int`
        The number of rows actually inserted.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.
        This is raised even if the operation would do nothing even on a
        writeable database.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    Implementations are not required to support `ensure` on tables
    with autoincrement keys.
    """
    # Abstract: concrete databases must supply the implementation.
    raise NotImplementedError()
def delete(self, table: sqlalchemy.schema.Table, columns: Iterable[str], *rows: dict) -> int:
    """Delete one or more rows from a table.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table that rows should be deleted from.
    columns : `~collections.abc.Iterable` of `str`
        The names of columns that will be used to constrain the rows to
        be deleted; these will be combined via ``AND`` to form the
        ``WHERE`` clause of the delete query.  Any iterable (including a
        one-shot iterator) is accepted.  If it is empty, every row in the
        table is deleted.
    *rows
        Positional arguments are the keys of rows to be deleted, as
        dictionaries mapping column name to value.  The keys in all
        dictionaries must be exactly the names in ``columns``.

    Returns
    -------
    count : `int`
        Number of rows deleted.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    The default implementation should be sufficient for most derived
    classes.
    """
    self.assertTableWriteable(table, f"Cannot delete from read-only table {table}.")
    # Materialize first: a generator or iterator is always truthy, so the
    # emptiness test below is only meaningful on a real sequence.
    columns = list(columns)
    if columns and not rows:
        # If there are no columns, this operation is supposed to delete
        # everything (so we proceed as usual).  But if there are columns,
        # but no rows, it was a constrained bulk operation where the
        # constraint is that no rows match, and we should short-circuit
        # while reporting that no rows were affected.
        return 0
    sql = table.delete()

    # More efficient to use IN operator if there is only one
    # variable changing across all rows.
    content: dict[str, set] = defaultdict(set)
    if len(columns) == 1:
        # Nothing to calculate since we can always use IN
        column = columns[0]
        changing_columns = [column]
        content[column] = {row[column] for row in rows}
    else:
        for row in rows:
            for k, v in row.items():
                content[k].add(v)
        changing_columns = [col for col, values in content.items() if len(values) > 1]

    if len(changing_columns) != 1:
        # More than one column changes each time so do explicit bind
        # parameters and have each row processed separately.
        whereTerms = [table.columns[name] == sqlalchemy.sql.bindparam(name) for name in columns]
        if whereTerms:
            sql = sql.where(sqlalchemy.sql.and_(*whereTerms))
        with self._transaction() as (_, connection):
            return connection.execute(sql, rows).rowcount

    # One of the columns has changing values but any others are
    # fixed.  In this case we can use an IN operator and be more
    # efficient.
    name = changing_columns[0]

    # Simple where clause for the unchanging columns
    clauses = []
    for k, v in content.items():
        if k == name:
            continue
        column = table.columns[k]
        # The set only has one element
        clauses.append(column == v.pop())

    # The IN operator will not work for "infinite" numbers of
    # rows so must batch it up into distinct calls.
    in_content = list(content[name])
    n_elements = len(in_content)

    rowcount = 0
    n_per_loop = 1_000  # Controls how many items to put in IN clause
    with self._transaction() as (_, connection):
        for iposn in range(0, n_elements, n_per_loop):
            endpos = iposn + n_per_loop
            in_clause = table.columns[name].in_(in_content[iposn:endpos])

            newsql = sql.where(sqlalchemy.sql.and_(*clauses, in_clause))
            rowcount += connection.execute(newsql).rowcount
    return rowcount
def deleteWhere(self, table: sqlalchemy.schema.Table, where: sqlalchemy.sql.ColumnElement) -> int:
    """Delete rows from a table using a pre-constructed WHERE clause.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table that rows should be deleted from.
    where : `sqlalchemy.sql.ColumnElement`
        SQLAlchemy expression used, as given, as the ``WHERE`` clause of
        the delete query.

    Returns
    -------
    count : `int`
        Number of rows deleted.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    The default implementation should be sufficient for most derived
    classes.
    """
    self.assertTableWriteable(table, f"Cannot delete from read-only table {table}.")

    statement = table.delete().where(where)
    with self._transaction() as (_, connection):
        outcome = connection.execute(statement)
        return outcome.rowcount
def update(self, table: sqlalchemy.schema.Table, where: dict[str, str], *rows: dict) -> int:
    """Update one or more rows in a table.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table containing the rows to be updated.
    where : `dict` [`str`, `str`]
        A mapping from the names of columns that will be used to search for
        existing rows to the keys that will hold these values in the
        ``rows`` dictionaries.  Note that these may not be the same due to
        SQLAlchemy limitations.
    *rows
        Positional arguments are the rows to be updated.  The keys in all
        dictionaries must be the same, and may correspond to either a
        value in the ``where`` dictionary or the name of a column to be
        updated.

    Returns
    -------
    count : `int`
        Number of rows matched (regardless of whether the update actually
        modified them).  Zero is returned without querying the database
        when no rows are given.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    The default implementation should be sufficient for most derived
    classes.
    """
    self.assertTableWriteable(table, f"Cannot update read-only table {table}.")
    if not rows:
        return 0
    statement = table.update()
    # Each search column is matched against a bind parameter named after
    # the corresponding key of the row dictionaries.
    conditions = [
        table.columns[column] == sqlalchemy.sql.bindparam(row_key) for column, row_key in where.items()
    ]
    statement = statement.where(sqlalchemy.sql.and_(*conditions))
    with self._transaction() as (_, connection):
        outcome = connection.execute(statement, rows)
        return outcome.rowcount
@contextmanager
def query(
    self,
    sql: sqlalchemy.sql.expression.Executable | sqlalchemy.sql.expression.SelectBase,
    *args: Any,
    **kwargs: Any,
) -> Iterator[sqlalchemy.engine.CursorResult]:
    """Run a SELECT query against the database.

    Parameters
    ----------
    sql : `sqlalchemy.sql.expression.SelectBase`
        A SQLAlchemy representation of a ``SELECT`` query.
    *args
        Additional positional arguments are forwarded to
        `sqlalchemy.engine.Connection.execute`.
    **kwargs
        Additional keyword arguments are forwarded to
        `sqlalchemy.engine.Connection.execute`.

    Returns
    -------
    result_context : `sqlalchemy.engine.CursorResults`
        Context manager that returns the query result object when entered.
        These results are invalidated when the context is exited.

    Notes
    -----
    The session connection is used when there is one and is left open.
    Otherwise a new connection is opened from the engine; it is closed when
    the context exits, and also if executing the query raises.
    """
    if self._session_connection is None:
        connection = self._engine.connect()
    else:
        connection = self._session_connection
    try:
        # Execute inside the ``try`` so that a connection opened above is
        # released even when the statement itself fails.
        # TODO: SelectBase is not good for execute(), but it used everywhere,
        # e.g. in daf_relation. We should switch to Executable at some point.
        result = connection.execute(cast("sqlalchemy.sql.expression.Executable", sql), *args, **kwargs)
        yield result
    finally:
        if connection is not self._session_connection:
            connection.close()
@abstractmethod
def constant_rows(
    self,
    fields: NamedValueAbstractSet[ddl.FieldSpec],
    *rows: dict,
    name: str | None = None,
) -> sqlalchemy.sql.FromClause:
    """Build a SQLAlchemy construct holding a handful of literal rows.

    Parameters
    ----------
    fields : `NamedValueAbstractSet` [ `ddl.FieldSpec` ]
        Column definitions for the rows.  Unique and foreign key
        constraints are ignored.
    *rows : `dict`
        Row values, each indexed by field name.
    name : `str`, optional
        Name to give the SQL construct.  When omitted, an opaque but
        unique identifier is generated.

    Returns
    -------
    from_clause : `sqlalchemy.sql.FromClause`
        SQLAlchemy object representing the given rows.  This is guaranteed
        to be something that can be directly joined into a ``SELECT``
        query's ``FROM`` clause, and will not involve a temporary table
        that needs to be cleaned up later.

    Notes
    -----
    This default implementation relies on the SQL-standard ``VALUES``
    construct.  Support for that construct varies enough across popular
    RDBMSs that the method remains abstract, so subclasses must opt in
    explicitly by delegating to `super`.
    """
    construct_name = name if name is not None else f"tmp_{uuid.uuid4().hex}"
    columns = [sqlalchemy.Column(spec.name, spec.getSizedColumnType()) for spec in fields]
    # One tuple per row, with values ordered by the field names.
    data = [tuple(row[key] for key in fields.names) for row in rows]
    return sqlalchemy.sql.values(*columns, name=construct_name).data(data)
def get_constant_rows_max(self) -> int:
    """Report the largest number of rows callers should hand to
    `constant_rows` for this backend.

    Returns
    -------
    max : `int`
        Upper bound on the number of rows.

    Notes
    -----
    The value is meant to track typical performance profiles (or a guess
    at them) rather than only the hard limits of the database engine.
    """
    row_limit = 100
    return row_limit
# Annotation-only declaration: no value is assigned here.
origin: int
"""An integer ID that should be used as the default for any datasets,
quanta, or other entities that use a (autoincrement, origin) compound
primary key (`int`).
"""

# Annotation-only declaration: no value is assigned here; the annotation
# permits `None`.
namespace: str | None
"""The schema or namespace this database instance is associated with
(`str` or `None`).
"""