Coverage for python/lsst/daf/butler/registry/interfaces/_database.py: 14%
405 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-08 05:05 -0700
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-08 05:05 -0700
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = [
24 "Database",
25 "ReadOnlyDatabaseError",
26 "DatabaseConflictError",
27 "SchemaAlreadyDefinedError",
28 "StaticTablesContext",
29]
31import uuid
32import warnings
33from abc import ABC, abstractmethod
34from collections import defaultdict
35from contextlib import contextmanager
36from typing import (
37 Any,
38 Callable,
39 Dict,
40 Iterable,
41 Iterator,
42 List,
43 Optional,
44 Sequence,
45 Set,
46 Tuple,
47 Type,
48 Union,
49 cast,
50 final,
51)
53import astropy.time
54import sqlalchemy
56from ...core import TimespanDatabaseRepresentation, ddl, time_utils
57from ...core.named import NamedValueAbstractSet
58from .._exceptions import ConflictingDefinitionError
61# TODO: method is called with list[ReflectedColumn] in SA 2, and
62# ReflectedColumn does not exist in 1.4.
def _checkExistingTableDefinition(name: str, spec: ddl.TableSpec, inspection: list) -> None:
    """Verify that an existing table has exactly the columns that its
    specification declares.

    Parameters
    ----------
    name : `str`
        Name of the table (only used in error messages).
    spec : `ddl.TableSpec`
        Specification of the table.
    inspection : `list`
        Column descriptions as returned by
        `sqlalchemy.engine.reflection.Inspector.get_columns`; only the
        ``"name"`` item of each entry is used.

    Raises
    ------
    DatabaseConflictError
        Raised if the set of column names found in the database differs from
        the set of field names in the specification.
    """
    reflected = [column["name"] for column in inspection]
    # Only the *set* of names is compared; column order and types are not
    # checked here.
    if spec.fields.names != set(reflected):
        expected = list(spec.fields.names)
        raise DatabaseConflictError(
            f"Table '{name}' exists but is defined differently in the database; "
            f"specification has columns {expected}, while the "
            f"table in the database has {reflected}."
        )
class ReadOnlyDatabaseError(RuntimeError):
    """Error signalling that a write operation was attempted through a
    `Database` that is read-only.
    """
class DatabaseConflictError(ConflictingDefinitionError):
    """Error signalling that what is stored in the database (row values or
    schema entities) does not agree with what this client expects.
    """
class SchemaAlreadyDefinedError(RuntimeError):
    """Error signalling an attempt to initialize a database schema in a
    database where some tables already exist.
    """
class StaticTablesContext:
    """Collector for the static table declarations of one registry layer.

    Instances are handed out by `Database.declareStaticTables`; no other
    code should construct them.
    """

    def __init__(self, db: Database, connection: sqlalchemy.engine.Connection):
        self._db = db
        # (table, constraint) pairs recorded by `addTable`; they are kept
        # separate from the tables so they can be attached later.
        self._foreignKeys: List[Tuple[sqlalchemy.schema.Table, sqlalchemy.schema.ForeignKeyConstraint]] = []
        inspector = sqlalchemy.inspect(connection)
        self._inspector = inspector
        # Snapshot of the tables present in the namespace when this context
        # was created.
        self._tableNames = frozenset(inspector.get_table_names(schema=db.namespace))
        self._initializers: List[Callable[[Database], None]] = []

    def addTable(self, name: str, spec: ddl.TableSpec) -> sqlalchemy.schema.Table:
        """Declare a table and return its SQLAlchemy representation.

        Actual creation of the table may be deferred until the context
        returned by `Database.declareStaticTables` exits, which lets tables
        be declared in any order even when foreign keys link them.

        Parameters
        ----------
        name : `str`
            Logical name of the table; it is mangled via
            ``Database._mangleTableName`` before use.
        spec : `ddl.TableSpec`
            Specification of the table.

        Returns
        -------
        table : `sqlalchemy.schema.Table`
            SQLAlchemy representation of the table.
        """
        db = self._db
        mangled = db._mangleTableName(name)
        if mangled in self._tableNames:
            # The table is already in the database: make sure its columns
            # agree with the specification.
            existing = self._inspector.get_columns(mangled, schema=db.namespace)
            _checkExistingTableDefinition(mangled, spec, existing)
        metadata = db._metadata
        assert metadata is not None, "Guaranteed by context manager that returns this object."
        table = db._convertTableSpec(mangled, spec, metadata)
        # Foreign keys are only recorded here, not attached to the table.
        self._foreignKeys.extend(
            (table, db._convertForeignKeySpec(mangled, keySpec, metadata)) for keySpec in spec.foreignKeys
        )
        return table

    def addTableTuple(self, specs: Tuple[ddl.TableSpec, ...]) -> Tuple[sqlalchemy.schema.Table, ...]:
        """Declare every table in a named tuple of specifications, returning
        the SQLAlchemy tables in a named tuple of the same type.

        Actual creation of the tables may be deferred until the context
        returned by `Database.declareStaticTables` exits, which lets tables
        be declared in any order even when foreign keys link them.

        Notes
        -----
        ``specs`` *must* be an instance of a type created by
        `collections.namedtuple`, not a plain tuple: its field names are
        used as the table names, and the result is built with its ``_make``.
        Because `~collections.namedtuple` is a factory for `type` objects
        rather than a type itself, this cannot be expressed in the type
        annotations.
        """
        tables = (
            self.addTable(field, spec) for field, spec in zip(specs._fields, specs)  # type: ignore
        )
        return specs._make(tables)  # type: ignore

    def addInitializer(self, initializer: Callable[[Database], None]) -> None:
        """Register a callable that performs one-time initialization of a
        database.

        Initialization is anything that changes the state of the database
        and must happen exactly once after the schema has been created, for
        example populating schema attributes.

        Parameters
        ----------
        initializer : callable
            Callable taking a single argument, a `Database` instance.
        """
        self._initializers.append(initializer)
181class Database(ABC):
182 """An abstract interface that represents a particular database engine's
183 representation of a single schema/namespace/database.
185 Parameters
186 ----------
187 origin : `int`
188 An integer ID that should be used as the default for any datasets,
189 quanta, or other entities that use a (autoincrement, origin) compound
190 primary key.
191 engine : `sqlalchemy.engine.Engine`
192 The SQLAlchemy engine for this `Database`.
193 namespace : `str`, optional
194 Name of the schema or namespace this instance is associated with.
195 This is passed as the ``schema`` argument when constructing a
196 `sqlalchemy.schema.MetaData` instance. We use ``namespace`` instead to
197 avoid confusion between "schema means namespace" and "schema means
198 table definitions".
200 Notes
201 -----
202 `Database` requires all write operations to go through its special named
203 methods. Our write patterns are sufficiently simple that we don't really
204 need the full flexibility of SQL insert/update/delete syntax, and we need
205 non-standard (but common) functionality in these operations sufficiently
206 often that it seems worthwhile to provide our own generic API.
208 In contrast, `Database.query` allows arbitrary ``SELECT`` queries (via
209 their SQLAlchemy representation) to be run, as we expect these to require
210 significantly more sophistication while still being limited to standard
211 SQL.
213 `Database` itself has several underscore-prefixed attributes:
215 - ``_engine``: SQLAlchemy object representing its engine.
216 - ``_connection``: method returning a context manager for
217 `sqlalchemy.engine.Connection` object.
218 - ``_metadata``: the `sqlalchemy.schema.MetaData` object representing
219 the tables and other schema entities.
221 These are considered protected (derived classes may access them, but other
222 code should not), and read-only, aside from executing SQL via
223 ``_connection``.
224 """
def __init__(self, *, origin: int, engine: sqlalchemy.engine.Engine, namespace: Optional[str] = None):
    # Public identity of this database: default origin ID and namespace.
    self.origin = origin
    self.namespace = namespace
    # Protected state; nothing is connected or declared at construction.
    self._engine = engine
    # Connection of the currently open session, if any.
    self._session_connection: Optional[sqlalchemy.engine.Connection] = None
    # Schema metadata; `None` until static tables have been declared.
    self._metadata: Optional[sqlalchemy.schema.MetaData] = None
    # Keys of the temporary tables that currently exist.
    self._temp_tables: Set[str] = set()
def __repr__(self) -> str:
    # Reporting the connection URL is more useful than trying to reproduce
    # every constructor argument. Any password in it is masked.
    url = self._engine.url
    if url.password is not None:
        url = url.set(password="***")
    text = str(url)
    if self.namespace:
        text += f"#{self.namespace}"
    return f'{type(self).__name__}("{text}")'
@classmethod
def makeDefaultUri(cls, root: str) -> Optional[str]:
    """Return the default connection URI for the given root directory.

    This base implementation has no default and always returns `None`.

    Parameters
    ----------
    root : `str`
        Root directory the default URI should be appropriate for.

    Returns
    -------
    uri : `str` or `None`
        Default connection URI, or `None` if there can be no such default.
    """
    return None
@classmethod
def fromUri(
    cls,
    uri: str | sqlalchemy.engine.URL,
    *,
    origin: int,
    namespace: str | None = None,
    writeable: bool = True,
) -> Database:
    """Build a `Database` from a SQLAlchemy connection URI.

    The engine is created with `makeEngine` and then wrapped with
    `fromEngine`.

    Parameters
    ----------
    uri : `str` or `sqlalchemy.engine.URL`
        A SQLAlchemy URI connection string.
    origin : `int`
        An integer ID that should be used as the default for any datasets,
        quanta, or other entities that use a (autoincrement, origin)
        compound primary key.
    namespace : `str`, optional
        A database namespace (i.e. schema) the new instance should be
        associated with. If `None` (default), the namespace (if any) is
        inferred from the URI.
    writeable : `bool`, optional
        If `True`, allow write operations on the database, including
        ``CREATE TABLE``.

    Returns
    -------
    db : `Database`
        A new `Database` instance.
    """
    engine = cls.makeEngine(uri, writeable=writeable)
    return cls.fromEngine(engine, origin=origin, namespace=namespace, writeable=writeable)
@classmethod
@abstractmethod
def makeEngine(
    cls, uri: str | sqlalchemy.engine.URL, *, writeable: bool = True
) -> sqlalchemy.engine.Engine:
    """Build a `sqlalchemy.engine.Engine` for a SQLAlchemy URI.

    Parameters
    ----------
    uri : `str` or `sqlalchemy.engine.URL`
        A SQLAlchemy URI connection string.
    writeable : `bool`, optional
        If `True`, allow write operations on the database, including
        ``CREATE TABLE``.

    Returns
    -------
    engine : `sqlalchemy.engine.Engine`
        A database engine.

    Notes
    -----
    Subclasses that can connect to a database in other ways are encouraged
    to add optional arguments to their implementation, provided they stay
    compatible with this base class call signature.
    """
    raise NotImplementedError()
@classmethod
@abstractmethod
def fromEngine(
    cls,
    engine: sqlalchemy.engine.Engine,
    *,
    origin: int,
    namespace: Optional[str] = None,
    writeable: bool = True,
) -> Database:
    """Wrap an existing `sqlalchemy.engine.Engine` in a new `Database`.

    Parameters
    ----------
    engine : `sqlalchemy.engine.Engine`
        The engine for the database. May be shared between `Database`
        instances.
    origin : `int`
        An integer ID that should be used as the default for any datasets,
        quanta, or other entities that use a (autoincrement, origin)
        compound primary key.
    namespace : `str`, optional
        A different database namespace (i.e. schema) the new instance
        should be associated with. If `None` (default), the namespace
        (if any) is inferred from the connection.
    writeable : `bool`, optional
        If `True`, allow write operations on the database, including
        ``CREATE TABLE``.

    Returns
    -------
    db : `Database`
        A new `Database` instance.

    Notes
    -----
    This method lets several `Database` instances share one engine, which
    is desirable when they represent different namespaces that can be
    queried together.
    """
    raise NotImplementedError()
@final
@contextmanager
def session(self) -> Iterator[None]:
    """Return a context manager representing a session (a persistent
    connection to the database).

    Returns
    -------
    context : `AbstractContextManager` [ `None` ]
        A context manager that does not return a value when entered.

    Notes
    -----
    Use this when a series of read-only SQL operations will be run in quick
    succession *without* needing them to give consistent results in the
    presence of concurrent writes (or, more rarely, when conflicting
    concurrent writes are rare/impossible and the session will stay open
    long enough that a transaction is inadvisable).
    """
    # Delegate to the protected implementation, but keep the connection
    # object hidden from callers.
    with self._session() as _connection:
        yield None
@final
@contextmanager
def transaction(
    self,
    *,
    interrupting: bool = False,
    savepoint: bool = False,
    lock: Iterable[sqlalchemy.schema.Table] = (),
    for_temp_tables: bool = False,
) -> Iterator[None]:
    """Return a context manager representing a transaction.

    Parameters
    ----------
    interrupting : `bool`, optional
        If `True` (`False` is default), this transaction block may not be
        nested within an outer one, and attempting to do so is a logic
        (i.e. assertion) error.
    savepoint : `bool`, optional
        If `True` (`False` is default), create a `SAVEPOINT`, so that
        exceptions raised by the database (e.g. due to constraint
        violations) inside this block can be caught outside it without also
        rolling back everything done in an outer transaction block. If
        `False`, transactions may still be nested, but a rollback may be
        generated at any level and affects all levels, and commits are
        deferred until the outermost block completes. If any outer
        transaction block was created with ``savepoint=True``, all inner
        blocks will be as well (regardless of the value passed). This has
        no effect if this is the outermost transaction.
    lock : `Iterable` [ `sqlalchemy.schema.Table` ], optional
        Tables to lock for the duration of this transaction. These locks
        are guaranteed to prevent concurrent writes and allow this
        transaction (only) to acquire the same locks (others should block),
        but only prevent concurrent reads if the database engine requires
        that in order to block concurrent writes.
    for_temp_tables : `bool`, optional
        If `True`, this transaction may involve creating temporary tables.

    Returns
    -------
    context : `AbstractContextManager` [ `None` ]
        A context manager that commits the transaction when it is exited
        without error and rolls back the transaction when it is exited via
        an exception.

    Notes
    -----
    All transactions on a connection managed by one or more `Database`
    instances _must_ go through this method, or transaction state will not
    be correctly managed.
    """
    options = {
        "interrupting": interrupting,
        "savepoint": savepoint,
        "lock": lock,
        "for_temp_tables": for_temp_tables,
    }
    # The protected implementation yields (is_new, connection); neither is
    # exposed to callers of the public method.
    with self._transaction(**options):
        yield None
@contextmanager
def temporary_table(
    self, spec: ddl.TableSpec, name: Optional[str] = None
) -> Iterator[sqlalchemy.schema.Table]:
    """Return a context manager that creates a temporary table on entry and
    drops it on exit.

    Parameters
    ----------
    spec : `ddl.TableSpec`
        Specification for the columns. Unique and foreign key constraints
        may be ignored.
    name : `str`, optional
        If provided, the name of the SQL construct. If not provided, an
        opaque but unique identifier is generated.

    Returns
    -------
    context : `AbstractContextManager` [ `sqlalchemy.schema.Table` ]
        A context manager that returns a SQLAlchemy representation of the
        temporary table when entered.

    Notes
    -----
    Temporary tables may be created, dropped, and written to even in
    read-only databases - at least as far as the Python-level protections
    in the `Database` classes are concerned. Server permissions may say
    otherwise, but in that case they probably need to be modified to
    support the full range of expected read-only butler behavior.
    """
    with self._session() as connection:
        temp = self._make_temporary_table(connection, spec=spec, name=name)
        # Record the table so `isTableWriteable` treats it as writeable.
        self._temp_tables.add(temp.key)
        try:
            yield temp
        finally:
            # The drop runs in its own transaction block; the key is only
            # forgotten once the drop has gone through.
            with self._transaction():
                temp.drop(connection)
            self._temp_tables.remove(temp.key)
@contextmanager
def _session(self) -> Iterator[sqlalchemy.engine.Connection]:
    """Protected implementation of `session` that actually returns the
    connection.

    This is for internal `Database` calls that need the SQLAlchemy
    connection object itself. Subclasses should override this method rather
    than `session`.

    Returns
    -------
    context : `AbstractContextManager` [ `sqlalchemy.engine.Connection` ]
        A context manager that returns a SQLAlchemy connection when
        entered.
    """
    active = self._session_connection
    if active is not None:
        # A session is already open: reuse its connection and leave the
        # closing to whoever opened it.
        yield active
        return
    try:
        # No session yet: open a new connection and close it when done.
        self._session_connection = self._engine.connect()
        yield self._session_connection
    finally:
        opened = self._session_connection
        if opened is not None:
            opened.close()
            self._session_connection = None
            # Temporary tables only live within a session.
            self._temp_tables = set()
@contextmanager
def _transaction(
    self,
    *,
    interrupting: bool = False,
    savepoint: bool = False,
    lock: Iterable[sqlalchemy.schema.Table] = (),
    for_temp_tables: bool = False,
) -> Iterator[tuple[bool, sqlalchemy.engine.Connection]]:
    """Protected implementation of `transaction` that actually returns the
    connection and whether this is a new outermost transaction.

    This is for internal `Database` calls that need the SQLAlchemy
    connection object itself. Subclasses should override this method rather
    than `transaction`.

    Parameters
    ----------
    interrupting : `bool`, optional
        If `True` (`False` is default), this transaction block may not be
        nested within an outer one, and attempting to do so is a logic
        (i.e. assertion) error.
    savepoint : `bool`, optional
        If `True` (`False` is default), create a `SAVEPOINT`, so that
        exceptions raised by the database (e.g. due to constraint
        violations) inside this block can be caught outside it without also
        rolling back everything done in an outer transaction block. If
        `False`, transactions may still be nested, but a rollback may be
        generated at any level and affects all levels, and commits are
        deferred until the outermost block completes. If any outer
        transaction block was created with ``savepoint=True``, all inner
        blocks will be as well (regardless of the value passed). This has
        no effect if this is the outermost transaction.
    lock : `Iterable` [ `sqlalchemy.schema.Table` ], optional
        Tables to lock for the duration of this transaction; passed to
        `_lockTables`. These locks are guaranteed to prevent concurrent
        writes and allow this transaction (only) to acquire the same locks
        (others should block), but only prevent concurrent reads if the
        database engine requires that in order to block concurrent writes.
    for_temp_tables : `bool`, optional
        If `True`, this transaction may involve creating temporary tables.
        This base implementation does not use the value.

    Returns
    -------
    context : `AbstractContextManager` [ `tuple` [ `bool`,
            `sqlalchemy.engine.Connection` ] ]
        A context manager that commits the transaction when it is exited
        without error and rolls back the transaction when it is exited via
        an exception. When entered, it returns a tuple of:

        - ``is_new`` (`bool`): whether this is a new (outermost)
          transaction;
        - ``connection`` (`sqlalchemy.engine.Connection`): the connection.
    """
    with self._session() as connection:
        already_in_transaction = connection.in_transaction()
        assert not (interrupting and already_in_transaction), (
            "Logic error in transaction nesting: an operation that would "
            "interrupt the active transaction context has been requested."
        )
        # Once any enclosing block uses a savepoint, inner blocks do too.
        savepoint = savepoint or connection.in_nested_transaction()
        trans: sqlalchemy.engine.Transaction | None
        if not already_in_transaction:
            # Use a regular (non-savepoint) transaction always for the
            # outermost context.
            trans = connection.begin()
        elif savepoint:
            trans = connection.begin_nested()
        else:
            # Nested non-savepoint transactions don't do anything.
            trans = None
        try:
            # Locks are acquired inside the ``try`` so that a failure to
            # lock rolls back the transaction/savepoint begun above instead
            # of leaving it open on a connection that may be reused.
            self._lockTables(connection, lock)
            yield not already_in_transaction, connection
            if trans is not None:
                trans.commit()
        except BaseException:
            if trans is not None:
                trans.rollback()
            raise
@abstractmethod
def _lockTables(
    self, connection: sqlalchemy.engine.Connection, tables: Iterable[sqlalchemy.schema.Table] = ()
) -> None:
    """Acquire locks on the given tables.

    This is an implementation hook for subclasses that is invoked while a
    transaction is being set up; other code should not call it directly.

    Parameters
    ----------
    connection : `sqlalchemy.engine.Connection`
        Database connection object. A transaction is guaranteed to already
        be in progress on this connection.
    tables : `Iterable` [ `sqlalchemy.schema.Table` ], optional
        Tables to lock for the duration of this transaction. These locks
        are guaranteed to prevent concurrent writes and allow this
        transaction (only) to acquire the same locks (others should block),
        but only prevent concurrent reads if the database engine requires
        that in order to block concurrent writes.
    """
    raise NotImplementedError()
def isTableWriteable(self, table: sqlalchemy.schema.Table) -> bool:
    """Report whether a table can be written to, either because the
    database itself is writeable or because the table is a temporary table.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy table object to check.

    Returns
    -------
    writeable : `bool`
        Whether this table is writeable.
    """
    writeable = self.isWriteable()
    if writeable:
        return writeable
    # Read-only database: only registered temporary tables can be written.
    return table.key in self._temp_tables
def assertTableWriteable(self, table: sqlalchemy.schema.Table, msg: str) -> None:
    """Raise if the given table is not writeable.

    A table is writeable when `isTableWriteable` says so, i.e. when the
    database is writeable or the table is a temporary table.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy table object to check.
    msg : `str`
        Message for the exception raised when the table is not writeable.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if the table is not writeable.
    """
    if self.isTableWriteable(table):
        return
    raise ReadOnlyDatabaseError(msg)
@contextmanager
def declareStaticTables(self, *, create: bool) -> Iterator[StaticTablesContext]:
    """Return a context manager in which the database's static DDL schema
    can be declared.

    Parameters
    ----------
    create : `bool`
        If `True`, attempt to create all tables at the end of the context.
        If `False`, they will be assumed to already exist.

    Returns
    -------
    schema : `StaticTablesContext`
        A helper object that is used to add new tables.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if ``create`` is `True` and `Database.isWriteable` returns
        `False`.
    SchemaAlreadyDefinedError
        Raised if ``create`` is `True` and tables already exist in this
        database's namespace.

    Examples
    --------
    Given a `Database` instance ``db``::

        with db.declareStaticTables(create=True) as schema:
            schema.addTable("table1", TableSpec(...))
            schema.addTable("table2", TableSpec(...))

    Notes
    -----
    A database's static DDL schema must be declared before any dynamic
    tables are managed via calls to `ensureTableExists` or
    `getExistingTable`. The order in which static schema tables are added
    inside the context block is unimportant; they will automatically be
    sorted and added in an order consistent with their foreign key
    relationships.
    """
    if create and not self.isWriteable():
        raise ReadOnlyDatabaseError(f"Cannot create tables in read-only database {self}.")
    self._metadata = sqlalchemy.MetaData(schema=self.namespace)
    try:
        with self._transaction() as (_, connection):
            context = StaticTablesContext(self, connection)
            if create and context._tableNames:
                # The database looks initialized already; refuse to do
                # anything rather than risk modifying or destroying a
                # valid schema.
                raise SchemaAlreadyDefinedError(f"Cannot create tables in non-empty database {self}.")
            yield context
            # Foreign keys were only recorded while tables were declared;
            # attach them now that every table is known.
            for declared, constraint in context._foreignKeys:
                declared.append_constraint(constraint)
            if create:
                namespace = self.namespace
                if namespace is not None and namespace not in context._inspector.get_schema_names():
                    connection.execute(sqlalchemy.schema.CreateSchema(namespace))
                # Some of our columns use sqlalchemy Sequence objects, and
                # a sqlalchemy bug makes building the repr of a Sequence
                # emit a deprecation warning. Silence those warnings while
                # the tables are created.
                with warnings.catch_warnings():
                    warnings.simplefilter("ignore", category=sqlalchemy.exc.SADeprecationWarning)
                    self._metadata.create_all(connection)
                # Run the registered initializers in registration order.
                for initializer in context._initializers:
                    initializer(self)
    except BaseException:
        # Declaration failed: forget the partially built metadata.
        self._metadata = None
        raise
@abstractmethod
def isWriteable(self) -> bool:
    """Report whether this client is able to modify this database.

    Returns
    -------
    writeable : `bool`
        `True` if this database can be modified by this client.
    """
    raise NotImplementedError()
@abstractmethod
def __str__(self) -> str:
    """Return a human-readable identifier for this `Database`.

    The identifier includes any namespace or schema that identifies its
    names within a `Registry`.
    """
    raise NotImplementedError()
@property
def dialect(self) -> sqlalchemy.engine.Dialect:
    """SQLAlchemy dialect of the engine behind this database
    (`sqlalchemy.engine.Dialect`).
    """
    engine = self._engine
    return engine.dialect
def shrinkDatabaseEntityName(self, original: str) -> str:
    """Return a form of the given name that fits within this database
    engine's length limits for table, constraint, index, and sequence
    names.

    Implementations should not assume that simple truncation is safe,
    because multiple long names often begin with the same prefix.

    This default implementation returns the name unchanged.

    Parameters
    ----------
    original : `str`
        The original name.

    Returns
    -------
    shrunk : `str`
        The new, possibly shortened name.
    """
    shrunk = original
    return shrunk
def expandDatabaseEntityName(self, shrunk: str) -> str:
    """Recover the original name of a database entity whose name was too
    long to fit within the database engine's limits.

    This default implementation returns the name unchanged.

    Parameters
    ----------
    shrunk : `str`
        The possibly shortened name.

    Returns
    -------
    original : `str`
        The original name.
    """
    original = shrunk
    return original
def _mangleTableName(self, name: str) -> str:
    """Translate a logical, user-visible table name into the true table
    name used in the database.

    This default implementation returns the name unchanged.

    Parameters
    ----------
    name : `str`
        Input table name. Should not include a namespace (i.e. schema)
        prefix.

    Returns
    -------
    mangled : `str`
        Mangled version of the table name (still with no namespace prefix).

    Notes
    -----
    Reimplementations of this method must be idempotent - mangling an
    already-mangled name must have no effect.
    """
    mangled = name
    return mangled
def _makeColumnConstraints(self, table: str, spec: ddl.FieldSpec) -> List[sqlalchemy.CheckConstraint]:
    """Build the constraints implied by a field specification.

    Parameters
    ----------
    table : `str`
        Name of the table this column is being added to.
    spec : `FieldSpec`
        Specification for the field to be added.

    Returns
    -------
    constraint : `list` of `sqlalchemy.CheckConstraint`
        Constraints added for this column; this default implementation
        returns a new empty list.
    """
    # No additional constraints by default.
    return list()
def _convertFieldSpec(
    self, table: str, spec: ddl.FieldSpec, metadata: sqlalchemy.MetaData, **kwargs: Any
) -> sqlalchemy.schema.Column:
    """Translate a `FieldSpec` into a `sqlalchemy.schema.Column`.

    Parameters
    ----------
    table : `str`
        Name of the table this column is being added to.
    spec : `FieldSpec`
        Specification for the field to be added.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema this field's table is
        being added to.
    **kwargs
        Additional keyword arguments to forward to the
        `sqlalchemy.schema.Column` constructor. This makes it easier for
        derived classes to delegate to ``super()`` while making only minor
        changes.

    Returns
    -------
    column : `sqlalchemy.schema.Column`
        SQLAlchemy representation of the field.
    """
    extras = []
    if spec.autoincrement:
        # Provide a sequence for auto-incrementing on databases that do
        # not support it natively; sqlalchemy ignores it on databases
        # that do.
        sequenceName = self.shrinkDatabaseEntityName(f"{table}_seq_{spec.name}")
        extras.append(sqlalchemy.Sequence(sequenceName, metadata=metadata))
    assert spec.doc is None or isinstance(spec.doc, str), f"Bad doc for {table}.{spec.name}."
    columnOptions = dict(
        nullable=spec.nullable,
        primary_key=spec.primaryKey,
        comment=spec.doc,
        server_default=spec.default,
    )
    return sqlalchemy.schema.Column(
        spec.name,
        spec.getSizedColumnType(),
        *extras,
        **columnOptions,
        **kwargs,
    )
def _convertForeignKeySpec(
    self, table: str, spec: ddl.ForeignKeySpec, metadata: sqlalchemy.MetaData, **kwargs: Any
) -> sqlalchemy.schema.ForeignKeyConstraint:
    """Translate a `ForeignKeySpec` into a
    `sqlalchemy.schema.ForeignKeyConstraint`.

    Parameters
    ----------
    table : `str`
        Name of the table this foreign key is being added to.
    spec : `ForeignKeySpec`
        Specification for the foreign key to be added.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema this constraint is
        being added to.
    **kwargs
        Additional keyword arguments to forward to the
        `sqlalchemy.schema.ForeignKeyConstraint` constructor. This makes it
        easier for derived classes to delegate to ``super()`` while making
        only minor changes. ``name`` and ``ondelete`` are always supplied
        by this method and must not be passed here.

    Returns
    -------
    constraint : `sqlalchemy.schema.ForeignKeyConstraint`
        SQLAlchemy representation of the constraint.
    """
    # Mangle the referenced table name once; it is needed both for the
    # constraint name and for every target column reference.
    targetTable = self._mangleTableName(spec.table)
    name = self.shrinkDatabaseEntityName(
        "_".join(["fkey", table, targetTable, *spec.target, *spec.source])
    )
    return sqlalchemy.schema.ForeignKeyConstraint(
        spec.source,
        [f"{targetTable}.{col}" for col in spec.target],
        name=name,
        ondelete=spec.onDelete,
        # Forward caller-supplied options, as documented; they used to be
        # silently discarded.
        **kwargs,
    )
def _convertExclusionConstraintSpec(
    self,
    table: str,
    spec: Tuple[Union[str, Type[TimespanDatabaseRepresentation]], ...],
    metadata: sqlalchemy.MetaData,
) -> sqlalchemy.schema.Constraint:
    """Translate a `tuple` from `ddl.TableSpec.exclusion` into a SQLAlchemy
    constraint representation.

    This base implementation does not support exclusion constraints and
    always raises.

    Parameters
    ----------
    table : `str`
        Name of the table this constraint is being added to.
    spec : `tuple` [ `str` or `type` ]
        A tuple of `str` column names and the `type` object returned by
        `getTimespanRepresentation` (which must appear exactly once),
        indicating the order of the columns in the index used to back the
        constraint.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema this constraint is
        being added to.

    Returns
    -------
    constraint : `sqlalchemy.schema.Constraint`
        SQLAlchemy representation of the constraint.

    Raises
    ------
    NotImplementedError
        Raised if this database does not support exclusion constraints.
    """
    message = f"Database {self} does not support exclusion constraints."
    raise NotImplementedError(message)
def _convertTableSpec(
    self, name: str, spec: ddl.TableSpec, metadata: sqlalchemy.MetaData, **kwargs: Any
) -> sqlalchemy.schema.Table:
    """Convert a `TableSpec` to a `sqlalchemy.schema.Table`.

    Parameters
    ----------
    name : `str`
        Name of the table; it is passed through ``_mangleTableName``
        before being used.
    spec : `TableSpec`
        Specification for the table to be converted.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema this table is being
        added to.
    **kwargs
        Additional keyword arguments to forward to the
        `sqlalchemy.schema.Table` constructor.  This is provided to make
        it easier for derived classes to delegate to ``super()`` while
        making only minor changes.

    Returns
    -------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy representation of the table.

    Notes
    -----
    Foreign key *constraints* from ``spec.foreignKeys`` are not created
    here (only the indexes that support them), in order to avoid circular
    dependencies.  The constraints are added by higher-level logic in
    `ensureTableExists`, `getExistingTable`, and `declareStaticTables`.
    """
    name = self._mangleTableName(name)

    # Columns first, followed by any per-column constraints.
    args: list[sqlalchemy.schema.SchemaItem] = []
    for fieldSpec in spec.fields:
        args.append(self._convertFieldSpec(name, fieldSpec, metadata))
    for fieldSpec in spec.fields:
        args.extend(self._makeColumnConstraints(name, fieldSpec))

    # Column tuples already backed by an index (primary key, unique
    # constraints, explicit indexes); used to avoid emitting duplicate
    # explicit or foreign key indexes for them.
    primaryKeyColumns = tuple(fieldSpec.name for fieldSpec in spec.fields if fieldSpec.primaryKey)
    covered = {primaryKeyColumns}

    for columns in spec.unique:
        uniqueName = self.shrinkDatabaseEntityName("_".join([name, "unq"] + list(columns)))
        args.append(sqlalchemy.schema.UniqueConstraint(*columns, name=uniqueName))
    covered.update(spec.unique)

    for index in spec.indexes:
        if index.columns in covered:
            continue
        args.append(
            sqlalchemy.schema.Index(
                self.shrinkDatabaseEntityName("_".join([name, "idx"] + list(index.columns))),
                *index.columns,
                unique=(index.columns in spec.unique),
                **index.kwargs,
            )
        )
    covered.update(index.columns for index in spec.indexes)

    for fk in spec.foreignKeys:
        if not fk.addIndex or fk.source in covered:
            continue
        fkIndexName = self.shrinkDatabaseEntityName("_".join((name, "fkidx") + fk.source))
        args.append(sqlalchemy.schema.Index(fkIndexName, *fk.source))

    for excl in spec.exclusion:
        args.append(self._convertExclusionConstraintSpec(name, excl, metadata))

    assert spec.doc is None or isinstance(spec.doc, str), f"Bad doc for {name}."
    return sqlalchemy.schema.Table(name, metadata, *args, comment=spec.doc, info={"spec": spec}, **kwargs)
def ensureTableExists(self, name: str, spec: ddl.TableSpec) -> sqlalchemy.schema.Table:
    """Ensure that a table with the given name and specification exists,
    creating it if necessary.

    Parameters
    ----------
    name : `str`
        Name of the table (not including namespace qualifiers).
    spec : `TableSpec`
        Specification for the table.  This will be used when creating the
        table, and *may* be used when obtaining an existing table to check
        for consistency, but no such check is guaranteed.

    Returns
    -------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy representation of the table.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False`, and the table does not
        already exist.
    DatabaseConflictError
        Raised if the table exists but ``spec`` is inconsistent with its
        definition.
    sqlalchemy.exc.DatabaseError
        Re-raised if creating the table fails and the table still does
        not exist afterwards.

    Notes
    -----
    This method may not be called within transactions.  It may be called on
    read-only databases if and only if the table does in fact already
    exist.

    Subclasses may override this method, but usually should not need to.
    """
    # TODO: if _engine is used to make a table then it uses separate
    # connection and should not interfere with current transaction
    assert (
        self._session_connection is None or not self._session_connection.in_transaction()
    ), "Table creation interrupts transactions."
    assert self._metadata is not None, "Static tables must be declared before dynamic tables."
    table = self.getExistingTable(name, spec)
    if table is not None:
        return table
    if not self.isWriteable():
        raise ReadOnlyDatabaseError(
            f"Table {name} does not exist, and cannot be created because database {self} is read-only."
        )
    table = self._convertTableSpec(name, spec, self._metadata)
    for foreignKeySpec in spec.foreignKeys:
        table.append_constraint(self._convertForeignKeySpec(name, foreignKeySpec, self._metadata))
    try:
        with self._transaction() as (_, connection):
            table.create(connection)
    except sqlalchemy.exc.DatabaseError:
        # Some other process could have created the table meanwhile, which
        # usually causes OperationalError or ProgrammingError.  We cannot
        # use IF NOT EXISTS clause in this case due to PostgreSQL race
        # condition on server side which causes IntegrityError.  Instead we
        # catch these exceptions (they all inherit DatabaseError) and
        # re-check whether table is now there.
        #
        # The Table object built above is still registered in the
        # metadata even though CREATE failed.  Drop it first, otherwise
        # getExistingTable would find that stale in-memory entry, never
        # look at the database, and the original error would be swallowed.
        self._metadata.remove(table)
        table = self.getExistingTable(name, spec)
        if table is None:
            raise
    return table
def getExistingTable(self, name: str, spec: ddl.TableSpec) -> Optional[sqlalchemy.schema.Table]:
    """Look up an already-existing table by name and specification.

    Parameters
    ----------
    name : `str`
        Name of the table (not including namespace qualifiers).
    spec : `TableSpec`
        Specification for the table.  It is used to build the SQLAlchemy
        representation of the table and to check that the existing
        definition is consistent with it.

    Returns
    -------
    table : `sqlalchemy.schema.Table` or `None`
        SQLAlchemy representation of the table, or `None` if it does not
        exist.

    Raises
    ------
    DatabaseConflictError
        Raised if the table exists but ``spec`` is inconsistent with its
        definition.

    Notes
    -----
    This method can be called within transactions and never modifies the
    database.

    Subclasses may override this method, but usually should not need to.
    """
    assert self._metadata is not None, "Static tables must be declared before dynamic tables."
    name = self._mangleTableName(name)
    if self.namespace is None:
        metadataKey = name
    else:
        metadataKey = f"{self.namespace}.{name}"

    # Fast path: the table is already known to the in-memory metadata, so
    # only the column names need to be compared.
    known = self._metadata.tables.get(metadataKey)
    if known is not None:
        if spec.fields.names != set(known.columns.keys()):
            raise DatabaseConflictError(
                f"Table '{name}' has already been defined differently; the new "
                f"specification has columns {list(spec.fields.names)}, while "
                f"the previous definition has {list(known.columns.keys())}."
            )
        return known

    # Otherwise ask the database itself, through the session connection
    # when there is one and through the engine when there is not.
    bind = self._engine if self._session_connection is None else self._session_connection
    inspector = sqlalchemy.inspect(bind, raiseerr=True)
    if name not in inspector.get_table_names(schema=self.namespace):
        return None
    _checkExistingTableDefinition(name, spec, inspector.get_columns(name, schema=self.namespace))
    found = self._convertTableSpec(name, spec, self._metadata)
    for foreignKeySpec in spec.foreignKeys:
        found.append_constraint(self._convertForeignKeySpec(name, foreignKeySpec, self._metadata))
    return found
def _make_temporary_table(
    self,
    connection: sqlalchemy.engine.Connection,
    spec: ddl.TableSpec,
    name: Optional[str] = None,
    **kwargs: Any,
) -> sqlalchemy.schema.Table:
    """Create a temporary table.

    Parameters
    ----------
    connection : `sqlalchemy.engine.Connection`
        Connection to use when creating the table.
    spec : `TableSpec`
        Specification for the table.
    name : `str`, optional
        A unique (within this session/connection) name for the table.
        Subclasses may override to modify the actual name used.  If not
        provided, a unique name is generated from a random UUID.
    **kwargs
        Additional keyword arguments to forward to the
        `sqlalchemy.schema.Table` constructor.  This is provided to make
        it easier for derived classes to delegate to ``super()`` while
        making only minor changes.

    Returns
    -------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy representation of the table.

    Raises
    ------
    RuntimeError
        Raised if the static schema has not been defined yet.
    ValueError
        Raised if the table key is already registered as a temporary
        table and differs from the requested name.
    """
    name = f"tmp_{uuid.uuid4().hex}" if name is None else name
    metadata = self._metadata
    if metadata is None:
        raise RuntimeError("Cannot create temporary table before static schema is defined.")
    table = self._convertTableSpec(
        name,
        spec,
        metadata,
        prefixes=["TEMPORARY"],
        schema=sqlalchemy.schema.BLANK_SCHEMA,
        **kwargs,
    )
    if table.key in self._temp_tables and table.key != name:
        raise ValueError(
            f"A temporary table with name {name} (transformed to {table.key} by "
            "Database) already exists."
        )
    for foreignKeySpec in spec.foreignKeys:
        constraint = self._convertForeignKeySpec(name, foreignKeySpec, metadata)
        table.append_constraint(constraint)
    with self._transaction():
        table.create(connection)
    return table
@classmethod
def getTimespanRepresentation(cls) -> Type[TimespanDatabaseRepresentation]:
    """Return the `type` that describes how `Timespan` objects are stored
    in this database.

    `Database` itself does not use the returned type anywhere else; it is
    up to calling code to keep DDL and queries consistent with it.

    Returns
    -------
    TimespanReprClass : `type` (`TimespanDatabaseRepresentation` subclass)
        A type that encapsulates the way `Timespan` objects should be
        stored in this database.

    Notes
    -----
    Timespan-mangling logic is kept outside the `Database`
    implementations, even though the choice of representation ultimately
    belongs to a `Database` implementation, for two main reasons:

    - Timespans appear in relatively few tables and queries in typical
      usage, and the code operating on them already knows it is dealing
      with timespans.  A timespan-representation-aware implementation of,
      say, `insert` would need extra logic to detect when mangling is
      required, which would usually be useless overhead.

    - SQLAlchemy's SELECT query expression system has no way to wrap
      multiple columns in a single expression object (the ORM does, but
      the ORM is not used here), so _much_ more of that code would have
      to be wrapped in our own interfaces to encapsulate timespan
      representations there.
    """
    representation: Type[TimespanDatabaseRepresentation] = TimespanDatabaseRepresentation.Compound
    return representation
def sync(
    self,
    table: sqlalchemy.schema.Table,
    *,
    keys: Dict[str, Any],
    compared: Optional[Dict[str, Any]] = None,
    extra: Optional[Dict[str, Any]] = None,
    returning: Optional[Sequence[str]] = None,
    update: bool = False,
) -> Tuple[Optional[Dict[str, Any]], Union[bool, Dict[str, Any]]]:
    """Insert into a table as necessary to ensure the database contains
    values equivalent to the given ones.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table to be queried and possibly inserted into.
    keys : `dict`
        Column name-value pairs used to search for an existing row; must
        be a combination that selects a single row if one exists.  These
        values are also used in the insert when no such row exists.
    compared : `dict`, optional
        Column name-value pairs that are compared to those of any existing
        row.  These values are also used in the insert when no such row
        exists.
    extra : `dict`, optional
        Column name-value pairs that are ignored if a matching row exists,
        but used in an insert if one is necessary.
    returning : `~collections.abc.Sequence` of `str`, optional
        The names of columns whose values should be returned.
    update : `bool`, optional
        If `True` (`False` is default), update the existing row with the
        values in ``compared`` instead of raising `DatabaseConflictError`.

    Returns
    -------
    row : `dict`, optional
        The value of the fields indicated by ``returning``, or `None` if
        ``returning`` is `None`.
    inserted_or_updated : `bool` or `dict`
        If `True`, a new row was inserted; if `False`, a matching row
        already existed.  If a `dict` (only possible if ``update=True``),
        an existing row was updated, and the dict maps the names of the
        updated columns to their *old* values (new values can be obtained
        from ``compared``).

    Raises
    ------
    DatabaseConflictError
        Raised if the values in ``compared`` do not match the values in the
        database.
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False`, and no matching record
        already exists.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    It may be called on read-only databases if and only if the matching row
    does in fact already exist.
    """

    def differs(stored: Any, given: Any) -> bool:
        """Compare a stored value with a given one; `astropy.time.Time`
        values are compared through ``TimeConverter.times_equal``.
        """
        if not isinstance(stored, astropy.time.Time):
            return stored != given
        return not time_utils.TimeConverter().times_equal(stored, given)

    def lookup() -> Tuple[int, Optional[Dict[str, Any]], Optional[List]]:
        """Query for the row matching ``keys`` and compare it with what
        the caller provided.

        Returns
        -------
        n : `int`
            Number of matching rows.  ``n != 1`` is always an error, but
            which kind depends on where `lookup` is called from.
        bad : `dict` or `None`
            The subset of the keys of ``compared`` whose stored values
            differ from the given ones, mapped to the stored values.
            `None` if ``n != 1``.
        result : `list` or `None`
            Stored values for the columns in ``returning``, or `None` if
            ``returning is None`` or ``n != 1``.
        """
        wanted: Set[str] = set()
        if compared is not None:
            wanted.update(compared.keys())
        if returning is not None:
            wanted.update(returning)
        if not wanted:
            # Some column has to be selected, even if only the number of
            # rows matters.
            wanted.add(next(iter(keys.keys())))
        statement = (
            sqlalchemy.sql.select(*[table.columns[k].label(k) for k in wanted])
            .select_from(table)
            .where(sqlalchemy.sql.and_(*[table.columns[k] == v for k, v in keys.items()]))
        )
        with self._transaction() as (_, connection):
            matches = list(connection.execute(statement).mappings())
        if len(matches) != 1:
            return len(matches), None, None
        found = matches[0]
        mismatched: Dict[str, Any] = {}
        if compared is not None:
            for column, expected in compared.items():
                if differs(found[column], expected):
                    mismatched[column] = found[column]
        values: Optional[list] = None if returning is None else [found[k] for k in returning]
        return 1, mismatched, values

    def format_bad(inconsistencies: Dict[str, Any]) -> str:
        """Render the mapping of conflicting stored values returned by
        ``lookup`` as text suitable for an error message.
        """
        assert compared is not None, "Should not be able to get inconsistencies without comparing."
        parts = (f"{k}: {v!r} != {compared[k]!r}" for k, v in inconsistencies.items())
        return ", ".join(parts)

    inserted_or_updated: Union[bool, Dict[str, Any]]
    if not self.isTableWriteable(table):
        # Database is not writeable; just see if the row exists.
        n, bad, result = lookup()
        if n < 1:
            raise ReadOnlyDatabaseError("sync needs to insert, but database is read-only.")
        if n > 1:
            raise RuntimeError("Keys passed to sync do not comprise a unique constraint.")
        if bad:
            if update:
                raise ReadOnlyDatabaseError("sync needs to update, but database is read-only.")
            raise DatabaseConflictError(
                f"Conflict in sync for table {table.name} on column(s) {format_bad(bad)}."
            )
        inserted_or_updated = False
    else:
        # Try an insert first, but allow it to fail (in only specific
        # ways).
        row = keys.copy()
        if compared is not None:
            row.update(compared)
        if extra is not None:
            row.update(extra)
        with self.transaction():
            inserted = bool(self.ensure(table, row))
            # The lookup has to happen inside this transaction so that an
            # insert that did not do what was expected is rolled back.
            n, bad, result = lookup()
            if n < 1:
                raise ConflictingDefinitionError(
                    f"Attempted to ensure {row} exists by inserting it with ON CONFLICT IGNORE, "
                    f"but a post-insert query on {keys} returned no results. "
                    f"Insert was {'' if inserted else 'not '}reported as successful. "
                    "This can occur if the insert violated a database constraint other than the "
                    "unique constraint or primary key used to identify the row in this call."
                )
            if n > 1:
                raise RuntimeError(
                    f"Keys passed to sync {keys.keys()} do not comprise a "
                    f"unique constraint for table {table.name}."
                )
            if bad:
                assert (
                    compared is not None
                ), "Should not be able to get inconsistencies without comparing."
                if inserted:
                    raise RuntimeError(
                        f"Conflict ({bad}) in sync after successful insert; this is "
                        "possible if the same table is being updated by a concurrent "
                        "process that isn't using sync, but it may also be a bug in "
                        "daf_butler."
                    )
                if not update:
                    raise DatabaseConflictError(
                        f"Conflict in sync for table {table.name} on column(s) {format_bad(bad)}."
                    )
                # Overwrite only the columns that actually differ and
                # report their previous values to the caller.
                with self._transaction() as (_, connection):
                    connection.execute(
                        table.update()
                        .where(sqlalchemy.sql.and_(*[table.columns[k] == v for k, v in keys.items()]))
                        .values(**{k: compared[k] for k in bad.keys()})
                    )
                inserted_or_updated = bad
            else:
                inserted_or_updated = inserted
    if returning is None:
        return None, inserted_or_updated
    assert result is not None
    return dict(zip(returning, result)), inserted_or_updated
def insert(
    self,
    table: sqlalchemy.schema.Table,
    *rows: dict,
    returnIds: bool = False,
    select: Optional[sqlalchemy.sql.expression.SelectBase] = None,
    names: Optional[Iterable[str]] = None,
) -> Optional[List[int]]:
    """Insert one or more rows into a table, optionally returning
    autoincrement primary key values.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    returnIds: `bool`
        If `True` (`False` is default), return the values of the table's
        autoincrement primary key field (which must exist).
    select : `sqlalchemy.sql.SelectBase`, optional
        A SELECT query expression to insert rows from.  Cannot be provided
        with either ``rows`` or ``returnIds=True``.
    names : `Iterable` [ `str` ], optional
        Names of columns in ``table`` to be populated, ordered to match the
        columns returned by ``select``.  Ignored if ``select`` is `None`.
        If not provided, the columns returned by ``select`` must be named
        to match the desired columns of ``table``.
    *rows
        Positional arguments are the rows to be inserted, as dictionaries
        mapping column name to value.  The keys in all dictionaries must
        be the same.

    Returns
    -------
    ids : `None`, or `list` of `int`
        If ``returnIds`` is `True`, a `list` containing the inserted
        values for the table's autoincrement primary key.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.
    TypeError
        Raised if ``select`` is combined with ``rows`` or
        ``returnIds=True``.

    Notes
    -----
    The default implementation uses bulk insert syntax when ``returnIds``
    is `False`, and a loop over single-row insert operations when it is
    `True`.

    Derived classes should reimplement when they can provide a more
    efficient implementation (especially for the latter case).

    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.
    """
    self.assertTableWriteable(table, f"Cannot insert into read-only table {table}.")
    if select is not None and (rows or returnIds):
        raise TypeError("'select' is incompatible with passing value rows or returnIds=True.")
    if not rows and select is None:
        # Nothing to insert at all.
        return [] if returnIds else None
    with self._transaction() as (_, connection):
        if returnIds:
            # One statement per row so each generated key can be read back.
            statement = table.insert()
            return [connection.execute(statement, row).inserted_primary_key[0] for row in rows]
        if select is None:
            connection.execute(table.insert(), rows)
            return None
        if names is None:
            # columns() is deprecated since 1.4, but
            # selected_columns() method did not exist in 1.3.
            if hasattr(select, "selected_columns"):
                names = select.selected_columns.keys()
            else:
                names = select.columns.keys()
        connection.execute(table.insert().from_select(list(names), select))
        return None
@abstractmethod
def replace(self, table: sqlalchemy.schema.Table, *rows: dict) -> None:
    """Insert rows into a table, replacing any existing row whose primary
    key would otherwise conflict with a new one.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    *rows
        Positional arguments are the rows to be inserted, as dictionaries
        mapping column name to value.  The keys in all dictionaries must
        be the same.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    Implementations should raise a `sqlalchemy.exc.IntegrityError`
    exception when a constraint other than the primary key would be
    violated.

    Implementations are not required to support `replace` on tables
    with autoincrement keys.
    """
    # Abstract: concrete databases must provide the implementation.
    raise NotImplementedError
@abstractmethod
def ensure(self, table: sqlalchemy.schema.Table, *rows: dict, primary_key_only: bool = False) -> int:
    """Insert rows into a table, skipping every row whose insertion would
    violate a unique constraint.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    *rows
        Positional arguments are the rows to be inserted, as dictionaries
        mapping column name to value.  The keys in all dictionaries must
        be the same.
    primary_key_only : `bool`, optional
        If `True` (`False` is default), only skip rows that violate the
        primary key constraint, and raise an exception (and rollback
        transactions) for other constraint violations.

    Returns
    -------
    count : `int`
        The number of rows actually inserted.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.
        This is raised even if the operation would do nothing even on a
        writeable database.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    Implementations are not required to support `ensure` on tables
    with autoincrement keys.
    """
    # Abstract: concrete databases must provide the implementation.
    raise NotImplementedError
def delete(self, table: sqlalchemy.schema.Table, columns: Iterable[str], *rows: dict) -> int:
    """Delete one or more rows from a table.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table that rows should be deleted from.
    columns: `~collections.abc.Iterable` of `str`
        The names of columns that will be used to constrain the rows to
        be deleted; these will be combined via ``AND`` to form the
        ``WHERE`` clause of the delete query.  Any iterable is accepted,
        including one-shot iterators.
    *rows
        Positional arguments are the keys of rows to be deleted, as
        dictionaries mapping column name to value.  The keys in all
        dictionaries must be exactly the names in ``columns``.

    Returns
    -------
    count : `int`
        Number of rows deleted.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    The default implementation should be sufficient for most derived
    classes.
    """
    self.assertTableWriteable(table, f"Cannot delete from read-only table {table}.")
    # Materialize before testing truthiness: an iterator or generator is
    # always truthy, so an empty one would otherwise be treated as
    # "columns were given" and skip the delete-everything case below.
    columns = list(columns)
    if columns and not rows:
        # If there are no columns, this operation is supposed to delete
        # everything (so we proceed as usual).  But if there are columns,
        # but no rows, it was a constrained bulk operation where the
        # constraint is that no rows match, and we should short-circuit
        # while reporting that no rows were affected.
        return 0
    sql = table.delete()

    # More efficient to use IN operator if there is only one
    # variable changing across all rows.
    content: Dict[str, Set] = defaultdict(set)
    if len(columns) == 1:
        # Nothing to calculate since we can always use IN
        column = columns[0]
        changing_columns = [column]
        content[column] = {row[column] for row in rows}
    else:
        for row in rows:
            for k, v in row.items():
                content[k].add(v)
        changing_columns = [col for col, values in content.items() if len(values) > 1]

    if len(changing_columns) != 1:
        # More than one column changes each time so do explicit bind
        # parameters and have each row processed separately.
        whereTerms = [table.columns[name] == sqlalchemy.sql.bindparam(name) for name in columns]
        if whereTerms:
            sql = sql.where(sqlalchemy.sql.and_(*whereTerms))
        with self._transaction() as (_, connection):
            return connection.execute(sql, rows).rowcount

    # One of the columns has changing values but any others are
    # fixed.  In this case we can use an IN operator and be more
    # efficient.
    name = changing_columns.pop()

    # Simple where clause for the unchanging columns
    clauses = []
    for k, v in content.items():
        if k == name:
            continue
        # The set only has one element
        clauses.append(table.columns[k] == v.pop())

    # The IN operator will not work for "infinite" numbers of
    # rows so must batch it up into distinct calls.
    in_content = list(content[name])
    n_elements = len(in_content)

    rowcount = 0
    n_per_loop = 1_000  # Controls how many items to put in IN clause
    with self._transaction() as (_, connection):
        for iposn in range(0, n_elements, n_per_loop):
            endpos = iposn + n_per_loop
            in_clause = table.columns[name].in_(in_content[iposn:endpos])

            newsql = sql.where(sqlalchemy.sql.and_(*clauses, in_clause))
            rowcount += connection.execute(newsql).rowcount
    return rowcount
def deleteWhere(self, table: sqlalchemy.schema.Table, where: sqlalchemy.sql.ColumnElement) -> int:
    """Delete rows from a table using a pre-constructed WHERE clause.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table that rows should be deleted from.
    where: `sqlalchemy.sql.ColumnElement`
        Expression passed unchanged to the ``where`` method of the
        table's DELETE statement.

    Returns
    -------
    count : `int`
        Number of rows deleted.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    The default implementation should be sufficient for most derived
    classes.
    """
    self.assertTableWriteable(table, f"Cannot delete from read-only table {table}.")

    statement = table.delete().where(where)
    with self._transaction() as (_, connection):
        outcome = connection.execute(statement)
        return outcome.rowcount
def update(self, table: sqlalchemy.schema.Table, where: Dict[str, str], *rows: dict) -> int:
    """Update one or more rows in a table.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table containing the rows to be updated.
    where : `dict` [`str`, `str`]
        A mapping from the names of columns that will be used to search for
        existing rows to the keys that will hold these values in the
        ``rows`` dictionaries.  Note that these may not be the same due to
        SQLAlchemy limitations.
    *rows
        Positional arguments are the rows to be updated.  The keys in all
        dictionaries must be the same, and may correspond to either a
        value in the ``where`` dictionary or the name of a column to be
        updated.

    Returns
    -------
    count : `int`
        Number of rows matched (regardless of whether the update actually
        modified them).  Zero is returned without touching the database
        when no rows are given.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    The default implementation should be sufficient for most derived
    classes.
    """
    self.assertTableWriteable(table, f"Cannot update read-only table {table}.")
    if not rows:
        return 0
    statement = table.update()
    # Each search column is matched against a bind parameter named after
    # the key that carries its value in the row dictionaries.
    conditions = [
        table.columns[column] == sqlalchemy.sql.bindparam(param) for column, param in where.items()
    ]
    statement = statement.where(sqlalchemy.sql.and_(*conditions))
    with self._transaction() as (_, connection):
        outcome = connection.execute(statement, rows)
        return outcome.rowcount
@contextmanager
def query(
    self,
    sql: sqlalchemy.sql.expression.Executable | sqlalchemy.sql.expression.SelectBase,
    *args: Any,
    **kwargs: Any,
) -> Iterator[sqlalchemy.engine.CursorResult]:
    """Run a SELECT query against the database.

    Parameters
    ----------
    sql : `sqlalchemy.sql.expression.SelectBase`
        A SQLAlchemy representation of a ``SELECT`` query.
    *args
        Additional positional arguments are forwarded to
        `sqlalchemy.engine.Connection.execute`.
    **kwargs
        Additional keyword arguments are forwarded to
        `sqlalchemy.engine.Connection.execute`.

    Returns
    -------
    result_context : `sqlalchemy.engine.CursorResults`
        Context manager that returns the query result object when entered.
        These results are invalidated when the context is exited.

    Notes
    -----
    When there is no session connection, a new connection is opened from
    the engine for the duration of the context and closed on exit, even if
    executing the statement raises.  An existing session connection is
    reused and left open.
    """
    if self._session_connection is None:
        connection = self._engine.connect()
    else:
        connection = self._session_connection
    try:
        # Execute inside the ``try`` so that a failing statement cannot
        # leak a connection that was opened just for this query.
        # TODO: SelectBase is not good for execute(), but it used everywhere,
        # e.g. in daf_relation. We should switch to Executable at some point.
        result = connection.execute(cast(sqlalchemy.sql.expression.Executable, sql), *args, **kwargs)
        yield result
    finally:
        if connection is not self._session_connection:
            connection.close()
@abstractmethod
def constant_rows(
    self,
    fields: NamedValueAbstractSet[ddl.FieldSpec],
    *rows: dict,
    name: Optional[str] = None,
) -> sqlalchemy.sql.FromClause:
    """Build a SQLAlchemy construct holding a handful of literal rows.

    Parameters
    ----------
    fields : `NamedValueAbstractSet` [ `ddl.FieldSpec` ]
        Column definitions for the rows.  Unique and foreign key
        constraints are ignored.
    *rows : `dict`
        Row values; each dictionary is indexed by the names in
        ``fields.names``.
    name : `str`, optional
        Name of the SQL construct.  When omitted, an opaque unique
        identifier is generated.

    Returns
    -------
    from_clause : `sqlalchemy.sql.FromClause`
        SQLAlchemy object representing the given rows.  This is guaranteed
        to be something that can be directly joined into a ``SELECT``
        query's ``FROM`` clause, and will not involve a temporary table
        that needs to be cleaned up later.

    Notes
    -----
    This default implementation relies on the SQL-standard ``VALUES``
    construct.  Because support for that construct varies across popular
    RDBMSs, the method stays abstract so that subclasses must opt in
    explicitly by delegating to `super`.
    """
    construct_name = name if name is not None else f"tmp_{uuid.uuid4().hex}"
    columns = [sqlalchemy.Column(spec.name, spec.getSizedColumnType()) for spec in fields]
    values_clause = sqlalchemy.sql.values(*columns, name=construct_name)
    # Flatten each row dict into a tuple ordered like ``fields.names``.
    data = []
    for row in rows:
        data.append(tuple(row[column_name] for column_name in fields.names))
    return values_clause.data(data)
def get_constant_rows_max(self) -> int:
    """Report the largest number of rows callers should give to
    `constant_rows` for this backend.

    Returns
    -------
    max : `int`
        Maximum number of rows.

    Notes
    -----
    The value is meant to reflect typical performance profiles (or a
    guess at them) rather than only hard database engine limits.
    """
    row_limit = 100
    return row_limit
# Annotation only: no value is assigned to ``origin`` at this point in the
# file.  Presumably it is set by ``__init__`` or by concrete subclasses --
# TODO confirm.
origin: int
"""An integer ID that should be used as the default for any datasets,
quanta, or other entities that use a (autoincrement, origin) compound
primary key (`int`).
"""
# Annotation only: no value is assigned to ``namespace`` at this point in the
# file.  `None` is an allowed value per the annotation; what it means for a
# given backend is not visible here -- TODO confirm.
namespace: Optional[str]
"""The schema or namespace this database instance is associated with
(`str` or `None`).
"""