Coverage for python/lsst/daf/butler/registry/interfaces/_database.py: 14%
413 statements
« prev ^ index » next coverage.py v6.5.0, created at 2023-02-28 10:37 +0000
« prev ^ index » next coverage.py v6.5.0, created at 2023-02-28 10:37 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = [
24 "Database",
25 "ReadOnlyDatabaseError",
26 "DatabaseConflictError",
27 "SchemaAlreadyDefinedError",
28 "StaticTablesContext",
29]
31import uuid
32import warnings
33from abc import ABC, abstractmethod
34from collections import defaultdict
35from contextlib import contextmanager
36from typing import (
37 Any,
38 Callable,
39 Dict,
40 Iterable,
41 Iterator,
42 List,
43 Optional,
44 Sequence,
45 Set,
46 Tuple,
47 Type,
48 Union,
49 cast,
50 final,
51)
53import astropy.time
54import sqlalchemy
56from ...core import TimespanDatabaseRepresentation, ddl, time_utils
57from ...core.named import NamedValueAbstractSet
58from .._exceptions import ConflictingDefinitionError
61# TODO: method is called with list[ReflectedColumn] in SA 2, and
62# ReflectedColumn does not exist in 1.4.
63def _checkExistingTableDefinition(name: str, spec: ddl.TableSpec, inspection: list) -> None:
64 """Test that the definition of a table in a `ddl.TableSpec` and from
65 database introspection are consistent.
67 Parameters
68 ----------
69 name : `str`
70 Name of the table (only used in error messages).
71 spec : `ddl.TableSpec`
72 Specification of the table.
73 inspection : `dict`
74 Dictionary returned by
75 `sqlalchemy.engine.reflection.Inspector.get_columns`.
77 Raises
78 ------
79 DatabaseConflictError
80 Raised if the definitions are inconsistent.
81 """
82 columnNames = [c["name"] for c in inspection]
83 if spec.fields.names != set(columnNames):
84 raise DatabaseConflictError(
85 f"Table '{name}' exists but is defined differently in the database; "
86 f"specification has columns {list(spec.fields.names)}, while the "
87 f"table in the database has {columnNames}."
88 )
class ReadOnlyDatabaseError(RuntimeError):
    """Error signalling that a write operation was attempted on a read-only
    `Database`.
    """
class DatabaseConflictError(ConflictingDefinitionError):
    """Error signalling that database content (row values or schema
    entities) is inconsistent with what this client expects.
    """
class SchemaAlreadyDefinedError(RuntimeError):
    """Error signalling an attempt to initialize a database schema when some
    tables already exist.
    """
class StaticTablesContext:
    """Helper used to declare the static schema for a registry layer in a
    database.

    Instances are handed out by `Database.declareStaticTables`, which should
    be the only code that constructs them.
    """

    def __init__(self, db: Database, connection: sqlalchemy.engine.Connection):
        self._db = db
        # Snapshot of the tables that already exist in the namespace.
        self._inspector = sqlalchemy.inspect(connection)
        self._tableNames = frozenset(self._inspector.get_table_names(schema=self._db.namespace))
        # Foreign keys and initializers are only collected here; they are
        # read back by `Database.declareStaticTables`.
        self._foreignKeys: List[Tuple[sqlalchemy.schema.Table, sqlalchemy.schema.ForeignKeyConstraint]] = []
        self._initializers: List[Callable[[Database], None]] = []

    def addTable(self, name: str, spec: ddl.TableSpec) -> sqlalchemy.schema.Table:
        """Add a new table to the schema and return its SQLAlchemy
        representation.

        The table may not actually be created until the context created by
        `Database.declareStaticTables` ends, so tables can be declared in any
        order even when they are related by foreign keys.

        Parameters
        ----------
        name : `str`
            Logical name of the table; it is mangled by the database before
            use.
        spec : `ddl.TableSpec`
            Specification of the table.

        Returns
        -------
        table : `sqlalchemy.schema.Table`
            SQLAlchemy representation of the table.
        """
        db = self._db
        name = db._mangleTableName(name)
        if name in self._tableNames:
            # The table already exists: make sure its columns match the spec.
            reflected = self._inspector.get_columns(name, schema=db.namespace)
            _checkExistingTableDefinition(name, spec, reflected)
        metadata = db._metadata
        assert metadata is not None, "Guaranteed by context manager that returns this object."
        table = db._convertTableSpec(name, spec, metadata)
        # Foreign keys are remembered here and attached to the table later.
        self._foreignKeys.extend(
            (table, db._convertForeignKeySpec(name, fkSpec, metadata)) for fkSpec in spec.foreignKeys
        )
        return table

    def addTableTuple(self, specs: Tuple[ddl.TableSpec, ...]) -> Tuple[sqlalchemy.schema.Table, ...]:
        """Add a named tuple of tables to the schema, returning their
        SQLAlchemy representations in a named tuple of the same type.

        The tables may not actually be created until the context created by
        `Database.declareStaticTables` ends, so tables can be declared in any
        order even when they are related by foreign keys.

        Notes
        -----
        ``specs`` *must* be an instance of a type created by
        `collections.namedtuple`, not just a regular tuple, and the returned
        object is guaranteed to be of the same type. Because
        `~collections.namedtuple` is just a factory for `type` objects, not
        an actual type itself, this cannot be expressed with type
        annotations.
        """
        build = specs._make  # type: ignore
        namedSpecs = zip(specs._fields, specs)  # type: ignore
        return build(self.addTable(fieldName, tableSpec) for fieldName, tableSpec in namedSpecs)

    def addInitializer(self, initializer: Callable[[Database], None]) -> None:
        """Register a callable that performs one-time initialization of a
        database.

        Initialization can mean anything that changes the state of a database
        and needs to happen exactly once after the schema was created, for
        example populating schema attributes.

        Parameters
        ----------
        initializer : callable
            Callable taking a single argument, a `Database` instance.
        """
        self._initializers.append(initializer)
181class Database(ABC):
182 """An abstract interface that represents a particular database engine's
183 representation of a single schema/namespace/database.
185 Parameters
186 ----------
187 origin : `int`
188 An integer ID that should be used as the default for any datasets,
189 quanta, or other entities that use a (autoincrement, origin) compound
190 primary key.
191 engine : `sqlalchemy.engine.Engine`
192 The SQLAlchemy engine for this `Database`.
193 namespace : `str`, optional
194 Name of the schema or namespace this instance is associated with.
195 This is passed as the ``schema`` argument when constructing a
196 `sqlalchemy.schema.MetaData` instance. We use ``namespace`` instead to
197 avoid confusion between "schema means namespace" and "schema means
198 table definitions".
200 Notes
201 -----
202 `Database` requires all write operations to go through its special named
203 methods. Our write patterns are sufficiently simple that we don't really
204 need the full flexibility of SQL insert/update/delete syntax, and we need
205 non-standard (but common) functionality in these operations sufficiently
206 often that it seems worthwhile to provide our own generic API.
208 In contrast, `Database.query` allows arbitrary ``SELECT`` queries (via
209 their SQLAlchemy representation) to be run, as we expect these to require
210 significantly more sophistication while still being limited to standard
211 SQL.
213 `Database` itself has several underscore-prefixed attributes:
215 - ``_engine``: SQLAlchemy object representing its engine.
216 - ``_connection``: method returning a context manager for
217 `sqlalchemy.engine.Connection` object.
218 - ``_metadata``: the `sqlalchemy.schema.MetaData` object representing
219 the tables and other schema entities.
221 These are considered protected (derived classes may access them, but other
222 code should not), and read-only, aside from executing SQL via
223 ``_connection``.
224 """
226 def __init__(self, *, origin: int, engine: sqlalchemy.engine.Engine, namespace: Optional[str] = None):
227 self.origin = origin
228 self.namespace = namespace
229 self._engine = engine
230 self._session_connection: Optional[sqlalchemy.engine.Connection] = None
231 self._metadata: Optional[sqlalchemy.schema.MetaData] = None
232 self._temp_tables: Set[str] = set()
234 def __repr__(self) -> str:
235 # Rather than try to reproduce all the parameters used to create
236 # the object, instead report the more useful information of the
237 # connection URL.
238 if self._engine.url.password is not None:
239 uri = str(self._engine.url.set(password="***"))
240 else:
241 uri = str(self._engine.url)
242 if self.namespace:
243 uri += f"#{self.namespace}"
244 return f'{type(self).__name__}("{uri}")'
246 @classmethod
247 def makeDefaultUri(cls, root: str) -> Optional[str]:
248 """Create a default connection URI appropriate for the given root
249 directory, or `None` if there can be no such default.
250 """
251 return None
253 @classmethod
254 def fromUri(
255 cls, uri: str, *, origin: int, namespace: Optional[str] = None, writeable: bool = True
256 ) -> Database:
257 """Construct a database from a SQLAlchemy URI.
259 Parameters
260 ----------
261 uri : `str`
262 A SQLAlchemy URI connection string.
263 origin : `int`
264 An integer ID that should be used as the default for any datasets,
265 quanta, or other entities that use a (autoincrement, origin)
266 compound primary key.
267 namespace : `str`, optional
268 A database namespace (i.e. schema) the new instance should be
269 associated with. If `None` (default), the namespace (if any) is
270 inferred from the URI.
271 writeable : `bool`, optional
272 If `True`, allow write operations on the database, including
273 ``CREATE TABLE``.
275 Returns
276 -------
277 db : `Database`
278 A new `Database` instance.
279 """
280 return cls.fromEngine(
281 cls.makeEngine(uri, writeable=writeable), origin=origin, namespace=namespace, writeable=writeable
282 )
284 @classmethod
285 @abstractmethod
286 def makeEngine(cls, uri: str, *, writeable: bool = True) -> sqlalchemy.engine.Engine:
287 """Create a `sqlalchemy.engine.Engine` from a SQLAlchemy URI.
289 Parameters
290 ----------
291 uri : `str`
292 A SQLAlchemy URI connection string.
293 writeable : `bool`, optional
294 If `True`, allow write operations on the database, including
295 ``CREATE TABLE``.
297 Returns
298 -------
299 engine : `sqlalchemy.engine.Engine`
300 A database engine.
302 Notes
303 -----
304 Subclasses that support other ways to connect to a database are
305 encouraged to add optional arguments to their implementation of this
306 method, as long as they maintain compatibility with the base class
307 call signature.
308 """
309 raise NotImplementedError()
311 @classmethod
312 @abstractmethod
313 def fromEngine(
314 cls,
315 engine: sqlalchemy.engine.Engine,
316 *,
317 origin: int,
318 namespace: Optional[str] = None,
319 writeable: bool = True,
320 ) -> Database:
321 """Create a new `Database` from an existing `sqlalchemy.engine.Engine`.
323 Parameters
324 ----------
325 engine : `sqlalchemy.engine.Engine`
326 The engine for the database. May be shared between `Database`
327 instances.
328 origin : `int`
329 An integer ID that should be used as the default for any datasets,
330 quanta, or other entities that use a (autoincrement, origin)
331 compound primary key.
332 namespace : `str`, optional
333 A different database namespace (i.e. schema) the new instance
334 should be associated with. If `None` (default), the namespace
335 (if any) is inferred from the connection.
336 writeable : `bool`, optional
337 If `True`, allow write operations on the database, including
338 ``CREATE TABLE``.
340 Returns
341 -------
342 db : `Database`
343 A new `Database` instance.
345 Notes
346 -----
347 This method allows different `Database` instances to share the same
348 engine, which is desirable when they represent different namespaces
349 can be queried together.
350 """
351 raise NotImplementedError()
353 @final
354 @contextmanager
355 def session(self) -> Iterator[None]:
356 """Return a context manager that represents a session (persistent
357 connection to a database).
359 Returns
360 -------
361 context : `AbstractContextManager` [ `None` ]
362 A context manager that does not return a value when entered.
364 Notes
365 -----
366 This method should be used when a sequence of read-only SQL operations
367 will be performed in rapid succession *without* a requirement that they
368 yield consistent results in the presence of concurrent writes (or, more
369 rarely, when conflicting concurrent writes are rare/impossible and the
370 session will be open long enough that a transaction is inadvisable).
371 """
372 with self._session():
373 yield
375 @final
376 @contextmanager
377 def transaction(
378 self,
379 *,
380 interrupting: bool = False,
381 savepoint: bool = False,
382 lock: Iterable[sqlalchemy.schema.Table] = (),
383 for_temp_tables: bool = False,
384 ) -> Iterator[None]:
385 """Return a context manager that represents a transaction.
387 Parameters
388 ----------
389 interrupting : `bool`, optional
390 If `True` (`False` is default), this transaction block may not be
391 nested without an outer one, and attempting to do so is a logic
392 (i.e. assertion) error.
393 savepoint : `bool`, optional
394 If `True` (`False` is default), create a `SAVEPOINT`, allowing
395 exceptions raised by the database (e.g. due to constraint
396 violations) during this transaction's context to be caught outside
397 it without also rolling back all operations in an outer transaction
398 block. If `False`, transactions may still be nested, but a
399 rollback may be generated at any level and affects all levels, and
400 commits are deferred until the outermost block completes. If any
401 outer transaction block was created with ``savepoint=True``, all
402 inner blocks will be as well (regardless of the actual value
403 passed). This has no effect if this is the outermost transaction.
404 lock : `Iterable` [ `sqlalchemy.schema.Table` ], optional
405 A list of tables to lock for the duration of this transaction.
406 These locks are guaranteed to prevent concurrent writes and allow
407 this transaction (only) to acquire the same locks (others should
408 block), but only prevent concurrent reads if the database engine
409 requires that in order to block concurrent writes.
410 for_temp_tables : `bool`, optional
411 If `True`, this transaction may involve creating temporary tables.
413 Returns
414 -------
415 context : `AbstractContextManager` [ `None` ]
416 A context manager that commits the transaction when it is exited
417 without error and rolls back the transactoin when it is exited via
418 an exception.
420 Notes
421 -----
422 All transactions on a connection managed by one or more `Database`
423 instances _must_ go through this method, or transaction state will not
424 be correctly managed.
425 """
426 with self._transaction(
427 interrupting=interrupting, savepoint=savepoint, lock=lock, for_temp_tables=for_temp_tables
428 ):
429 yield
431 @contextmanager
432 def temporary_table(
433 self, spec: ddl.TableSpec, name: Optional[str] = None
434 ) -> Iterator[sqlalchemy.schema.Table]:
435 """Return a context manager that creates and then drops a temporary
436 table.
438 Parameters
439 ----------
440 spec : `ddl.TableSpec`
441 Specification for the columns. Unique and foreign key constraints
442 may be ignored.
443 name : `str`, optional
444 If provided, the name of the SQL construct. If not provided, an
445 opaque but unique identifier is generated.
447 Returns
448 -------
449 context : `AbstractContextManager` [ `sqlalchemy.schema.Table` ]
450 A context manager that returns a SQLAlchemy representation of the
451 temporary table when entered.
453 Notes
454 -----
455 Temporary tables may be created, dropped, and written to even in
456 read-only databases - at least according to the Python-level
457 protections in the `Database` classes. Server permissions may say
458 otherwise, but in that case they probably need to be modified to
459 support the full range of expected read-only butler behavior.
460 """
461 with self._session() as connection:
462 table = self._make_temporary_table(connection, spec=spec, name=name)
463 self._temp_tables.add(table.key)
464 try:
465 yield table
466 finally:
467 with self._transaction():
468 table.drop(connection)
469 self._temp_tables.remove(table.key)
471 @contextmanager
472 def _session(self) -> Iterator[sqlalchemy.engine.Connection]:
473 """Protected implementation for `session` that actually returns the
474 connection.
476 This method is for internal `Database` calls that need the actual
477 SQLAlchemy connection object. It should be overridden by subclasses
478 instead of `session` itself.
480 Returns
481 -------
482 context : `AbstractContextManager` [ `sqlalchemy.engine.Connection` ]
483 A context manager that returns a SQLALchemy connection when
484 entered.
486 """
487 if self._session_connection is not None:
488 # session already started, just reuse that
489 yield self._session_connection
490 else:
491 try:
492 # open new connection and close it when done
493 self._session_connection = self._engine.connect()
494 yield self._session_connection
495 finally:
496 if self._session_connection is not None:
497 self._session_connection.close()
498 self._session_connection = None
499 # Temporary tables only live within session
500 self._temp_tables = set()
502 @contextmanager
503 def _transaction(
504 self,
505 *,
506 interrupting: bool = False,
507 savepoint: bool = False,
508 lock: Iterable[sqlalchemy.schema.Table] = (),
509 for_temp_tables: bool = False,
510 ) -> Iterator[tuple[bool, sqlalchemy.engine.Connection]]:
511 """Protected implementation for `transaction` that actually returns the
512 connection and whether this is a new outermost transaction.
514 This method is for internal `Database` calls that need the actual
515 SQLAlchemy connection object. It should be overridden by subclasses
516 instead of `transaction` itself.
518 Parameters
519 ----------
520 interrupting : `bool`, optional
521 If `True` (`False` is default), this transaction block may not be
522 nested without an outer one, and attempting to do so is a logic
523 (i.e. assertion) error.
524 savepoint : `bool`, optional
525 If `True` (`False` is default), create a `SAVEPOINT`, allowing
526 exceptions raised by the database (e.g. due to constraint
527 violations) during this transaction's context to be caught outside
528 it without also rolling back all operations in an outer transaction
529 block. If `False`, transactions may still be nested, but a
530 rollback may be generated at any level and affects all levels, and
531 commits are deferred until the outermost block completes. If any
532 outer transaction block was created with ``savepoint=True``, all
533 inner blocks will be as well (regardless of the actual value
534 passed). This has no effect if this is the outermost transaction.
535 lock : `Iterable` [ `sqlalchemy.schema.Table` ], optional
536 A list of tables to lock for the duration of this transaction.
537 These locks are guaranteed to prevent concurrent writes and allow
538 this transaction (only) to acquire the same locks (others should
539 block), but only prevent concurrent reads if the database engine
540 requires that in order to block concurrent writes.
541 for_temp_tables : `bool`, optional
542 If `True`, this transaction may involve creating temporary tables.
544 Returns
545 -------
546 context : `AbstractContextManager` [ `tuple` [ `bool`,
547 `sqlalchemy.engine.Connection` ] ]
548 A context manager that commits the transaction when it is exited
549 without error and rolls back the transactoin when it is exited via
550 an exception. When entered, it returns a tuple of:
552 - ``is_new`` (`bool`): whether this is a new (outermost)
553 transaction;
554 - ``connection`` (`sqlalchemy.engine.Connection`): the connection.
555 """
556 with self._session() as connection:
557 already_in_transaction = connection.in_transaction()
558 assert not (interrupting and already_in_transaction), (
559 "Logic error in transaction nesting: an operation that would "
560 "interrupt the active transaction context has been requested."
561 )
562 savepoint = savepoint or connection.in_nested_transaction()
563 trans: sqlalchemy.engine.Transaction | None
564 if already_in_transaction:
565 if savepoint:
566 trans = connection.begin_nested()
567 else:
568 # Nested non-savepoint transactions don't do anything.
569 trans = None
570 else:
571 # Use a regular (non-savepoint) transaction always for the
572 # outermost context.
573 trans = connection.begin()
574 self._lockTables(connection, lock)
575 try:
576 yield not already_in_transaction, connection
577 if trans is not None:
578 trans.commit()
579 except BaseException:
580 if trans is not None:
581 trans.rollback()
582 raise
584 @abstractmethod
585 def _lockTables(
586 self, connection: sqlalchemy.engine.Connection, tables: Iterable[sqlalchemy.schema.Table] = ()
587 ) -> None:
588 """Acquire locks on the given tables.
590 This is an implementation hook for subclasses, called by `transaction`.
591 It should not be called directly by other code.
593 Parameters
594 ----------
595 connection : `sqlalchemy.engine.Connection`
596 Database connection object. It is guaranteed that transaction is
597 already in a progress for this connection.
598 tables : `Iterable` [ `sqlalchemy.schema.Table` ], optional
599 A list of tables to lock for the duration of this transaction.
600 These locks are guaranteed to prevent concurrent writes and allow
601 this transaction (only) to acquire the same locks (others should
602 block), but only prevent concurrent reads if the database engine
603 requires that in order to block concurrent writes.
604 """
605 raise NotImplementedError()
607 def isTableWriteable(self, table: sqlalchemy.schema.Table) -> bool:
608 """Check whether a table is writeable, either because the database
609 connection is read-write or the table is a temporary table.
611 Parameters
612 ----------
613 table : `sqlalchemy.schema.Table`
614 SQLAlchemy table object to check.
616 Returns
617 -------
618 writeable : `bool`
619 Whether this table is writeable.
620 """
621 return self.isWriteable() or table.key in self._temp_tables
623 def assertTableWriteable(self, table: sqlalchemy.schema.Table, msg: str) -> None:
624 """Raise if the given table is not writeable, either because the
625 database connection is read-write or the table is a temporary table.
627 Parameters
628 ----------
629 table : `sqlalchemy.schema.Table`
630 SQLAlchemy table object to check.
631 msg : `str`, optional
632 If provided, raise `ReadOnlyDatabaseError` instead of returning
633 `False`, with this message.
634 """
635 if not self.isTableWriteable(table):
636 raise ReadOnlyDatabaseError(msg)
    @contextmanager
    def declareStaticTables(self, *, create: bool) -> Iterator[StaticTablesContext]:
        """Return a context manager in which the database's static DDL schema
        can be declared.

        Parameters
        ----------
        create : `bool`
            If `True`, attempt to create all tables at the end of the context.
            If `False`, they will be assumed to already exist.

        Returns
        -------
        schema : `StaticTablesContext`
            A helper object that is used to add new tables.

        Raises
        ------
        ReadOnlyDatabaseError
            Raised if ``create`` is `True` and `Database.isWriteable` returns
            `False`.
        SchemaAlreadyDefinedError
            Raised if ``create`` is `True` and the namespace already contains
            tables.

        Examples
        --------
        Given a `Database` instance ``db``::

            with db.declareStaticTables(create=True) as schema:
                schema.addTable("table1", TableSpec(...))
                schema.addTable("table2", TableSpec(...))

        Notes
        -----
        A database's static DDL schema must be declared before any dynamic
        tables are managed via calls to `ensureTableExists` or
        `getExistingTable`. The order in which static schema tables are added
        inside the context block is unimportant; they will automatically be
        sorted and added in an order consistent with their foreign key
        relationships.

        If any exception escapes (including one raised inside the ``with``
        block by the caller), ``_metadata`` is reset to `None` before the
        exception is re-raised.
        """
        if create and not self.isWriteable():
            raise ReadOnlyDatabaseError(f"Cannot create tables in read-only database {self}.")
        # Start from a fresh MetaData; it is discarded again on any failure.
        self._metadata = sqlalchemy.MetaData(schema=self.namespace)
        try:
            with self._transaction() as (_, connection):
                context = StaticTablesContext(self, connection)
                if create and context._tableNames:
                    # Looks like database is already initialized, to avoid
                    # danger of modifying/destroying valid schema we refuse to
                    # do anything in this case
                    raise SchemaAlreadyDefinedError(f"Cannot create tables in non-empty database {self}.")
                # Hand control to the caller, who declares tables on
                # ``context``; everything below runs when their block ends.
                yield context
                # Attach the foreign keys collected by the context now that
                # all tables have been declared.
                for table, foreignKey in context._foreignKeys:
                    table.append_constraint(foreignKey)
                if create:
                    if self.namespace is not None:
                        # Create the schema/namespace itself if it is missing.
                        if self.namespace not in context._inspector.get_schema_names():
                            connection.execute(sqlalchemy.schema.CreateSchema(self.namespace))
                    # In our tables we have columns that make use of sqlalchemy
                    # Sequence objects. There is currently a bug in sqlalchemy
                    # that causes a deprecation warning to be thrown on a
                    # property of the Sequence object when the repr for the
                    # sequence is created. Here a filter is used to catch these
                    # deprecation warnings when tables are created.
                    with warnings.catch_warnings():
                        warnings.simplefilter("ignore", category=sqlalchemy.exc.SADeprecationWarning)
                        self._metadata.create_all(connection)
                    # call all initializer methods sequentially
                    for init in context._initializers:
                        init(self)
        except BaseException:
            # Leave no half-declared schema behind.
            self._metadata = None
            raise
711 @abstractmethod
712 def isWriteable(self) -> bool:
713 """Return `True` if this database can be modified by this client."""
714 raise NotImplementedError()
716 @abstractmethod
717 def __str__(self) -> str:
718 """Return a human-readable identifier for this `Database`, including
719 any namespace or schema that identifies its names within a `Registry`.
720 """
721 raise NotImplementedError()
723 @property
724 def dialect(self) -> sqlalchemy.engine.Dialect:
725 """The SQLAlchemy dialect for this database engine
726 (`sqlalchemy.engine.Dialect`).
727 """
728 return self._engine.dialect
730 def shrinkDatabaseEntityName(self, original: str) -> str:
731 """Return a version of the given name that fits within this database
732 engine's length limits for table, constraint, indexes, and sequence
733 names.
735 Implementations should not assume that simple truncation is safe,
736 because multiple long names often begin with the same prefix.
738 The default implementation simply returns the given name.
740 Parameters
741 ----------
742 original : `str`
743 The original name.
745 Returns
746 -------
747 shrunk : `str`
748 The new, possibly shortened name.
749 """
750 return original
752 def expandDatabaseEntityName(self, shrunk: str) -> str:
753 """Retrieve the original name for a database entity that was too long
754 to fit within the database engine's limits.
756 Parameters
757 ----------
758 original : `str`
759 The original name.
761 Returns
762 -------
763 shrunk : `str`
764 The new, possibly shortened name.
765 """
766 return shrunk
768 def _mangleTableName(self, name: str) -> str:
769 """Map a logical, user-visible table name to the true table name used
770 in the database.
772 The default implementation returns the given name unchanged.
774 Parameters
775 ----------
776 name : `str`
777 Input table name. Should not include a namespace (i.e. schema)
778 prefix.
780 Returns
781 -------
782 mangled : `str`
783 Mangled version of the table name (still with no namespace prefix).
785 Notes
786 -----
787 Reimplementations of this method must be idempotent - mangling an
788 already-mangled name must have no effect.
789 """
790 return name
792 def _makeColumnConstraints(self, table: str, spec: ddl.FieldSpec) -> List[sqlalchemy.CheckConstraint]:
793 """Create constraints based on this spec.
795 Parameters
796 ----------
797 table : `str`
798 Name of the table this column is being added to.
799 spec : `FieldSpec`
800 Specification for the field to be added.
802 Returns
803 -------
804 constraint : `list` of `sqlalchemy.CheckConstraint`
805 Constraint added for this column.
806 """
807 # By default we return no additional constraints
808 return []
810 def _convertFieldSpec(
811 self, table: str, spec: ddl.FieldSpec, metadata: sqlalchemy.MetaData, **kwargs: Any
812 ) -> sqlalchemy.schema.Column:
813 """Convert a `FieldSpec` to a `sqlalchemy.schema.Column`.
815 Parameters
816 ----------
817 table : `str`
818 Name of the table this column is being added to.
819 spec : `FieldSpec`
820 Specification for the field to be added.
821 metadata : `sqlalchemy.MetaData`
822 SQLAlchemy representation of the DDL schema this field's table is
823 being added to.
824 **kwargs
825 Additional keyword arguments to forward to the
826 `sqlalchemy.schema.Column` constructor. This is provided to make
827 it easier for derived classes to delegate to ``super()`` while
828 making only minor changes.
830 Returns
831 -------
832 column : `sqlalchemy.schema.Column`
833 SQLAlchemy representation of the field.
834 """
835 args = []
836 if spec.autoincrement:
837 # Generate a sequence to use for auto incrementing for databases
838 # that do not support it natively. This will be ignored by
839 # sqlalchemy for databases that do support it.
840 args.append(
841 sqlalchemy.Sequence(
842 self.shrinkDatabaseEntityName(f"{table}_seq_{spec.name}"), metadata=metadata
843 )
844 )
845 assert spec.doc is None or isinstance(spec.doc, str), f"Bad doc for {table}.{spec.name}."
846 return sqlalchemy.schema.Column(
847 spec.name,
848 spec.getSizedColumnType(),
849 *args,
850 nullable=spec.nullable,
851 primary_key=spec.primaryKey,
852 comment=spec.doc,
853 server_default=spec.default,
854 **kwargs,
855 )
857 def _convertForeignKeySpec(
858 self, table: str, spec: ddl.ForeignKeySpec, metadata: sqlalchemy.MetaData, **kwargs: Any
859 ) -> sqlalchemy.schema.ForeignKeyConstraint:
860 """Convert a `ForeignKeySpec` to a
861 `sqlalchemy.schema.ForeignKeyConstraint`.
863 Parameters
864 ----------
865 table : `str`
866 Name of the table this foreign key is being added to.
867 spec : `ForeignKeySpec`
868 Specification for the foreign key to be added.
869 metadata : `sqlalchemy.MetaData`
870 SQLAlchemy representation of the DDL schema this constraint is
871 being added to.
872 **kwargs
873 Additional keyword arguments to forward to the
874 `sqlalchemy.schema.ForeignKeyConstraint` constructor. This is
875 provided to make it easier for derived classes to delegate to
876 ``super()`` while making only minor changes.
878 Returns
879 -------
880 constraint : `sqlalchemy.schema.ForeignKeyConstraint`
881 SQLAlchemy representation of the constraint.
882 """
883 name = self.shrinkDatabaseEntityName(
884 "_".join(
885 ["fkey", table, self._mangleTableName(spec.table)] + list(spec.target) + list(spec.source)
886 )
887 )
888 return sqlalchemy.schema.ForeignKeyConstraint(
889 spec.source,
890 [f"{self._mangleTableName(spec.table)}.{col}" for col in spec.target],
891 name=name,
892 ondelete=spec.onDelete,
893 )
def _convertExclusionConstraintSpec(
    self,
    table: str,
    spec: Tuple[Union[str, Type[TimespanDatabaseRepresentation]], ...],
    metadata: sqlalchemy.MetaData,
) -> sqlalchemy.schema.Constraint:
    """Translate one entry of `ddl.TableSpec.exclusion` into a SQLAlchemy
    constraint object.

    Parameters
    ----------
    table : `str`
        Name of the table the constraint belongs to.
    spec : `tuple` [ `str` or `type` ]
        Column names (`str`) together with the `type` returned by
        `getTimespanRepresentation` (which must appear exactly once), in
        the order the columns should appear in the index backing the
        constraint.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema the constraint is
        being added to.

    Returns
    -------
    constraint : `sqlalchemy.schema.Constraint`
        SQLAlchemy representation of the constraint.

    Raises
    ------
    NotImplementedError
        Raised if this database does not support exclusion constraints;
        this base implementation always raises.
    """
    message = f"Database {self} does not support exclusion constraints."
    raise NotImplementedError(message)
def _convertTableSpec(
    self, name: str, spec: ddl.TableSpec, metadata: sqlalchemy.MetaData, **kwargs: Any
) -> sqlalchemy.schema.Table:
    """Convert a `TableSpec` to a `sqlalchemy.schema.Table`.

    Parameters
    ----------
    name : `str`
        Name of the table; it is passed through ``_mangleTableName``
        before being used.
    spec : `TableSpec`
        Specification for the table to be converted.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema this table is being
        added to.
    **kwargs
        Additional keyword arguments to forward to the
        `sqlalchemy.schema.Table` constructor.  This is provided to make it
        easier for derived classes to delegate to ``super()`` while making
        only minor changes.

    Returns
    -------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy representation of the table.

    Notes
    -----
    This method does not handle ``spec.foreignKeys`` at all (other than
    adding indexes for them), in order to avoid circular dependencies.
    The constraints themselves are added by higher-level logic in
    `ensureTableExists`, `getExistingTable`, and `declareStaticTables`.
    """
    name = self._mangleTableName(name)

    # Columns first, then any per-column constraints.
    args: list[sqlalchemy.schema.SchemaItem] = []
    for fieldSpec in spec.fields:
        args.append(self._convertFieldSpec(name, fieldSpec, metadata))
    for fieldSpec in spec.fields:
        args.extend(self._makeColumnConstraints(name, fieldSpec))

    # Column tuples that already have an index (the primary key and the
    # unique constraints), so explicit and foreign key indexes do not
    # duplicate them.
    primaryKeyColumns = tuple(fieldSpec.name for fieldSpec in spec.fields if fieldSpec.primaryKey)
    indexed = {primaryKeyColumns}

    for columns in spec.unique:
        uniqueName = self.shrinkDatabaseEntityName("_".join([name, "unq"] + list(columns)))
        args.append(sqlalchemy.schema.UniqueConstraint(*columns, name=uniqueName))
    indexed.update(spec.unique)

    for index in spec.indexes:
        if index.columns in indexed:
            continue
        indexName = self.shrinkDatabaseEntityName("_".join([name, "idx"] + list(index.columns)))
        args.append(
            sqlalchemy.schema.Index(
                indexName,
                *index.columns,
                unique=(index.columns in spec.unique),
                **index.kwargs,
            )
        )
    indexed.update(index.columns for index in spec.indexes)

    for fk in spec.foreignKeys:
        if not fk.addIndex or fk.source in indexed:
            continue
        fkIndexName = self.shrinkDatabaseEntityName("_".join((name, "fkidx") + fk.source))
        args.append(sqlalchemy.schema.Index(fkIndexName, *fk.source))

    for excl in spec.exclusion:
        args.append(self._convertExclusionConstraintSpec(name, excl, metadata))

    assert spec.doc is None or isinstance(spec.doc, str), f"Bad doc for {name}."
    return sqlalchemy.schema.Table(name, metadata, *args, comment=spec.doc, info={"spec": spec}, **kwargs)
def ensureTableExists(self, name: str, spec: ddl.TableSpec) -> sqlalchemy.schema.Table:
    """Return the table with the given name and specification, creating it
    first if it does not exist yet.

    Parameters
    ----------
    name : `str`
        Name of the table (not including namespace qualifiers).
    spec : `TableSpec`
        Specification for the table.  This will be used when creating the
        table, and *may* be used when obtaining an existing table to check
        for consistency, but no such check is guaranteed.

    Returns
    -------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy representation of the table.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False`, and the table does not
        already exist.
    DatabaseConflictError
        Raised if the table exists but ``spec`` is inconsistent with its
        definition.

    Notes
    -----
    This method may not be called within transactions.  It may be called on
    read-only databases if and only if the table does in fact already
    exist.

    Subclasses may override this method, but usually should not need to.
    """
    # TODO: if _engine is used to make a table then it uses separate
    # connection and should not interfere with current transaction
    session = self._session_connection
    assert session is None or not session.in_transaction(), "Table creation interrupts transactions."
    metadata = self._metadata
    assert metadata is not None, "Static tables must be declared before dynamic tables."

    existing = self.getExistingTable(name, spec)
    if existing is not None:
        return existing
    if not self.isWriteable():
        raise ReadOnlyDatabaseError(
            f"Table {name} does not exist, and cannot be created because database {self} is read-only."
        )

    created = self._convertTableSpec(name, spec, metadata)
    for foreignKeySpec in spec.foreignKeys:
        constraint = self._convertForeignKeySpec(name, foreignKeySpec, metadata)
        created.append_constraint(constraint)
    try:
        with self._transaction() as (_, connection):
            created.create(connection)
    except sqlalchemy.exc.DatabaseError:
        # Another process may have created the table in the meantime,
        # which usually shows up as OperationalError or ProgrammingError.
        # IF NOT EXISTS cannot be used here because of a server-side
        # PostgreSQL race condition that causes IntegrityError.  All of
        # these inherit from DatabaseError, so catch that and re-check
        # whether the table is there now.
        raced = self.getExistingTable(name, spec)
        if raced is None:
            raise
        return raced
    return created
def getExistingTable(self, name: str, spec: ddl.TableSpec) -> Optional[sqlalchemy.schema.Table]:
    """Look up an existing table with the given name and specification.

    Parameters
    ----------
    name : `str`
        Name of the table (not including namespace qualifiers).
    spec : `TableSpec`
        Specification for the table.  This will be used when creating the
        SQLAlchemy representation of the table, and it is used to
        check that the actual table in the database is consistent.

    Returns
    -------
    table : `sqlalchemy.schema.Table` or `None`
        SQLAlchemy representation of the table, or `None` if it does not
        exist.

    Raises
    ------
    DatabaseConflictError
        Raised if the table exists but ``spec`` is inconsistent with its
        definition.

    Notes
    -----
    This method can be called within transactions and never modifies the
    database.

    Subclasses may override this method, but usually should not need to.
    """
    assert self._metadata is not None, "Static tables must be declared before dynamic tables."
    name = self._mangleTableName(name)
    key = name if self.namespace is None else f"{self.namespace}.{name}"

    # Case 1: the table is already registered in our in-memory metadata.
    known = self._metadata.tables.get(key)
    if known is not None:
        if spec.fields.names != set(known.columns.keys()):
            raise DatabaseConflictError(
                f"Table '{name}' has already been defined differently; the new "
                f"specification has columns {list(spec.fields.names)}, while "
                f"the previous definition has {list(known.columns.keys())}."
            )
        return known

    # Case 2: not in metadata; ask the database itself.
    bind = self._engine if self._session_connection is None else self._session_connection
    inspector = sqlalchemy.inspect(bind, raiseerr=True)
    if name not in inspector.get_table_names(schema=self.namespace):
        return None
    _checkExistingTableDefinition(name, spec, inspector.get_columns(name, schema=self.namespace))
    reflected = self._convertTableSpec(name, spec, self._metadata)
    for foreignKeySpec in spec.foreignKeys:
        reflected.append_constraint(self._convertForeignKeySpec(name, foreignKeySpec, self._metadata))
    return reflected
def _make_temporary_table(
    self,
    connection: sqlalchemy.engine.Connection,
    spec: ddl.TableSpec,
    name: Optional[str] = None,
    **kwargs: Any,
) -> sqlalchemy.schema.Table:
    """Create a temporary table.

    Parameters
    ----------
    connection : `sqlalchemy.engine.Connection`
        Connection to use when creating the table.
    spec : `TableSpec`
        Specification for the table.
    name : `str`, optional
        A unique (within this session/connection) name for the table.
        Subclasses may override to modify the actual name used.  If not
        provided, a unique name will be generated.
    **kwargs
        Additional keyword arguments to forward to the
        `sqlalchemy.schema.Table` constructor.  This is provided to make it
        easier for derived classes to delegate to ``super()`` while making
        only minor changes.

    Returns
    -------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy representation of the table.

    Raises
    ------
    RuntimeError
        Raised if the static schema has not been defined yet.
    ValueError
        Raised if the name, after transformation by `Database`, matches an
        existing temporary table and differs from the requested name.
    """
    if name is None:
        name = f"tmp_{uuid.uuid4().hex}"
    metadata = self._metadata
    if metadata is None:
        raise RuntimeError("Cannot create temporary table before static schema is defined.")
    table = self._convertTableSpec(
        name, spec, metadata, prefixes=["TEMPORARY"], schema=sqlalchemy.schema.BLANK_SCHEMA, **kwargs
    )
    if table.key in self._temp_tables and table.key != name:
        raise ValueError(
            f"A temporary table with name {name} (transformed to {table.key} by "
            "Database) already exists."
        )
    for foreignKeySpec in spec.foreignKeys:
        constraint = self._convertForeignKeySpec(name, foreignKeySpec, metadata)
        table.append_constraint(constraint)
    with self._transaction():
        table.create(connection)
    return table
@classmethod
def getTimespanRepresentation(cls) -> Type[TimespanDatabaseRepresentation]:
    """Return the `type` describing how `Timespan` objects are stored in
    this database.

    `Database` does not automatically use the return type of this method
    anywhere else; calling code is responsible for making sure that DDL
    and queries are consistent with it.

    Returns
    -------
    TimespanReprClass : `type` (`TimespanDatabaseRepresentation` subclass)
        A type that encapsulates the way `Timespan` objects should be
        stored in this database.

    Notes
    -----
    There are two big reasons we've decided to keep timespan-mangling logic
    outside the `Database` implementations, even though the choice of
    representation is ultimately up to a `Database` implementation:

    - Timespans appear in relatively few tables and queries in our
      typical usage, and the code that operates on them is already aware
      that it is working with timespans.  In contrast, a
      timespan-representation-aware implementation of, say, `insert`,
      would need to have extra logic to identify when timespan-mangling
      needed to occur, which would usually be useless overhead.

    - SQLAlchemy's rich SELECT query expression system has no way to wrap
      multiple columns in a single expression object (the ORM does, but
      we are not using the ORM).  So we would have to wrap _much_ more of
      that code in our own interfaces to encapsulate timespan
      representations there.
    """
    # The base implementation uses the compound representation.
    representation = TimespanDatabaseRepresentation.Compound
    return representation
def sync(
    self,
    table: sqlalchemy.schema.Table,
    *,
    keys: Dict[str, Any],
    compared: Optional[Dict[str, Any]] = None,
    extra: Optional[Dict[str, Any]] = None,
    returning: Optional[Sequence[str]] = None,
    update: bool = False,
) -> Tuple[Optional[Dict[str, Any]], Union[bool, Dict[str, Any]]]:
    """Insert into a table as necessary to ensure database contains
    values equivalent to the given ones.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table to be queried and possibly inserted into.
    keys : `dict`
        Column name-value pairs used to search for an existing row; must
        be a combination that can be used to select a single row if one
        exists.  If such a row does not exist, these values are used in
        the insert.
    compared : `dict`, optional
        Column name-value pairs that are compared to those in any existing
        row.  If such a row does not exist, these values are used in the
        insert.
    extra : `dict`, optional
        Column name-value pairs that are ignored if a matching row exists,
        but used in an insert if one is necessary.
    returning : `~collections.abc.Sequence` of `str`, optional
        The names of columns whose values should be returned.
    update : `bool`, optional
        If `True` (`False` is default), update the existing row with the
        values in ``compared`` instead of raising `DatabaseConflictError`.

    Returns
    -------
    row : `dict`, optional
        The value of the fields indicated by ``returning``, or `None` if
        ``returning`` is `None`.
    inserted_or_updated : `bool` or `dict`
        If `True`, a new row was inserted; if `False`, a matching row
        already existed.  If a `dict` (only possible if ``update=True``),
        then an existing row was updated, and the dict maps the names of
        the updated columns to their *old* values (new values can be
        obtained from ``compared``).

    Raises
    ------
    DatabaseConflictError
        Raised if the values in ``compared`` do not match the values in the
        database (and ``update`` is `False`).
    ReadOnlyDatabaseError
        Raised if the table is not writeable and either no matching record
        already exists or an update would be required.
    ConflictingDefinitionError
        Raised if the table is writeable but no row matching ``keys`` is
        found after the insert attempt.
    RuntimeError
        Raised if more than one row matches ``keys``, or if a conflict
        with ``compared`` is found right after a successful insert.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    It may be called on read-only databases if and only if the matching row
    does in fact already exist.
    """

    def check() -> Tuple[int, Optional[Dict[str, Any]], Optional[List]]:
        """Query for a row that matches the ``keys`` argument, and compare
        to what was given by the caller.

        Returns
        -------
        n : `int`
            Number of matching rows.  ``n != 1`` is always an error, but
            it's a different kind of error depending on where `check` is
            being called.
        bad : `dict` or `None`
            The subset of the keys of ``compared`` for which the existing
            values did not match the given one, mapped to the existing
            values in the database.  A non-empty ``bad`` is a conflict,
            whose handling depends on the context.  `None` if ``n != 1``.
        result : `list` or `None`
            Results in the database that correspond to the columns given
            in ``returning``, or `None` if ``returning is None`` or
            ``n != 1``.
        """
        toSelect: Set[str] = set()
        if compared is not None:
            toSelect.update(compared.keys())
        if returning is not None:
            toSelect.update(returning)
        if not toSelect:
            # Need to select some column, even if we just want to see
            # how many rows we get back.
            toSelect.add(next(iter(keys.keys())))
        selectSql = (
            sqlalchemy.sql.select(*[table.columns[k].label(k) for k in toSelect])
            .select_from(table)
            .where(sqlalchemy.sql.and_(*[table.columns[k] == v for k, v in keys.items()]))
        )
        with self._transaction() as (_, connection):
            fetched = list(connection.execute(selectSql).mappings())
        if len(fetched) != 1:
            return len(fetched), None, None
        existing = fetched[0]
        if compared is not None:

            def safeNotEqual(a: Any, b: Any) -> bool:
                # astropy times are compared through the time_utils
                # converter rather than with the ``!=`` operator.
                if isinstance(a, astropy.time.Time):
                    return not time_utils.TimeConverter().times_equal(a, b)
                return a != b

            inconsistencies = {
                k: existing[k] for k, v in compared.items() if safeNotEqual(existing[k], v)
            }
        else:
            inconsistencies = {}
        if returning is not None:
            toReturn: Optional[list] = [existing[k] for k in returning]
        else:
            toReturn = None
        return 1, inconsistencies, toReturn

    def format_bad(inconsistencies: Dict[str, Any]) -> str:
        """Format the 'bad' dictionary of existing values returned by
        ``check`` into a string suitable for an error message.
        """
        assert compared is not None, "Should not be able to get inconsistencies without comparing."
        return ", ".join(f"{k}: {v!r} != {compared[k]!r}" for k, v in inconsistencies.items())

    if self.isTableWriteable(table):
        # Try an insert first, but allow it to fail (in only specific
        # ways).  The candidate row is the union of keys, compared and
        # extra values.
        row = keys.copy()
        if compared is not None:
            row.update(compared)
        if extra is not None:
            row.update(extra)
        with self.transaction():
            inserted = bool(self.ensure(table, row))
            inserted_or_updated: Union[bool, Dict[str, Any]]
            # Need to perform check() for this branch inside the
            # transaction, so we roll back an insert that didn't do
            # what we expected.  That limits the extent to which we
            # can reduce duplication between this block and the other
            # ones that perform similar logic.
            n, bad, result = check()
            if n < 1:
                raise ConflictingDefinitionError(
                    f"Attempted to ensure {row} exists by inserting it with ON CONFLICT IGNORE, "
                    f"but a post-insert query on {keys} returned no results. "
                    f"Insert was {'' if inserted else 'not '}reported as successful. "
                    "This can occur if the insert violated a database constraint other than the "
                    "unique constraint or primary key used to identify the row in this call."
                )
            elif n > 1:
                raise RuntimeError(
                    f"Keys passed to sync {keys.keys()} do not comprise a "
                    f"unique constraint for table {table.name}."
                )
            elif bad:
                assert (
                    compared is not None
                ), "Should not be able to get inconsistencies without comparing."
                if inserted:
                    raise RuntimeError(
                        f"Conflict ({bad}) in sync after successful insert; this is "
                        "possible if the same table is being updated by a concurrent "
                        "process that isn't using sync, but it may also be a bug in "
                        "daf_butler."
                    )
                elif update:
                    # Overwrite only the columns that differed; the old
                    # values are reported back to the caller via ``bad``.
                    with self._transaction() as (_, connection):
                        connection.execute(
                            table.update()
                            .where(sqlalchemy.sql.and_(*[table.columns[k] == v for k, v in keys.items()]))
                            .values(**{k: compared[k] for k in bad.keys()})
                        )
                    inserted_or_updated = bad
                else:
                    raise DatabaseConflictError(
                        f"Conflict in sync for table {table.name} on column(s) {format_bad(bad)}."
                    )
            else:
                inserted_or_updated = inserted
    else:
        # Database is not writeable; just see if the row exists.
        n, bad, result = check()
        if n < 1:
            raise ReadOnlyDatabaseError("sync needs to insert, but database is read-only.")
        elif n > 1:
            raise RuntimeError("Keys passed to sync do not comprise a unique constraint.")
        elif bad:
            if update:
                raise ReadOnlyDatabaseError("sync needs to update, but database is read-only.")
            else:
                raise DatabaseConflictError(
                    f"Conflict in sync for table {table.name} on column(s) {format_bad(bad)}."
                )
        inserted_or_updated = False
    if returning is None:
        return None, inserted_or_updated
    else:
        assert result is not None
        return {k: v for k, v in zip(returning, result)}, inserted_or_updated
def insert(
    self,
    table: sqlalchemy.schema.Table,
    *rows: dict,
    returnIds: bool = False,
    select: Optional[sqlalchemy.sql.expression.SelectBase] = None,
    names: Optional[Iterable[str]] = None,
) -> Optional[List[int]]:
    """Insert one or more rows into a table, optionally returning
    autoincrement primary key values.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    returnIds : `bool`
        If `True` (`False` is default), return the values of the table's
        autoincrement primary key field (which must exist).
    select : `sqlalchemy.sql.SelectBase`, optional
        A SELECT query expression to insert rows from.  Cannot be provided
        with either ``rows`` or ``returnIds=True``.
    names : `Iterable` [ `str` ], optional
        Names of columns in ``table`` to be populated, ordered to match the
        columns returned by ``select``.  Ignored if ``select`` is `None`.
        If not provided, the columns returned by ``select`` must be named
        to match the desired columns of ``table``.
    *rows
        Positional arguments are the rows to be inserted, as dictionaries
        mapping column name to value.  The keys in all dictionaries must
        be the same.

    Returns
    -------
    ids : `None`, or `list` of `int`
        If ``returnIds`` is `True`, a `list` containing the inserted
        values for the table's autoincrement primary key.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.
    TypeError
        Raised if ``select`` is combined with ``rows`` or
        ``returnIds=True``.

    Notes
    -----
    The default implementation uses bulk insert syntax when ``returnIds``
    is `False`, and a loop over single-row insert operations when it is
    `True`.

    Derived classes should reimplement when they can provide a more
    efficient implementation (especially for the latter case).

    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.
    """
    self.assertTableWriteable(table, f"Cannot insert into read-only table {table}.")
    if select is not None and (rows or returnIds):
        raise TypeError("'select' is incompatible with passing value rows or returnIds=True.")
    if not rows and select is None:
        # Nothing to insert at all.
        return [] if returnIds else None
    with self._transaction() as (_, connection):
        if returnIds:
            # One statement per row so each generated key can be read.
            statement = table.insert()
            ids = []
            for row in rows:
                ids.append(connection.execute(statement, row).inserted_primary_key[0])
            return ids
        if select is None:
            connection.execute(table.insert(), rows)
            return None
        if names is None:
            # columns() is deprecated since 1.4, but
            # selected_columns() method did not exist in 1.3.
            if hasattr(select, "selected_columns"):
                names = select.selected_columns.keys()
            else:
                names = select.columns.keys()
        connection.execute(table.insert().from_select(list(names), select))
        return None
@abstractmethod
def replace(self, table: sqlalchemy.schema.Table, *rows: dict) -> None:
    """Insert rows into a table, overwriting any existing rows whose
    primary key would otherwise collide with a new row.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    *rows
        Positional arguments are the rows to be inserted, as dictionaries
        mapping column name to value.  The keys in all dictionaries must
        be the same.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    Implementations should raise a `sqlalchemy.exc.IntegrityError`
    exception when a constraint other than the primary key would be
    violated.

    Implementations are not required to support `replace` on tables
    with autoincrement keys.
    """
    # Abstract: concrete databases must supply the implementation.
    raise NotImplementedError()
@abstractmethod
def ensure(self, table: sqlalchemy.schema.Table, *rows: dict, primary_key_only: bool = False) -> int:
    """Insert rows into a table, silently skipping any row whose
    insertion would violate a unique constraint.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    *rows
        Positional arguments are the rows to be inserted, as dictionaries
        mapping column name to value.  The keys in all dictionaries must
        be the same.
    primary_key_only : `bool`, optional
        If `True` (`False` is default), only skip rows that violate the
        primary key constraint, and raise an exception (and rollback
        transactions) for other constraint violations.

    Returns
    -------
    count : `int`
        The number of rows actually inserted.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.
        This is raised even if the operation would do nothing even on a
        writeable database.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    Implementations are not required to support `ensure` on tables
    with autoincrement keys.
    """
    # Abstract: concrete databases must supply the implementation.
    raise NotImplementedError()
def delete(self, table: sqlalchemy.schema.Table, columns: Iterable[str], *rows: dict) -> int:
    """Delete one or more rows from a table.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table that rows should be deleted from.
    columns : `~collections.abc.Iterable` of `str`
        The names of columns that will be used to constrain the rows to
        be deleted; these will be combined via ``AND`` to form the
        ``WHERE`` clause of the delete query.  May be a one-shot iterator.
    *rows
        Positional arguments are the keys of rows to be deleted, as
        dictionaries mapping column name to value.  The keys in all
        dictionaries must be exactly the names in ``columns``.

    Returns
    -------
    count : `int`
        Number of rows deleted.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    The default implementation should be sufficient for most derived
    classes.
    """
    self.assertTableWriteable(table, f"Cannot delete from read-only table {table}.")
    # Materialize before testing emptiness: a generator or other one-shot
    # iterator is always truthy, so checking it directly would mistake
    # "no columns" (delete everything) for "columns but no rows".
    columns = list(columns)
    if columns and not rows:
        # If there are no columns, this operation is supposed to delete
        # everything (so we proceed as usual).  But if there are columns,
        # but no rows, it was a constrained bulk operation where the
        # constraint is that no rows match, and we should short-circuit
        # while reporting that no rows were affected.
        return 0
    sql = table.delete()

    # More efficient to use IN operator if there is only one
    # variable changing across all rows.
    content: Dict[str, Set] = defaultdict(set)
    if len(columns) == 1:
        # Nothing to calculate since we can always use IN
        column = columns[0]
        changing_columns = [column]
        content[column] = {row[column] for row in rows}
    else:
        for row in rows:
            for k, v in row.items():
                content[k].add(v)
        changing_columns = [col for col, values in content.items() if len(values) > 1]

    if len(changing_columns) != 1:
        # More than one column changes each time so do explicit bind
        # parameters and have each row processed separately.
        whereTerms = [table.columns[name] == sqlalchemy.sql.bindparam(name) for name in columns]
        if whereTerms:
            sql = sql.where(sqlalchemy.sql.and_(*whereTerms))
        with self._transaction() as (_, connection):
            return connection.execute(sql, rows).rowcount

    # One of the columns has changing values but any others are
    # fixed.  In this case we can use an IN operator and be more
    # efficient.
    name = changing_columns.pop()

    # Simple where clause for the unchanging columns
    clauses = []
    for k, v in content.items():
        if k == name:
            continue
        # The set only has one element
        clauses.append(table.columns[k] == v.pop())

    # The IN operator will not work for "infinite" numbers of
    # rows so must batch it up into distinct calls.
    in_content = list(content[name])
    n_elements = len(in_content)

    rowcount = 0
    n_per_loop = 1_000  # Controls how many items to put in IN clause
    with self._transaction() as (_, connection):
        for iposn in range(0, n_elements, n_per_loop):
            endpos = iposn + n_per_loop
            in_clause = table.columns[name].in_(in_content[iposn:endpos])

            newsql = sql.where(sqlalchemy.sql.and_(*clauses, in_clause))
            rowcount += connection.execute(newsql).rowcount
    return rowcount
def deleteWhere(self, table: sqlalchemy.schema.Table, where: sqlalchemy.sql.ColumnElement) -> int:
    """Delete rows from a table with pre-constructed WHERE clause.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table that rows should be deleted from.
    where : `sqlalchemy.sql.ColumnElement`
        Pre-built expression that is used as the ``WHERE`` clause of the
        delete query.

    Returns
    -------
    count : `int`
        Number of rows deleted.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    The default implementation should be sufficient for most derived
    classes.
    """
    self.assertTableWriteable(table, f"Cannot delete from read-only table {table}.")

    statement = table.delete().where(where)
    with self._transaction() as (_, connection):
        outcome = connection.execute(statement)
        return outcome.rowcount
def update(self, table: sqlalchemy.schema.Table, where: Dict[str, str], *rows: dict) -> int:
    """Update one or more rows in a table.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table containing the rows to be updated.
    where : `dict` [`str`, `str`]
        A mapping from the names of columns that will be used to search for
        existing rows to the keys that will hold these values in the
        ``rows`` dictionaries.  Note that these may not be the same due to
        SQLAlchemy limitations.
    *rows
        Positional arguments are the rows to be updated.  The keys in all
        dictionaries must be the same, and may correspond to either a
        value in the ``where`` dictionary or the name of a column to be
        updated.

    Returns
    -------
    count : `int`
        Number of rows matched (regardless of whether the update actually
        modified them).

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    The default implementation should be sufficient for most derived
    classes.
    """
    self.assertTableWriteable(table, f"Cannot update read-only table {table}.")
    if not rows:
        return 0
    statement = table.update()
    # Each search column is matched against a bind parameter named by the
    # corresponding value in ``where``.
    conditions = [
        table.columns[column] == sqlalchemy.sql.bindparam(key) for column, key in where.items()
    ]
    statement = statement.where(sqlalchemy.sql.and_(*conditions))
    with self._transaction() as (_, connection):
        outcome = connection.execute(statement, rows)
        return outcome.rowcount
@contextmanager
def query(
    self,
    sql: sqlalchemy.sql.expression.Executable | sqlalchemy.sql.expression.SelectBase,
    *args: Any,
    **kwargs: Any,
) -> Iterator[sqlalchemy.engine.CursorResult]:
    """Run a SELECT query against the database.

    Parameters
    ----------
    sql : `sqlalchemy.sql.expression.SelectBase`
        A SQLAlchemy representation of a ``SELECT`` query.
    *args
        Additional positional arguments are forwarded to
        `sqlalchemy.engine.Connection.execute`.
    **kwargs
        Additional keyword arguments are forwarded to
        `sqlalchemy.engine.Connection.execute`.

    Returns
    -------
    result_context : `sqlalchemy.engine.CursorResults`
        Context manager that returns the query result object when entered.
        These results are invalidated when the context is exited.

    Notes
    -----
    If there is no session connection, a new connection is obtained from
    the engine and closed when the context exits, including when executing
    the query itself raises.  An existing session connection is reused and
    left open.
    """
    if self._session_connection is None:
        connection = self._engine.connect()
    else:
        connection = self._session_connection
    try:
        # Execute inside the ``try`` so that a connection opened above is
        # closed even if the statement fails before anything is yielded.
        #
        # TODO: SelectBase is not good for execute(), but it used everywhere,
        # e.g. in daf_relation. We should switch to Executable at some point.
        # The cast target is quoted: it is only meaningful to type checkers
        # and does not need to be looked up at runtime.
        result = connection.execute(cast("sqlalchemy.sql.expression.Executable", sql), *args, **kwargs)
        yield result
    finally:
        if connection is not self._session_connection:
            connection.close()
@abstractmethod
def constant_rows(
    self,
    fields: NamedValueAbstractSet[ddl.FieldSpec],
    *rows: dict,
    name: Optional[str] = None,
) -> sqlalchemy.sql.FromClause:
    """Build a SQLAlchemy construct holding a handful of literal rows.

    Parameters
    ----------
    fields : `NamedValueAbstractSet` [ `ddl.FieldSpec` ]
        Specifications of the columns making up each row.  Unique and
        foreign key constraints are ignored.
    *rows : `dict`
        The row values, each a mapping keyed by field name.
    name : `str`, optional
        Name to give the SQL construct.  When omitted, an opaque but
        unique identifier is generated.

    Returns
    -------
    from_clause : `sqlalchemy.sql.FromClause`
        SQLAlchemy object representing the given rows.  This is guaranteed
        to be something that can be directly joined into a ``SELECT``
        query's ``FROM`` clause, and will not involve a temporary table
        that needs to be cleaned up later.

    Notes
    -----
    This default implementation relies on the SQL-standard ``VALUES``
    construct.  Because support for it varies across popular RDBMSs, the
    method remains abstract so that subclasses must opt in explicitly by
    delegating to `super`.
    """
    construct_name = f"tmp_{uuid.uuid4().hex}" if name is None else name
    columns = [sqlalchemy.Column(spec.name, spec.getSizedColumnType()) for spec in fields]
    values_clause = sqlalchemy.sql.values(*columns, name=construct_name)
    # Flatten each row mapping into a tuple ordered like the field names.
    row_tuples = []
    for row in rows:
        row_tuples.append(tuple(row[column_name] for column_name in fields.names))
    return values_clause.data(row_tuples)
def get_constant_rows_max(self) -> int:
    """Report the largest number of rows callers should hand to
    `constant_rows` for this backend.

    Returns
    -------
    max : `int`
        Maximum number of rows.

    Notes
    -----
    The value is meant to reflect typical performance profiles (or a guess
    at these), not just hard database engine limits.
    """
    max_rows = 100
    return max_rows
# Declaration only: no value is assigned at this point in the class body
# (presumably set per instance by concrete implementations -- TODO confirm).
origin: int
"""An integer ID that should be used as the default for any datasets,
quanta, or other entities that use a (autoincrement, origin) compound
primary key (`int`).
"""

# Declaration only; the annotation allows `None` as well as a string.
namespace: Optional[str]
"""The schema or namespace this database instance is associated with
(`str` or `None`).
"""