Coverage for python/lsst/daf/butler/registry/interfaces/_database.py: 16%
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = [
24 "Database",
25 "ReadOnlyDatabaseError",
26 "DatabaseConflictError",
27 "SchemaAlreadyDefinedError",
28 "StaticTablesContext",
29]
31import uuid
32import warnings
33from abc import ABC, abstractmethod
34from collections import defaultdict
35from contextlib import contextmanager
36from typing import Any, Callable, Dict, Iterable, Iterator, List, Optional, Sequence, Set, Tuple, Type, Union
38import astropy.time
39import sqlalchemy
41from ...core import SpatialRegionDatabaseRepresentation, TimespanDatabaseRepresentation, ddl, time_utils
42from .._exceptions import ConflictingDefinitionError
44_IN_SAVEPOINT_TRANSACTION = "IN_SAVEPOINT_TRANSACTION"
47def _checkExistingTableDefinition(name: str, spec: ddl.TableSpec, inspection: List[Dict[str, Any]]) -> None:
48 """Test that the definition of a table in a `ddl.TableSpec` and from
49 database introspection are consistent.
51 Parameters
52 ----------
53 name : `str`
54 Name of the table (only used in error messages).
55 spec : `ddl.TableSpec`
56 Specification of the table.
57 inspection : `list` [ `dict` ]
58 List of dictionaries returned by
59 `sqlalchemy.engine.reflection.Inspector.get_columns`.
61 Raises
62 ------
63 DatabaseConflictError
64 Raised if the definitions are inconsistent.
65 """
66 columnNames = [c["name"] for c in inspection]
67 if spec.fields.names != set(columnNames):
68 raise DatabaseConflictError(
69 f"Table '{name}' exists but is defined differently in the database; "
70 f"specification has columns {list(spec.fields.names)}, while the "
71 f"table in the database has {columnNames}."
72 )
75class ReadOnlyDatabaseError(RuntimeError):
76 """Exception raised when a write operation is called on a read-only
77 `Database`.
78 """
81class DatabaseConflictError(ConflictingDefinitionError):
82 """Exception raised when database content (row values or schema entities)
83 are inconsistent with what this client expects.
84 """
87class SchemaAlreadyDefinedError(RuntimeError):
88 """Exception raised when trying to initialize database schema when some
89 tables already exist.
90 """
93class StaticTablesContext:
94 """Helper class used to declare the static schema for a registry layer
95 in a database.
97 An instance of this class is returned by `Database.declareStaticTables`,
98 which is the only way instances should be constructed.
99 """
101 def __init__(self, db: Database):
102 self._db = db
103 self._foreignKeys: List[Tuple[sqlalchemy.schema.Table, sqlalchemy.schema.ForeignKeyConstraint]] = []
104 self._inspector = sqlalchemy.inspect(self._db._engine)
105 self._tableNames = frozenset(self._inspector.get_table_names(schema=self._db.namespace))
106 self._initializers: List[Callable[[Database], None]] = []
108 def addTable(self, name: str, spec: ddl.TableSpec) -> sqlalchemy.schema.Table:
109 """Add a new table to the schema, returning its sqlalchemy
110 representation.
112 The new table may not actually be created until the end of the
113 context created by `Database.declareStaticTables`, allowing tables
114 to be declared in any order even in the presence of foreign key
115 relationships.
116 """
117 name = self._db._mangleTableName(name)
118 if name in self._tableNames:
119 _checkExistingTableDefinition(
120 name, spec, self._inspector.get_columns(name, schema=self._db.namespace)
121 )
122 table = self._db._convertTableSpec(name, spec, self._db._metadata)
123 for foreignKeySpec in spec.foreignKeys:
124 self._foreignKeys.append(
125 (table, self._db._convertForeignKeySpec(name, foreignKeySpec, self._db._metadata))
126 )
127 return table
129 def addTableTuple(self, specs: Tuple[ddl.TableSpec, ...]) -> Tuple[sqlalchemy.schema.Table, ...]:
130 """Add a named tuple of tables to the schema, returning their
131 SQLAlchemy representations in a named tuple of the same type.
133 The new tables may not actually be created until the end of the
134 context created by `Database.declareStaticTables`, allowing tables
135 to be declared in any order even in the presence of foreign key
136 relationships.
138 Notes
139 -----
140 ``specs`` *must* be an instance of a type created by
141 `collections.namedtuple`, not just a regular tuple, and the returned
142 object is guaranteed to be of the same type. Because `~collections.namedtuple`
143 is just a factory for `type` objects, not an actual type itself,
144 we cannot represent this with type annotations.
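
Examples
--------
A minimal sketch, assuming ``db`` is an existing `Database`; the
namedtuple type and table specifications here are hypothetical::

    from collections import namedtuple

    Tables = namedtuple("Tables", ["table1", "table2"])
    specs = Tables(table1=ddl.TableSpec(...), table2=ddl.TableSpec(...))
    with db.declareStaticTables(create=True) as schema:
        tables = schema.addTableTuple(specs)
    # ``tables`` is again a ``Tables`` instance whose fields are the
    # corresponding `sqlalchemy.schema.Table` objects.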
145 """
146 return specs._make( # type: ignore
147 self.addTable(name, spec) for name, spec in zip(specs._fields, specs) # type: ignore
148 )
150 def addInitializer(self, initializer: Callable[[Database], None]) -> None:
151 """Add a method that does one-time initialization of a database.
153 Initialization can mean anything that changes the state of a database
154 and needs to be done exactly once after the database schema is created.
155 One example is populating schema attributes.
157 Parameters
158 ----------
159 initializer : callable
160 Method of a single argument which is a `Database` instance.
161 """
162 self._initializers.append(initializer)
165class Session:
166 """Class representing a persistent connection to a database.
168 Parameters
169 ----------
170 db : `Database`
171 Database instance.
173 Notes
174 -----
175 Instances of the Session class should not be created by client code;
176 `Database.session` should be used to create the context for a session::
178 with db.session() as session:
179 session.method()
180 db.method()
182 In the current implementation sessions can be nested and transactions can
183 be nested within a session. All nested sessions and transactions share the
184 same database connection.
186 The Session class represents a limited subset of the database API that
187 requires a persistent connection to a database (e.g. temporary tables,
188 which have the lifetime of a session). Potentially most of the database
189 API could be associated with the Session class.
190 """
192 def __init__(self, db: Database):
193 self._db = db
195 def makeTemporaryTable(self, spec: ddl.TableSpec, name: Optional[str] = None) -> sqlalchemy.schema.Table:
196 """Create a temporary table.
198 Parameters
199 ----------
200 spec : `TableSpec`
201 Specification for the table.
202 name : `str`, optional
203 A unique (within this session/connection) name for the table.
204 Subclasses may override to modify the actual name used. If not
205 provided, a unique name will be generated.
207 Returns
208 -------
209 table : `sqlalchemy.schema.Table`
210 SQLAlchemy representation of the table.
212 Notes
213 -----
214 Temporary tables may be created, dropped, and written to even in
215 read-only databases - at least according to the Python-level
216 protections in the `Database` classes. Server permissions may say
217 otherwise, but in that case they probably need to be modified to
218 support the full range of expected read-only butler behavior.
220 Temporary table rows are guaranteed to be dropped when a connection is
221 closed. `Database` implementations are permitted to allow the table to
222 remain as long as this is transparent to the user (i.e. "creating" the
223 temporary table in a new session should not be an error, even if it
224 does nothing).
226 It may not be possible to use temporary tables within transactions with
227 some database engines (or configurations thereof).
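
Examples
--------
A minimal sketch, assuming ``db`` is an existing `Database` and ``spec``
is a previously constructed `ddl.TableSpec` (both names, and the column
used below, are hypothetical)::

    with db.session() as session:
        tmp = session.makeTemporaryTable(spec)
        db.insert(tmp, {"id": 1})
        # ... run queries against ``tmp`` ...
        session.dropTemporaryTable(tmp)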
228 """
229 if name is None:
230 name = f"tmp_{uuid.uuid4().hex}"
231 table = self._db._convertTableSpec(
232 name, spec, self._db._metadata, prefixes=["TEMPORARY"], schema=sqlalchemy.schema.BLANK_SCHEMA
233 )
234 if table.key in self._db._tempTables:
235 if table.key != name:
236 raise ValueError(
237 f"A temporary table with name {name} (transformed to {table.key} by "
238 f"Database) already exists."
239 )
240 for foreignKeySpec in spec.foreignKeys:
241 table.append_constraint(self._db._convertForeignKeySpec(name, foreignKeySpec, self._db._metadata))
242 with self._db._connection() as connection:
243 table.create(connection)
244 self._db._tempTables.add(table.key)
245 return table
247 def dropTemporaryTable(self, table: sqlalchemy.schema.Table) -> None:
248 """Drop a temporary table.
250 Parameters
251 ----------
252 table : `sqlalchemy.schema.Table`
253 A SQLAlchemy object returned by a previous call to
254 `makeTemporaryTable`.
255 """
256 if table.key in self._db._tempTables:
257 with self._db._connection() as connection:
258 table.drop(connection)
259 self._db._tempTables.remove(table.key)
260 else:
261 raise TypeError(f"Table {table.key} was not created by makeTemporaryTable.")
264class Database(ABC):
265 """An abstract interface that represents a particular database engine's
266 representation of a single schema/namespace/database.
268 Parameters
269 ----------
270 origin : `int`
271 An integer ID that should be used as the default for any datasets,
272 quanta, or other entities that use a (autoincrement, origin) compound
273 primary key.
274 engine : `sqlalchemy.engine.Engine`
275 The SQLAlchemy engine for this `Database`.
276 namespace : `str`, optional
277 Name of the schema or namespace this instance is associated with.
278 This is passed as the ``schema`` argument when constructing a
279 `sqlalchemy.schema.MetaData` instance. We use ``namespace`` instead to
280 avoid confusion between "schema means namespace" and "schema means
281 table definitions".
283 Notes
284 -----
285 `Database` requires all write operations to go through its special named
286 methods. Our write patterns are sufficiently simple that we don't really
287 need the full flexibility of SQL insert/update/delete syntax, and we need
288 non-standard (but common) functionality in these operations sufficiently
289 often that it seems worthwhile to provide our own generic API.
291 In contrast, `Database.query` allows arbitrary ``SELECT`` queries (via
292 their SQLAlchemy representation) to be run, as we expect these to require
293 significantly more sophistication while still being limited to standard
294 SQL.
296 `Database` itself has several underscore-prefixed attributes:
298 - ``_engine``: SQLAlchemy object representing its engine.
299 - ``_connection``: method returning a context manager for
300 `sqlalchemy.engine.Connection` object.
301 - ``_metadata``: the `sqlalchemy.schema.MetaData` object representing
302 the tables and other schema entities.
304 These are considered protected (derived classes may access them, but other
305 code should not), and read-only, aside from executing SQL via
306 ``_connection``.
307 """
309 def __init__(self, *, origin: int, engine: sqlalchemy.engine.Engine, namespace: Optional[str] = None):
310 self.origin = origin
311 self.namespace = namespace
312 self._engine = engine
313 self._session_connection: Optional[sqlalchemy.engine.Connection] = None
314 self._metadata: Optional[sqlalchemy.schema.MetaData] = None
315 self._tempTables: Set[str] = set()
317 def __repr__(self) -> str:
318 # Rather than try to reproduce all the parameters used to create
319 # the object, instead report the more useful information of the
320 # connection URL.
321 if self._engine.url.password is not None:
322 uri = str(self._engine.url.set(password="***"))
323 else:
324 uri = str(self._engine.url)
325 if self.namespace:
326 uri += f"#{self.namespace}"
327 return f'{type(self).__name__}("{uri}")'
329 @classmethod
330 def makeDefaultUri(cls, root: str) -> Optional[str]:
331 """Create a default connection URI appropriate for the given root
332 directory, or `None` if there can be no such default.
333 """
334 return None
336 @classmethod
337 def fromUri(
338 cls, uri: str, *, origin: int, namespace: Optional[str] = None, writeable: bool = True
339 ) -> Database:
340 """Construct a database from a SQLAlchemy URI.
342 Parameters
343 ----------
344 uri : `str`
345 A SQLAlchemy URI connection string.
346 origin : `int`
347 An integer ID that should be used as the default for any datasets,
348 quanta, or other entities that use a (autoincrement, origin)
349 compound primary key.
350 namespace : `str`, optional
351 A database namespace (i.e. schema) the new instance should be
352 associated with. If `None` (default), the namespace (if any) is
353 inferred from the URI.
354 writeable : `bool`, optional
355 If `True`, allow write operations on the database, including
356 ``CREATE TABLE``.
358 Returns
359 -------
360 db : `Database`
361 A new `Database` instance.
362 """
363 return cls.fromEngine(
364 cls.makeEngine(uri, writeable=writeable), origin=origin, namespace=namespace, writeable=writeable
365 )
367 @classmethod
368 @abstractmethod
369 def makeEngine(cls, uri: str, *, writeable: bool = True) -> sqlalchemy.engine.Engine:
370 """Create a `sqlalchemy.engine.Engine` from a SQLAlchemy URI.
372 Parameters
373 ----------
374 uri : `str`
375 A SQLAlchemy URI connection string.
376 writeable : `bool`, optional
377 If `True`, allow write operations on the database, including
378 ``CREATE TABLE``.
380 Returns
381 -------
382 engine : `sqlalchemy.engine.Engine`
383 A database engine.
385 Notes
386 -----
387 Subclasses that support other ways to connect to a database are
388 encouraged to add optional arguments to their implementation of this
389 method, as long as they maintain compatibility with the base class
390 call signature.
391 """
392 raise NotImplementedError()
394 @classmethod
395 @abstractmethod
396 def fromEngine(
397 cls,
398 engine: sqlalchemy.engine.Engine,
399 *,
400 origin: int,
401 namespace: Optional[str] = None,
402 writeable: bool = True,
403 ) -> Database:
404 """Create a new `Database` from an existing `sqlalchemy.engine.Engine`.
406 Parameters
407 ----------
408 engine : `sqlalchemy.engine.Engine`
409 The engine for the database. May be shared between `Database`
410 instances.
411 origin : `int`
412 An integer ID that should be used as the default for any datasets,
413 quanta, or other entities that use a (autoincrement, origin)
414 compound primary key.
415 namespace : `str`, optional
416 A different database namespace (i.e. schema) the new instance
417 should be associated with. If `None` (default), the namespace
418 (if any) is inferred from the connection.
419 writeable : `bool`, optional
420 If `True`, allow write operations on the database, including
421 ``CREATE TABLE``.
423 Returns
424 -------
425 db : `Database`
426 A new `Database` instance.
428 Notes
429 -----
430 This method allows different `Database` instances to share the same
431 engine, which is desirable when they represent different namespaces
432 that can be queried together.
433 """
434 raise NotImplementedError()
436 @contextmanager
437 def session(self) -> Iterator:
438 """Return a context manager that represents a session (persistent
439 connection to a database).
440 """
441 if self._session_connection is not None:
442 # session already started, just reuse that
443 yield Session(self)
444 else:
445 try:
446 # open new connection and close it when done
447 self._session_connection = self._engine.connect()
448 yield Session(self)
449 finally:
450 if self._session_connection is not None:
451 self._session_connection.close()
452 self._session_connection = None
453 # Temporary tables only live within a session
454 self._tempTables = set()
456 @contextmanager
457 def transaction(
458 self,
459 *,
460 interrupting: bool = False,
461 savepoint: bool = False,
462 lock: Iterable[sqlalchemy.schema.Table] = (),
463 ) -> Iterator:
464 """Return a context manager that represents a transaction.
466 Parameters
467 ----------
468 interrupting : `bool`, optional
469 If `True` (`False` is default), this transaction block may not be
470 nested without an outer one, and attempting to do so is a logic
471 (i.e. assertion) error.
472 savepoint : `bool`, optional
473 If `True` (`False` is default), create a `SAVEPOINT`, allowing
474 exceptions raised by the database (e.g. due to constraint
475 violations) during this transaction's context to be caught outside
476 it without also rolling back all operations in an outer transaction
477 block. If `False`, transactions may still be nested, but a
478 rollback may be generated at any level and affects all levels, and
479 commits are deferred until the outermost block completes. If any
480 outer transaction block was created with ``savepoint=True``, all
481 inner blocks will be as well (regardless of the actual value
482 passed). This has no effect if this is the outermost transaction.
483 lock : `Iterable` [ `sqlalchemy.schema.Table` ], optional
484 A list of tables to lock for the duration of this transaction.
485 These locks are guaranteed to prevent concurrent writes and allow
486 this transaction (only) to acquire the same locks (others should
487 block), but only prevent concurrent reads if the database engine
488 requires that in order to block concurrent writes.
490 Notes
491 -----
492 All transactions on a connection managed by one or more `Database`
493 instances _must_ go through this method, or transaction state will not
494 be correctly managed.
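
Examples
--------
A minimal sketch, assuming ``db`` and ``table`` already exist (the row
contents are hypothetical)::

    with db.transaction(savepoint=True):
        db.insert(table, {"id": 1})
        # An exception raised here rolls back only this block; with
        # savepoint=True an outer transaction, if any, is unaffected.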
495 """
496 # need a connection, use session to manage it
497 with self.session():
498 assert self._session_connection is not None
499 connection = self._session_connection
500 assert not (interrupting and connection.in_transaction()), (
501 "Logic error in transaction nesting: an operation that would "
502 "interrupt the active transaction context has been requested."
503 )
504 # We remember whether we are already in a SAVEPOINT transaction via
505 # the connection object's 'info' dict, which is explicitly for user
506 # information like this. This is safer than a regular `Database`
507 # instance attribute, because it guards against multiple `Database`
508 # instances sharing the same connection. The need to use our own
509 # flag here to track whether we're in a nested transaction should
510 # go away in SQLAlchemy 1.4, which seems to have a
511 # `Connection.in_nested_transaction()` method.
512 savepoint = savepoint or connection.info.get(_IN_SAVEPOINT_TRANSACTION, False)
513 connection.info[_IN_SAVEPOINT_TRANSACTION] = savepoint
514 if connection.in_transaction() and savepoint:
515 trans = connection.begin_nested()
516 elif not connection.in_transaction():
517 # Use a regular (non-savepoint) transaction always for the
518 # outermost context.
519 trans = connection.begin()
520 else:
521 # Nested non-savepoint transactions, don't do anything.
522 trans = None
523 self._lockTables(connection, lock)
524 try:
525 yield
526 if trans is not None:
527 trans.commit()
528 except BaseException:
529 if trans is not None:
530 trans.rollback()
531 raise
532 finally:
533 if not connection.in_transaction():
534 connection.info.pop(_IN_SAVEPOINT_TRANSACTION, None)
536 @contextmanager
537 def _connection(self) -> Iterator[sqlalchemy.engine.Connection]:
538 """Return context manager for Connection."""
539 if self._session_connection is not None:
540 # We are in a session context, but possibly not in a transaction
541 # context; start a short transaction in that case.
542 if self._session_connection.in_transaction():
543 yield self._session_connection
544 else:
545 with self._session_connection.begin():
546 yield self._session_connection
547 else:
548 # Make new connection and transaction, transaction will be
549 # committed on context exit.
550 with self._engine.begin() as connection:
551 yield connection
553 @abstractmethod
554 def _lockTables(
555 self, connection: sqlalchemy.engine.Connection, tables: Iterable[sqlalchemy.schema.Table] = ()
556 ) -> None:
557 """Acquire locks on the given tables.
559 This is an implementation hook for subclasses, called by `transaction`.
560 It should not be called directly by other code.
562 Parameters
563 ----------
564 connection : `sqlalchemy.engine.Connection`
565 Database connection object. It is guaranteed that a transaction is
566 already in progress for this connection.
567 tables : `Iterable` [ `sqlalchemy.schema.Table` ], optional
568 A list of tables to lock for the duration of this transaction.
569 These locks are guaranteed to prevent concurrent writes and allow
570 this transaction (only) to acquire the same locks (others should
571 block), but only prevent concurrent reads if the database engine
572 requires that in order to block concurrent writes.
573 """
574 raise NotImplementedError()
576 def isTableWriteable(self, table: sqlalchemy.schema.Table) -> bool:
577 """Check whether a table is writeable, either because the database
578 connection is read-write or the table is a temporary table.
580 Parameters
581 ----------
582 table : `sqlalchemy.schema.Table`
583 SQLAlchemy table object to check.
585 Returns
586 -------
587 writeable : `bool`
588 Whether this table is writeable.
589 """
590 return self.isWriteable() or table.key in self._tempTables
592 def assertTableWriteable(self, table: sqlalchemy.schema.Table, msg: str) -> None:
593 """Raise if the given table is not writeable, i.e. the database
594 connection is read-only and the table is not a temporary table.
596 Parameters
597 ----------
598 table : `sqlalchemy.schema.Table`
599 SQLAlchemy table object to check.
600 msg : `str`
601 Message for the `ReadOnlyDatabaseError` raised if the table is not
602 writeable.
603 """
604 if not self.isTableWriteable(table):
605 raise ReadOnlyDatabaseError(msg)
607 @contextmanager
608 def declareStaticTables(self, *, create: bool) -> Iterator[StaticTablesContext]:
609 """Return a context manager in which the database's static DDL schema
610 can be declared.
612 Parameters
613 ----------
614 create : `bool`
615 If `True`, attempt to create all tables at the end of the context.
616 If `False`, they will be assumed to already exist.
618 Returns
619 -------
620 schema : `StaticTablesContext`
621 A helper object that is used to add new tables.
623 Raises
624 ------
625 ReadOnlyDatabaseError
626 Raised if ``create`` is `True`, `Database.isWriteable` is `False`,
627 and one or more declared tables do not already exist.
629 Examples
630 --------
631 Given a `Database` instance ``db``::
633 with db.declareStaticTables(create=True) as schema:
634 schema.addTable("table1", TableSpec(...))
635 schema.addTable("table2", TableSpec(...))
637 Notes
638 -----
639 A database's static DDL schema must be declared before any dynamic
640 tables are managed via calls to `ensureTableExists` or
641 `getExistingTable`. The order in which static schema tables are added
642 inside the context block is unimportant; they will automatically be
643 sorted and added in an order consistent with their foreign key
644 relationships.
645 """
646 if create and not self.isWriteable():
647 raise ReadOnlyDatabaseError(f"Cannot create tables in read-only database {self}.")
648 self._metadata = sqlalchemy.MetaData(schema=self.namespace)
649 try:
650 context = StaticTablesContext(self)
651 if create and context._tableNames:
652 # The database looks like it is already initialized; to avoid the
653 # danger of modifying or destroying a valid schema we refuse to do
654 # anything in this case.
655 raise SchemaAlreadyDefinedError(f"Cannot create tables in non-empty database {self}.")
656 yield context
657 for table, foreignKey in context._foreignKeys:
658 table.append_constraint(foreignKey)
659 if create:
660 if self.namespace is not None:
661 if self.namespace not in context._inspector.get_schema_names():
662 with self._connection() as connection:
663 connection.execute(sqlalchemy.schema.CreateSchema(self.namespace))
664 # In our tables we have columns that make use of sqlalchemy
665 # Sequence objects. There is currently a bug in sqlalchemy that
666 # causes a deprecation warning to be thrown on a property of
667 # the Sequence object when the repr for the sequence is
668 # created. Here a filter is used to catch these deprecation
669 # warnings when tables are created.
670 with warnings.catch_warnings():
671 warnings.simplefilter("ignore", category=sqlalchemy.exc.SADeprecationWarning)
672 self._metadata.create_all(self._engine)
673 # call all initializer methods sequentially
674 for init in context._initializers:
675 init(self)
676 except BaseException:
677 self._metadata = None
678 raise
680 @abstractmethod
681 def isWriteable(self) -> bool:
682 """Return `True` if this database can be modified by this client."""
683 raise NotImplementedError()
685 @abstractmethod
686 def __str__(self) -> str:
687 """Return a human-readable identifier for this `Database`, including
688 any namespace or schema that identifies its names within a `Registry`.
689 """
690 raise NotImplementedError()
692 @property
693 def dialect(self) -> sqlalchemy.engine.Dialect:
694 """The SQLAlchemy dialect for this database engine
695 (`sqlalchemy.engine.Dialect`).
696 """
697 return self._engine.dialect
699 def shrinkDatabaseEntityName(self, original: str) -> str:
700 """Return a version of the given name that fits within this database
701 engine's length limits for table, constraint, index, and sequence
702 names.
704 Implementations should not assume that simple truncation is safe,
705 because multiple long names often begin with the same prefix.
707 The default implementation simply returns the given name.
709 Parameters
710 ----------
711 original : `str`
712 The original name.
714 Returns
715 -------
716 shrunk : `str`
717 The new, possibly shortened name.
718 """
719 return original
721 def expandDatabaseEntityName(self, shrunk: str) -> str:
722 """Retrieve the original name for a database entity that was too long
723 to fit within the database engine's limits.
725 Parameters
726 ----------
727 shrunk : `str`
728 The shortened name returned by a previous call to `shrinkDatabaseEntityName`.
730 Returns
731 -------
732 original : `str`
733 The original, full-length name.
734 """
735 return shrunk
737 def _mangleTableName(self, name: str) -> str:
738 """Map a logical, user-visible table name to the true table name used
739 in the database.
741 The default implementation returns the given name unchanged.
743 Parameters
744 ----------
745 name : `str`
746 Input table name. Should not include a namespace (i.e. schema)
747 prefix.
749 Returns
750 -------
751 mangled : `str`
752 Mangled version of the table name (still with no namespace prefix).
754 Notes
755 -----
756 Reimplementations of this method must be idempotent - mangling an
757 already-mangled name must have no effect.
758 """
759 return name
761 def _makeColumnConstraints(self, table: str, spec: ddl.FieldSpec) -> List[sqlalchemy.CheckConstraint]:
762 """Create constraints based on this spec.
764 Parameters
765 ----------
766 table : `str`
767 Name of the table this column is being added to.
768 spec : `FieldSpec`
769 Specification for the field to be added.
771 Returns
772 -------
773 constraints : `list` of `sqlalchemy.CheckConstraint`
774 Constraints added for this column.
775 """
776 # By default we return no additional constraints
777 return []
779 def _convertFieldSpec(
780 self, table: str, spec: ddl.FieldSpec, metadata: sqlalchemy.MetaData, **kwargs: Any
781 ) -> sqlalchemy.schema.Column:
782 """Convert a `FieldSpec` to a `sqlalchemy.schema.Column`.
784 Parameters
785 ----------
786 table : `str`
787 Name of the table this column is being added to.
788 spec : `FieldSpec`
789 Specification for the field to be added.
790 metadata : `sqlalchemy.MetaData`
791 SQLAlchemy representation of the DDL schema this field's table is
792 being added to.
793 **kwargs
794 Additional keyword arguments to forward to the
795 `sqlalchemy.schema.Column` constructor. This is provided to make
796 it easier for derived classes to delegate to ``super()`` while
797 making only minor changes.
799 Returns
800 -------
801 column : `sqlalchemy.schema.Column`
802 SQLAlchemy representation of the field.
803 """
804 args = [spec.name, spec.getSizedColumnType()]
805 if spec.autoincrement:
806 # Generate a sequence to use for auto incrementing for databases
807 # that do not support it natively. This will be ignored by
808 # sqlalchemy for databases that do support it.
809 args.append(
810 sqlalchemy.Sequence(
811 self.shrinkDatabaseEntityName(f"{table}_seq_{spec.name}"), metadata=metadata
812 )
813 )
814 assert spec.doc is None or isinstance(spec.doc, str), f"Bad doc for {table}.{spec.name}."
815 return sqlalchemy.schema.Column(
816 *args,
817 nullable=spec.nullable,
818 primary_key=spec.primaryKey,
819 comment=spec.doc,
820 server_default=spec.default,
821 **kwargs,
822 )
824 def _convertForeignKeySpec(
825 self, table: str, spec: ddl.ForeignKeySpec, metadata: sqlalchemy.MetaData, **kwargs: Any
826 ) -> sqlalchemy.schema.ForeignKeyConstraint:
827 """Convert a `ForeignKeySpec` to a
828 `sqlalchemy.schema.ForeignKeyConstraint`.
830 Parameters
831 ----------
832 table : `str`
833 Name of the table this foreign key is being added to.
834 spec : `ForeignKeySpec`
835 Specification for the foreign key to be added.
836 metadata : `sqlalchemy.MetaData`
837 SQLAlchemy representation of the DDL schema this constraint is
838 being added to.
839 **kwargs
840 Additional keyword arguments to forward to the
841 `sqlalchemy.schema.ForeignKeyConstraint` constructor. This is
842 provided to make it easier for derived classes to delegate to
843 ``super()`` while making only minor changes.
845 Returns
846 -------
847 constraint : `sqlalchemy.schema.ForeignKeyConstraint`
848 SQLAlchemy representation of the constraint.
849 """
850 name = self.shrinkDatabaseEntityName(
851 "_".join(
852 ["fkey", table, self._mangleTableName(spec.table)] + list(spec.target) + list(spec.source)
853 )
854 )
855 return sqlalchemy.schema.ForeignKeyConstraint(
856 spec.source,
857 [f"{self._mangleTableName(spec.table)}.{col}" for col in spec.target],
858 name=name,
859 ondelete=spec.onDelete,
860 )
862 def _convertExclusionConstraintSpec(
863 self,
864 table: str,
865 spec: Tuple[Union[str, Type[TimespanDatabaseRepresentation]], ...],
866 metadata: sqlalchemy.MetaData,
867 ) -> sqlalchemy.schema.Constraint:
868 """Convert a `tuple` from `ddl.TableSpec.exclusion` into a SQLAlchemy
869 constraint representation.
871 Parameters
872 ----------
873 table : `str`
874 Name of the table this constraint is being added to.
875 spec : `tuple` [ `str` or `type` ]
876 A tuple of `str` column names and the `type` object returned by
877 `getTimespanRepresentation` (which must appear exactly once),
878 indicating the order of the columns in the index used to back the
879 constraint.
880 metadata : `sqlalchemy.MetaData`
881 SQLAlchemy representation of the DDL schema this constraint is
882 being added to.
884 Returns
885 -------
886 constraint : `sqlalchemy.schema.Constraint`
887 SQLAlchemy representation of the constraint.
889 Raises
890 ------
891 NotImplementedError
892 Raised if this database does not support exclusion constraints.
893 """
894 raise NotImplementedError(f"Database {self} does not support exclusion constraints.")
896 def _convertTableSpec(
897 self, name: str, spec: ddl.TableSpec, metadata: sqlalchemy.MetaData, **kwargs: Any
898 ) -> sqlalchemy.schema.Table:
899 """Convert a `TableSpec` to a `sqlalchemy.schema.Table`.
901 Parameters
902 ----------
903 spec : `TableSpec`
904 Specification for the table to be added.
905 metadata : `sqlalchemy.MetaData`
906 SQLAlchemy representation of the DDL schema this table is being
907 added to.
908 **kwargs
909 Additional keyword arguments to forward to the
910 `sqlalchemy.schema.Table` constructor. This is provided to make it
911 easier for derived classes to delegate to ``super()`` while making
912 only minor changes.
914 Returns
915 -------
916 table : `sqlalchemy.schema.Table`
917 SQLAlchemy representation of the table.
919 Notes
920 -----
921 This method does not handle ``spec.foreignKeys`` at all, in order to
922 avoid circular dependencies. These are added by higher-level logic in
923 `ensureTableExists`, `getExistingTable`, and `declareStaticTables`.
924 """
925 name = self._mangleTableName(name)
926 args = [self._convertFieldSpec(name, fieldSpec, metadata) for fieldSpec in spec.fields]
928 # Add any column constraints
929 for fieldSpec in spec.fields:
930 args.extend(self._makeColumnConstraints(name, fieldSpec))
932 # Track indexes added for primary key and unique constraints, to make
933 # sure we don't add duplicate explicit or foreign key indexes for
934 # those.
935 allIndexes = {tuple(fieldSpec.name for fieldSpec in spec.fields if fieldSpec.primaryKey)}
936 args.extend(
937 sqlalchemy.schema.UniqueConstraint(
938 *columns, name=self.shrinkDatabaseEntityName("_".join([name, "unq"] + list(columns)))
939 )
940 for columns in spec.unique
941 )
942 allIndexes.update(spec.unique)
943 args.extend(
944 sqlalchemy.schema.Index(
945 self.shrinkDatabaseEntityName("_".join([name, "idx"] + list(columns))),
946 *columns,
947 unique=(columns in spec.unique),
948 )
949 for columns in spec.indexes
950 if columns not in allIndexes
951 )
952 allIndexes.update(spec.indexes)
953 args.extend(
954 sqlalchemy.schema.Index(
955 self.shrinkDatabaseEntityName("_".join((name, "fkidx") + fk.source)),
956 *fk.source,
957 )
958 for fk in spec.foreignKeys
959 if fk.addIndex and fk.source not in allIndexes
960 )
962 args.extend(self._convertExclusionConstraintSpec(name, excl, metadata) for excl in spec.exclusion)
964 assert spec.doc is None or isinstance(spec.doc, str), f"Bad doc for {name}."
965 return sqlalchemy.schema.Table(name, metadata, *args, comment=spec.doc, info=spec, **kwargs)
967 def ensureTableExists(self, name: str, spec: ddl.TableSpec) -> sqlalchemy.schema.Table:
968 """Ensure that a table with the given name and specification exists,
969 creating it if necessary.
971 Parameters
972 ----------
973 name : `str`
974 Name of the table (not including namespace qualifiers).
975 spec : `TableSpec`
976 Specification for the table. This will be used when creating the
977 table, and *may* be used when obtaining an existing table to check
978 for consistency, but no such check is guaranteed.
980 Returns
981 -------
982 table : `sqlalchemy.schema.Table`
983 SQLAlchemy representation of the table.
985 Raises
986 ------
987 ReadOnlyDatabaseError
988 Raised if `isWriteable` returns `False`, and the table does not
989 already exist.
990 DatabaseConflictError
991 Raised if the table exists but ``spec`` is inconsistent with its
992 definition.
994 Notes
995 -----
996 This method may not be called within transactions. It may be called on
997 read-only databases if and only if the table does in fact already
998 exist.
1000 Subclasses may override this method, but usually should not need to.
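
Examples
--------
A minimal sketch, assuming ``db`` is writeable and its static schema has
already been declared; the table name and specification are hypothetical::

    table = db.ensureTableExists("dynamic_table", ddl.TableSpec(...))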
1001 """
1002 # TODO: if _engine is used to make a table then it uses a separate
1003 # connection and should not interfere with the current transaction
1004 assert (
1005 self._session_connection is None or not self._session_connection.in_transaction()
1006 ), "Table creation interrupts transactions."
1007 assert self._metadata is not None, "Static tables must be declared before dynamic tables."
1008 table = self.getExistingTable(name, spec)
1009 if table is not None:
1010 return table
1011 if not self.isWriteable():
1012 raise ReadOnlyDatabaseError(
1013 f"Table {name} does not exist, and cannot be created "
1014 f"because database {self} is read-only."
1015 )
1016 table = self._convertTableSpec(name, spec, self._metadata)
1017 for foreignKeySpec in spec.foreignKeys:
1018 table.append_constraint(self._convertForeignKeySpec(name, foreignKeySpec, self._metadata))
1019 with self._connection() as connection:
1020 table.create(connection)
1021 return table
1023 def getExistingTable(self, name: str, spec: ddl.TableSpec) -> Optional[sqlalchemy.schema.Table]:
1024 """Obtain an existing table with the given name and specification.
1026 Parameters
1027 ----------
1028 name : `str`
1029 Name of the table (not including namespace qualifiers).
1030 spec : `TableSpec`
1031 Specification for the table. This will be used when creating the
1032 SQLAlchemy representation of the table, and it is used to
1033 check that the actual table in the database is consistent.
1035 Returns
1036 -------
1037 table : `sqlalchemy.schema.Table` or `None`
1038 SQLAlchemy representation of the table, or `None` if it does not
1039 exist.
1041 Raises
1042 ------
1043 DatabaseConflictError
1044 Raised if the table exists but ``spec`` is inconsistent with its
1045 definition.
1047 Notes
1048 -----
1049 This method can be called within transactions and never modifies the
1050 database.
1052 Subclasses may override this method, but usually should not need to.
1053 """
1054 assert self._metadata is not None, "Static tables must be declared before dynamic tables."
1055 name = self._mangleTableName(name)
1056 table = self._metadata.tables.get(name if self.namespace is None else f"{self.namespace}.{name}")
1057 if table is not None:
1058 if spec.fields.names != set(table.columns.keys()):
1059 raise DatabaseConflictError(
1060 f"Table '{name}' has already been defined differently; the new "
1061 f"specification has columns {list(spec.fields.names)}, while "
1062 f"the previous definition has {list(table.columns.keys())}."
1063 )
1064 else:
1065 inspector = sqlalchemy.inspect(self._engine)
1066 if name in inspector.get_table_names(schema=self.namespace):
1067 _checkExistingTableDefinition(name, spec, inspector.get_columns(name, schema=self.namespace))
1068 table = self._convertTableSpec(name, spec, self._metadata)
1069 for foreignKeySpec in spec.foreignKeys:
1070 table.append_constraint(self._convertForeignKeySpec(name, foreignKeySpec, self._metadata))
1071 return table
1072 return table
1074 @classmethod
1075 def getTimespanRepresentation(cls) -> Type[TimespanDatabaseRepresentation]:
1076 """Return a `type` that encapsulates the way `Timespan` objects are
1077 stored in this database.
1079 `Database` does not automatically use the return type of this method
1080 anywhere else; calling code is responsible for making sure that DDL
1081 and queries are consistent with it.
1083 Returns
1084 -------
1085 TimespanReprClass : `type` (`TimespanDatabaseRepresentation` subclass)
1086 A type that encapsulates the way `Timespan` objects should be
1087 stored in this database.
1089 Notes
1090 -----
1091 There are two big reasons we've decided to keep timespan-mangling logic
1092 outside the `Database` implementations, even though the choice of
1093 representation is ultimately up to a `Database` implementation:
1095 - Timespans appear in relatively few tables and queries in our
1096 typical usage, and the code that operates on them is already aware
1097 that it is working with timespans. In contrast, a
1098 timespan-representation-aware implementation of, say, `insert`,
1099 would need to have extra logic to identify when timespan-mangling
1100 needed to occur, which would usually be useless overhead.
1102 - SQLAlchemy's rich SELECT query expression system has no way to wrap
1103 multiple columns in a single expression object (the ORM does, but
1104 we are not using the ORM). So we would have to wrap _much_ more of
1105 that code in our own interfaces to encapsulate timespan
1106 representations there.
1107 """
1108 return TimespanDatabaseRepresentation.Compound
1110 @classmethod
1111 def getSpatialRegionRepresentation(cls) -> Type[SpatialRegionDatabaseRepresentation]:
1112 """Return a `type` that encapsulates the way `lsst.sphgeom.Region`
1113 objects are stored in this database.
1115 `Database` does not automatically use the return type of this method
1116 anywhere else; calling code is responsible for making sure that DDL
1117 and queries are consistent with it.
1119 Returns
1120 -------
1121 RegionReprClass : `type` (`SpatialRegionDatabaseRepresentation` subclass)
1122 A type that encapsulates the way `lsst.sphgeom.Region` objects
1123 should be stored in this database.
1125 Notes
1126 -----
1127 See `getTimespanRepresentation` for comments on why this method is not
1128 more tightly integrated with the rest of the `Database` interface.
1129 """
1130 return SpatialRegionDatabaseRepresentation
1132 def sync(
1133 self,
1134 table: sqlalchemy.schema.Table,
1135 *,
1136 keys: Dict[str, Any],
1137 compared: Optional[Dict[str, Any]] = None,
1138 extra: Optional[Dict[str, Any]] = None,
1139 returning: Optional[Sequence[str]] = None,
1140 update: bool = False,
1141 ) -> Tuple[Optional[Dict[str, Any]], Union[bool, Dict[str, Any]]]:
1142 """Insert into a table as necessary to ensure database contains
1143 values equivalent to the given ones.
1145 Parameters
1146 ----------
1147 table : `sqlalchemy.schema.Table`
1148 Table to be queried and possibly inserted into.
1149 keys : `dict`
1150 Column name-value pairs used to search for an existing row; must
1151 be a combination that can be used to select a single row if one
1152 exists. If such a row does not exist, these values are used in
1153 the insert.
1154 compared : `dict`, optional
1155 Column name-value pairs that are compared to those in any existing
1156 row. If such a row does not exist, these values are used in the
1157 insert.
1158 extra : `dict`, optional
1159 Column name-value pairs that are ignored if a matching row exists,
1160 but used in an insert if one is necessary.
1161 returning : `~collections.abc.Sequence` of `str`, optional
1162 The names of columns whose values should be returned.
1163 update : `bool`, optional
1164 If `True` (`False` is default), update the existing row with the
1165 values in ``compared`` instead of raising `DatabaseConflictError`.
1167 Returns
1168 -------
1169 row : `dict`, optional
1170 The value of the fields indicated by ``returning``, or `None` if
1171 ``returning`` is `None`.
1172 inserted_or_updated : `bool` or `dict`
1173 If `True`, a new row was inserted; if `False`, a matching row
1174 already existed. If a `dict` (only possible if ``update=True``),
1175 then an existing row was updated, and the dict maps the names of
1176 the updated columns to their *old* values (new values can be
1177 obtained from ``compared``).
1179 Raises
1180 ------
1181 DatabaseConflictError
1182 Raised if the values in ``compared`` do not match the values in the
1183 database.
1184 ReadOnlyDatabaseError
1185 Raised if `isWriteable` returns `False`, and no matching record
1186 already exists.
1188 Notes
1189 -----
1190 May be used inside transaction contexts, so implementations may not
1191 perform operations that interrupt transactions.
1193 It may be called on read-only databases if and only if the matching row
1194 does in fact already exist.
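
Examples
--------
A minimal sketch with hypothetical column names, assuming ``db`` and
``table`` already exist::

    row, inserted = db.sync(
        table,
        keys={"name": "widget"},
        compared={"size": 3},
        returning=["id"],
    )
    # ``row`` is {"id": ...}; ``inserted`` is `True` if a new row was
    # added, `False` if a matching row already existed.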
1195 """
1197 def check() -> Tuple[int, Optional[Dict[str, Any]], Optional[List]]:
1198 """Query for a row that matches the ``key`` argument, and compare
1199 to what was given by the caller.
1201 Returns
1202 -------
1203 n : `int`
1204 Number of matching rows. ``n != 1`` is always an error, but
1205 it's a different kind of error depending on where `check` is
1206 being called.
1207 bad : `dict` or `None`
1208 The subset of the keys of ``compared`` for which the existing
1209 values did not match the given one, mapped to the existing
1210 values in the database. Once again, a non-empty ``bad`` is always an
1211 error, but a different kind depending on context. `None` if ``n != 1``.
1212 result : `list` or `None`
1213 Results in the database that correspond to the columns given
1214 in ``returning``, or `None` if ``returning is None``.
1215 """
1216 toSelect: Set[str] = set()
1217 if compared is not None:
1218 toSelect.update(compared.keys())
1219 if returning is not None:
1220 toSelect.update(returning)
1221 if not toSelect:
1222 # Need to select some column, even if we just want to see
1223 # how many rows we get back.
1224 toSelect.add(next(iter(keys.keys())))
1225 selectSql = (
1226 sqlalchemy.sql.select(*[table.columns[k].label(k) for k in toSelect])
1227 .select_from(table)
1228 .where(sqlalchemy.sql.and_(*[table.columns[k] == v for k, v in keys.items()]))
1229 )
1230 with self._connection() as connection:
1231 fetched = list(connection.execute(selectSql).mappings())
1232 if len(fetched) != 1:
1233 return len(fetched), None, None
1234 existing = fetched[0]
1235 if compared is not None:
1237 def safeNotEqual(a: Any, b: Any) -> bool:
1238 if isinstance(a, astropy.time.Time):
1239 return not time_utils.TimeConverter().times_equal(a, b)
1240 return a != b
1242 inconsistencies = {
1243 k: existing[k] for k, v in compared.items() if safeNotEqual(existing[k], v)
1244 }
1245 else:
1246 inconsistencies = {}
1247 if returning is not None:
1248 toReturn: Optional[list] = [existing[k] for k in returning]
1249 else:
1250 toReturn = None
1251 return 1, inconsistencies, toReturn
1253 def format_bad(inconsistencies: Dict[str, Any]) -> str:
1254 """Format the 'bad' dictionary of existing values returned by
1255 ``check`` into a string suitable for an error message.
1256 """
1257 assert compared is not None, "Should not be able to get inconsistencies without comparing."
1258 return ", ".join(f"{k}: {v!r} != {compared[k]!r}" for k, v in inconsistencies.items())
1260 if self.isTableWriteable(table):
1261 # Try an insert first, but allow it to fail (in only specific
1262 # ways).
1263 row = keys.copy()
1264 if compared is not None:
1265 row.update(compared)
1266 if extra is not None:
1267 row.update(extra)
1268 with self.transaction():
1269 inserted = bool(self.ensure(table, row))
1270 inserted_or_updated: Union[bool, Dict[str, Any]]
1271 # Need to perform check() for this branch inside the
1272 # transaction, so we roll back an insert that didn't do
1273 # what we expected. That limits the extent to which we
1274 # can reduce duplication between this block and the other
1275 # ones that perform similar logic.
1276 n, bad, result = check()
1277 if n < 1:
1278 raise ConflictingDefinitionError(
1279 f"Attempted to ensure {row} exists by inserting it with ON CONFLICT IGNORE, "
1280 f"but a post-insert query on {keys} returned no results. "
1281 f"Insert was {'' if inserted else 'not '}reported as successful. "
1282 "This can occur if the insert violated a database constraint other than the "
1283 "unique constraint or primary key used to identify the row in this call."
1284 )
1285 elif n > 1:
1286 raise RuntimeError(
1287 f"Keys passed to sync {keys.keys()} do not comprise a "
1288 f"unique constraint for table {table.name}."
1289 )
1290 elif bad:
1291 assert (
1292 compared is not None
1293 ), "Should not be able to get inconsistencies without comparing."
1294 if inserted:
1295 raise RuntimeError(
1296 f"Conflict ({bad}) in sync after successful insert; this is "
1297 "possible if the same table is being updated by a concurrent "
1298 "process that isn't using sync, but it may also be a bug in "
1299 "daf_butler."
1300 )
1301 elif update:
1302 with self._connection() as connection:
1303 connection.execute(
1304 table.update()
1305 .where(sqlalchemy.sql.and_(*[table.columns[k] == v for k, v in keys.items()]))
1306 .values(**{k: compared[k] for k in bad.keys()})
1307 )
1308 inserted_or_updated = bad
1309 else:
1310 raise DatabaseConflictError(
1311 f"Conflict in sync for table {table.name} on column(s) {format_bad(bad)}."
1312 )
1313 else:
1314 inserted_or_updated = inserted
1315 else:
1316 # Database is not writeable; just see if the row exists.
1317 n, bad, result = check()
1318 if n < 1:
1319 raise ReadOnlyDatabaseError("sync needs to insert, but database is read-only.")
1320 elif n > 1:
1321 raise RuntimeError("Keys passed to sync do not comprise a unique constraint.")
1322 elif bad:
1323 if update:
1324 raise ReadOnlyDatabaseError("sync needs to update, but database is read-only.")
1325 else:
1326 raise DatabaseConflictError(
1327 f"Conflict in sync for table {table.name} on column(s) {format_bad(bad)}."
1328 )
1329 inserted_or_updated = False
1330 if returning is None:
1331 return None, inserted_or_updated
1332 else:
1333 assert result is not None
1334 return {k: v for k, v in zip(returning, result)}, inserted_or_updated
1336 def insert(
1337 self,
1338 table: sqlalchemy.schema.Table,
1339 *rows: dict,
1340 returnIds: bool = False,
1341 select: Optional[sqlalchemy.sql.Select] = None,
1342 names: Optional[Iterable[str]] = None,
1343 ) -> Optional[List[int]]:
1344 """Insert one or more rows into a table, optionally returning
1345 autoincrement primary key values.
1347 Parameters
1348 ----------
1349 table : `sqlalchemy.schema.Table`
1350 Table rows should be inserted into.
1351 returnIds : `bool`
1352 If `True` (`False` is default), return the values of the table's
1353 autoincrement primary key field (which must exist).
1354 select : `sqlalchemy.sql.Select`, optional
1355 A SELECT query expression to insert rows from. Cannot be provided
1356 with either ``rows`` or ``returnIds=True``.
1357 names : `Iterable` [ `str` ], optional
1358 Names of columns in ``table`` to be populated, ordered to match the
1359 columns returned by ``select``. Ignored if ``select`` is `None`.
1360 If not provided, the columns returned by ``select`` must be named
1361 to match the desired columns of ``table``.
1362 *rows
1363 Positional arguments are the rows to be inserted, as dictionaries
1364 mapping column name to value. The keys in all dictionaries must
1365 be the same.
1367 Returns
1368 -------
1369 ids : `None`, or `list` of `int`
1370 If ``returnIds`` is `True`, a `list` containing the inserted
1371 values for the table's autoincrement primary key.
1373 Raises
1374 ------
1375 ReadOnlyDatabaseError
1376 Raised if `isWriteable` returns `False` when this method is called.
1378 Notes
1379 -----
1380 The default implementation uses bulk insert syntax when ``returnIds``
1381 is `False`, and a loop over single-row insert operations when it is
1382 `True`.
1384 Derived classes should reimplement when they can provide a more
1385 efficient implementation (especially for the latter case).
1387 May be used inside transaction contexts, so implementations may not
1388 perform operations that interrupt transactions.
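
Examples
--------
A minimal sketch with hypothetical column names, assuming ``db`` and
``table`` already exist and ``table`` has an autoincrement primary key::

    ids = db.insert(table, {"name": "a"}, {"name": "b"}, returnIds=True)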
1389 """
1390 self.assertTableWriteable(table, f"Cannot insert into read-only table {table}.")
1391 if select is not None and (rows or returnIds):
1392 raise TypeError("'select' is incompatible with passing value rows or returnIds=True.")
1393 if not rows and select is None:
1394 if returnIds:
1395 return []
1396 else:
1397 return None
1398 with self._connection() as connection:
1399 if not returnIds:
1400 if select is not None:
1401 if names is None:
1402 # columns() is deprecated since 1.4, but
1403 # selected_columns() method did not exist in 1.3.
1404 if hasattr(select, "selected_columns"):
1405 names = select.selected_columns.keys()
1406 else:
1407 names = select.columns.keys()
1408 connection.execute(table.insert().from_select(names, select))
1409 else:
1410 connection.execute(table.insert(), rows)
1411 return None
1412 else:
1413 sql = table.insert()
1414 return [connection.execute(sql, row).inserted_primary_key[0] for row in rows]
1416 @abstractmethod
1417 def replace(self, table: sqlalchemy.schema.Table, *rows: dict) -> None:
1418 """Insert one or more rows into a table, replacing any existing rows
1419 for which insertion of a new row would violate the primary key
1420 constraint.
1422 Parameters
1423 ----------
1424 table : `sqlalchemy.schema.Table`
1425 Table rows should be inserted into.
1426 *rows
1427 Positional arguments are the rows to be inserted, as dictionaries
1428 mapping column name to value. The keys in all dictionaries must
1429 be the same.
1431 Raises
1432 ------
1433 ReadOnlyDatabaseError
1434 Raised if `isWriteable` returns `False` when this method is called.
1436 Notes
1437 -----
1438 May be used inside transaction contexts, so implementations may not
1439 perform operations that interrupt transactions.
1441 Implementations should raise a `sqlalchemy.exc.IntegrityError`
1442 exception when a constraint other than the primary key would be
1443 violated.
1445 Implementations are not required to support `replace` on tables
1446 with autoincrement keys.
1447 """
1448 raise NotImplementedError()
1450 @abstractmethod
1451 def ensure(self, table: sqlalchemy.schema.Table, *rows: dict) -> int:
1452 """Insert one or more rows into a table, skipping any rows for which
1453 insertion would violate any constraint.
1455 Parameters
1456 ----------
1457 table : `sqlalchemy.schema.Table`
1458 Table rows should be inserted into.
1459 *rows
1460 Positional arguments are the rows to be inserted, as dictionaries
1461 mapping column name to value. The keys in all dictionaries must
1462 be the same.
1464 Returns
1465 -------
1466 count : `int`
1467 The number of rows actually inserted.
1469 Raises
1470 ------
1471 ReadOnlyDatabaseError
1472 Raised if `isWriteable` returns `False` when this method is called.
1473 This is raised even if the operation would do nothing even on a
1474 writeable database.
1476 Notes
1477 -----
1478 May be used inside transaction contexts, so implementations may not
1479 perform operations that interrupt transactions.
1481 Implementations are not required to support `ensure` on tables
1482 with autoincrement keys.
1483 """
1484 raise NotImplementedError()
1486 def delete(self, table: sqlalchemy.schema.Table, columns: Iterable[str], *rows: dict) -> int:
1487 """Delete one or more rows from a table.
1489 Parameters
1490 ----------
1491 table : `sqlalchemy.schema.Table`
1492 Table that rows should be deleted from.
1493 columns : `~collections.abc.Iterable` of `str`
1494 The names of columns that will be used to constrain the rows to
1495 be deleted; these will be combined via ``AND`` to form the
1496 ``WHERE`` clause of the delete query.
1497 *rows
1498 Positional arguments are the keys of rows to be deleted, as
1499 dictionaries mapping column name to value. The keys in all
1500 dictionaries must be exactly the names in ``columns``.
1502 Returns
1503 -------
1504 count : `int`
1505 Number of rows deleted.
1507 Raises
1508 ------
1509 ReadOnlyDatabaseError
1510 Raised if `isWriteable` returns `False` when this method is called.
1512 Notes
1513 -----
1514 May be used inside transaction contexts, so implementations may not
1515 perform operations that interrupt transactions.
1517 The default implementation should be sufficient for most derived
1518 classes.
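
Examples
--------
A minimal sketch with a hypothetical ``id`` column, assuming ``db`` and
``table`` already exist::

    n = db.delete(table, ["id"], {"id": 1}, {"id": 2})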
1519 """
1520 self.assertTableWriteable(table, f"Cannot delete from read-only table {table}.")
1521 if columns and not rows:
1522 # If there are no columns, this operation is supposed to delete
1523 # everything (so we proceed as usual). But if there are columns,
1524 # but no rows, it was a constrained bulk operation where the
1525 # constraint is that no rows match, and we should short-circuit
1526 # while reporting that no rows were affected.
1527 return 0
1528 sql = table.delete()
1529 columns = list(columns) # Force iterators to list
1531 # More efficient to use IN operator if there is only one
1532 # variable changing across all rows.
1533 content: Dict[str, Set] = defaultdict(set)
1534 if len(columns) == 1:
1535 # Nothing to calculate since we can always use IN
1536 column = columns[0]
1537 changing_columns = [column]
1538 content[column] = set(row[column] for row in rows)
1539 else:
1540 for row in rows:
1541 for k, v in row.items():
1542 content[k].add(v)
1543 changing_columns = [col for col, values in content.items() if len(values) > 1]
1545 if len(changing_columns) != 1:
1546 # More than one column changes each time so do explicit bind
1547 # parameters and have each row processed separately.
1548 whereTerms = [table.columns[name] == sqlalchemy.sql.bindparam(name) for name in columns]
1549 if whereTerms:
1550 sql = sql.where(sqlalchemy.sql.and_(*whereTerms))
1551 with self._connection() as connection:
1552 return connection.execute(sql, rows).rowcount
1553 else:
1554 # One of the columns has changing values but any others are
1555 # fixed. In this case we can use an IN operator and be more
1556 # efficient.
1557 name = changing_columns.pop()
1559 # Simple where clause for the unchanging columns
1560 clauses = []
1561 for k, v in content.items():
1562 if k == name:
1563 continue
1564 column = table.columns[k]
1565 # The set only has one element
1566 clauses.append(column == v.pop())
1568 # The IN operator will not work for "infinite" numbers of
1569 # rows so must batch it up into distinct calls.
1570 in_content = list(content[name])
1571 n_elements = len(in_content)
1573 rowcount = 0
1574 iposn = 0
1575 n_per_loop = 1_000 # Controls how many items to put in IN clause
1576 with self._connection() as connection:
1577 for iposn in range(0, n_elements, n_per_loop):
1578 endpos = iposn + n_per_loop
1579 in_clause = table.columns[name].in_(in_content[iposn:endpos])
1581 newsql = sql.where(sqlalchemy.sql.and_(*clauses, in_clause))
1582 rowcount += connection.execute(newsql).rowcount
1583 return rowcount
1585 def deleteWhere(self, table: sqlalchemy.schema.Table, where: sqlalchemy.sql.ClauseElement) -> int:
1586 """Delete rows from a table with pre-constructed WHERE clause.
1588 Parameters
1589 ----------
1590 table : `sqlalchemy.schema.Table`
1591 Table that rows should be deleted from.
1592 where : `sqlalchemy.sql.ClauseElement`
1593 A SQLAlchemy boolean expression to be used as the ``WHERE`` clause of
1594 the delete query.
1597 Returns
1598 -------
1599 count : `int`
1600 Number of rows deleted.
1602 Raises
1603 ------
1604 ReadOnlyDatabaseError
1605 Raised if `isWriteable` returns `False` when this method is called.
1607 Notes
1608 -----
1609 May be used inside transaction contexts, so implementations may not
1610 perform operations that interrupt transactions.
1612 The default implementation should be sufficient for most derived
1613 classes.
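
Examples
--------
A minimal sketch with a hypothetical ``id`` column, assuming ``db`` and
``table`` already exist::

    n = db.deleteWhere(table, table.columns["id"] > 100)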
1614 """
1615 self.assertTableWriteable(table, f"Cannot delete from read-only table {table}.")
1617 sql = table.delete().where(where)
1618 with self._connection() as connection:
1619 return connection.execute(sql).rowcount
1621 def update(self, table: sqlalchemy.schema.Table, where: Dict[str, str], *rows: dict) -> int:
1622 """Update one or more rows in a table.
1624 Parameters
1625 ----------
1626 table : `sqlalchemy.schema.Table`
1627 Table containing the rows to be updated.
1628 where : `dict` [`str`, `str`]
1629 A mapping from the names of columns that will be used to search for
1630 existing rows to the keys that will hold these values in the
1631 ``rows`` dictionaries. Note that these may not be the same due to
1632 SQLAlchemy limitations.
1633 *rows
1634 Positional arguments are the rows to be updated. The keys in all
1635 dictionaries must be the same, and may correspond to either a
1636 value in the ``where`` dictionary or the name of a column to be
1637 updated.
1639 Returns
1640 -------
1641 count : `int`
1642 Number of rows matched (regardless of whether the update actually
1643 modified them).
1645 Raises
1646 ------
1647 ReadOnlyDatabaseError
1648 Raised if `isWriteable` returns `False` when this method is called.
1650 Notes
1651 -----
1652 May be used inside transaction contexts, so implementations may not
1653 perform operations that interrupt transactions.
1655 The default implementation should be sufficient for most derived
1656 classes.
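
Examples
--------
A minimal sketch with hypothetical column names; note that the search
value appears in each row under the key given in ``where`` ("old_name"
here), not under the column name itself::

    n = db.update(table, {"name": "old_name"}, {"old_name": "a", "value": 2})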
1657 """
1658 self.assertTableWriteable(table, f"Cannot update read-only table {table}.")
1659 if not rows:
1660 return 0
1661 sql = table.update().where(
1662 sqlalchemy.sql.and_(*[table.columns[k] == sqlalchemy.sql.bindparam(v) for k, v in where.items()])
1663 )
1664 with self._connection() as connection:
1665 return connection.execute(sql, rows).rowcount
1667 def query(
1668 self, sql: sqlalchemy.sql.FromClause, *args: Any, **kwargs: Any
1669 ) -> sqlalchemy.engine.ResultProxy:
1670 """Run a SELECT query against the database.
1672 Parameters
1673 ----------
1674 sql : `sqlalchemy.sql.FromClause`
1675 A SQLAlchemy representation of a ``SELECT`` query.
1676 *args
1677 Additional positional arguments are forwarded to
1678 `sqlalchemy.engine.Connection.execute`.
1679 **kwargs
1680 Additional keyword arguments are forwarded to
1681 `sqlalchemy.engine.Connection.execute`.
1683 Returns
1684 -------
1685 result : `sqlalchemy.engine.ResultProxy`
1686 Query results.
1688 Notes
1689 -----
1690 The default implementation should be sufficient for most derived
1691 classes.
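
Examples
--------
A minimal sketch, selecting all rows from an already-declared table
(hypothetical) and using the SQLAlchemy 1.4 ``mappings`` accessor on the
result::

    for row in db.query(table.select()).mappings():
        print(row)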
1692 """
1693 # We are returning a Result object so we need to take care of
1694 # connection lifetime. If this is happening in a transaction context
1695 # then just use the existing connection, otherwise make a special
1696 # connection which will be closed when result is closed.
1697 #
1698 # TODO: A better approach may be to make this method return a
1699 # context manager, but this means big changes for callers of this
1700 # method.
1701 if self._session_connection is not None:
1702 connection = self._session_connection
1703 else:
1704 connection = self._engine.connect(close_with_result=True)
1705 # TODO: should we guard against non-SELECT queries here?
1706 return connection.execute(sql, *args, **kwargs)
1708 origin: int
1709 """An integer ID that should be used as the default for any datasets,
1710 quanta, or other entities that use a (autoincrement, origin) compound
1711 primary key (`int`).
1712 """
1714 namespace: Optional[str]
1715 """The schema or namespace this database instance is associated with
1716 (`str` or `None`).
1717 """