Coverage for python/lsst/daf/butler/registry/interfaces/_database.py: 15%
407 statements
« prev ^ index » next coverage.py v6.4.1, created at 2022-06-23 02:27 -0700
« prev ^ index » next coverage.py v6.4.1, created at 2022-06-23 02:27 -0700
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = [
24 "Database",
25 "ReadOnlyDatabaseError",
26 "DatabaseConflictError",
27 "SchemaAlreadyDefinedError",
28 "StaticTablesContext",
29]
31import uuid
32import warnings
33from abc import ABC, abstractmethod
34from collections import defaultdict
35from contextlib import contextmanager
36from typing import Any, Callable, Dict, Iterable, Iterator, List, Optional, Sequence, Set, Tuple, Type, Union
38import astropy.time
39import sqlalchemy
41from ...core import SpatialRegionDatabaseRepresentation, TimespanDatabaseRepresentation, ddl, time_utils
42from .._exceptions import ConflictingDefinitionError
44_IN_SAVEPOINT_TRANSACTION = "IN_SAVEPOINT_TRANSACTION"
def _checkExistingTableDefinition(name: str, spec: ddl.TableSpec, inspection: List[Dict[str, Any]]) -> None:
    """Verify that an existing database table agrees with its specification.

    Only the set of column names is compared; column types, constraints and
    column order are not examined.

    Parameters
    ----------
    name : `str`
        Name of the table (only used in error messages).
    spec : `ddl.TableSpec`
        Specification of the table.
    inspection : `list` [ `dict` ]
        Column descriptions as returned by
        `sqlalchemy.engine.reflection.Inspector.get_columns`; only the
        ``"name"`` entry of each item is used.

    Raises
    ------
    DatabaseConflictError
        Raised if the definitions are inconsistent.
    """
    reflected = [column["name"] for column in inspection]
    expected = spec.fields.names
    if expected != set(reflected):
        raise DatabaseConflictError(
            f"Table '{name}' exists but is defined differently in the database; "
            f"specification has columns {list(expected)}, while the "
            f"table in the database has {reflected}."
        )
class ReadOnlyDatabaseError(RuntimeError):
    """Error signalling that a write operation was attempted on a `Database`
    that is read-only.
    """
class DatabaseConflictError(ConflictingDefinitionError):
    """Error signalling that what is stored in the database (row values or
    schema entities) disagrees with what this client expects.
    """
class SchemaAlreadyDefinedError(RuntimeError):
    """Error signalling an attempt to initialize a database schema in a
    database where some tables already exist.
    """
class StaticTablesContext:
    """Helper used to declare the static schema for a registry layer in a
    database.

    Instances are handed out by `Database.declareStaticTables`; that context
    manager is the only intended way to obtain one.
    """

    def __init__(self, db: Database):
        self._db = db
        # Reflect the current database state once, up front, so that
        # `addTable` can compare declared tables against existing ones.
        self._inspector = sqlalchemy.inspect(db._engine)
        self._tableNames = frozenset(self._inspector.get_table_names(schema=db.namespace))
        # Foreign keys and initializers are only collected here; they are
        # consumed by `Database.declareStaticTables` when its context exits.
        self._foreignKeys: List[Tuple[sqlalchemy.schema.Table, sqlalchemy.schema.ForeignKeyConstraint]] = []
        self._initializers: List[Callable[[Database], None]] = []

    def addTable(self, name: str, spec: ddl.TableSpec) -> sqlalchemy.schema.Table:
        """Declare a table in the schema and return its SQLAlchemy
        representation.

        Creation of the table may be deferred until the context created by
        `Database.declareStaticTables` ends, which lets tables be declared
        in any order even when they reference each other via foreign keys.

        Parameters
        ----------
        name : `str`
            Logical name of the table; it is passed through the database's
            name mangling before use.
        spec : `ddl.TableSpec`
            Specification of the table.

        Returns
        -------
        table : `sqlalchemy.schema.Table`
            SQLAlchemy representation of the table.
        """
        database = self._db
        mangled = database._mangleTableName(name)
        if mangled in self._tableNames:
            # The table is already present: make sure its columns agree
            # with the specification.
            existingColumns = self._inspector.get_columns(mangled, schema=database.namespace)
            _checkExistingTableDefinition(mangled, spec, existingColumns)
        metadata = database._metadata
        assert metadata is not None, "Guaranteed by context manager that returns this object."
        table = database._convertTableSpec(mangled, spec, metadata)
        # Foreign keys are remembered rather than attached right away.
        self._foreignKeys.extend(
            (table, database._convertForeignKeySpec(mangled, fkSpec, metadata)) for fkSpec in spec.foreignKeys
        )
        return table

    def addTableTuple(self, specs: Tuple[ddl.TableSpec, ...]) -> Tuple[sqlalchemy.schema.Table, ...]:
        """Declare a named tuple of tables, returning their SQLAlchemy
        representations in a named tuple of the same type.

        Creation of the tables may be deferred until the context created by
        `Database.declareStaticTables` ends, which lets tables be declared
        in any order even when they reference each other via foreign keys.

        Notes
        -----
        ``specs`` *must* be an instance of a type created by
        `collections.namedtuple`, not a plain tuple; the field names are
        used as table names and the returned object has the same type.
        Because `~collections.namedtuple` is a factory for `type` objects
        rather than a type itself, this cannot be expressed with type
        annotations.
        """
        tables = (
            self.addTable(fieldName, tableSpec)
            for fieldName, tableSpec in zip(specs._fields, specs)  # type: ignore
        )
        return specs._make(tables)  # type: ignore

    def addInitializer(self, initializer: Callable[[Database], None]) -> None:
        """Register a callable that performs one-time database
        initialization.

        Initialization means anything that changes the state of a database
        and has to happen exactly once after the schema was created, for
        example populating schema attributes.

        Parameters
        ----------
        initializer : callable
            Callable taking a single argument, a `Database` instance.
        """
        self._initializers.append(initializer)
class Session:
    """Class representing a persistent connection to a database.

    Parameters
    ----------
    db : `Database`
        Database instance.

    Notes
    -----
    Client code should not instantiate this class directly; use
    `Database.session` to obtain a session context instead::

        with db.session() as session:
            session.method()
            db.method()

    In the current implementation sessions can be nested, and transactions
    can be nested within a session. All nested sessions and transactions
    share the same database connection.

    This class exposes the limited part of the database API that needs a
    persistent connection (e.g. temporary tables, whose lifetime is that of
    a session). Potentially most of the database API could be associated
    with a Session class.
    """

    def __init__(self, db: Database):
        self._db = db

    def makeTemporaryTable(self, spec: ddl.TableSpec, name: Optional[str] = None) -> sqlalchemy.schema.Table:
        """Create a temporary table.

        Parameters
        ----------
        spec : `TableSpec`
            Specification for the table.
        name : `str`, optional
            A unique (within this session/connection) name for the table.
            Subclasses may override to modify the actual name used. If not
            provided, a unique name will be generated.

        Returns
        -------
        table : `sqlalchemy.schema.Table`
            SQLAlchemy representation of the table.

        Raises
        ------
        RuntimeError
            Raised if the static schema has not been declared yet.
        ValueError
            Raised if a temporary table with the same (transformed) key is
            already registered and that key differs from ``name``.

        Notes
        -----
        Temporary tables may be created, dropped, and written to even in
        read-only databases - at least according to the Python-level
        protections in the `Database` classes. Server permissions may say
        otherwise, but in that case they probably need to be modified to
        support the full range of expected read-only butler behavior.

        Temporary table rows are guaranteed to be dropped when a connection
        is closed. `Database` implementations are permitted to allow the
        table to remain as long as this is transparent to the user (i.e.
        "creating" the temporary table in a new session should not be an
        error, even if it does nothing).

        It may not be possible to use temporary tables within transactions
        with some database engines (or configurations thereof).
        """
        tableName = f"tmp_{uuid.uuid4().hex}" if name is None else name
        database = self._db
        metadata = database._metadata
        if metadata is None:
            raise RuntimeError("Cannot create temporary table before static schema is defined.")
        table = database._convertTableSpec(
            tableName, spec, metadata, prefixes=["TEMPORARY"], schema=sqlalchemy.schema.BLANK_SCHEMA
        )
        if table.key in database._tempTables and table.key != tableName:
            raise ValueError(
                f"A temporary table with name {tableName} (transformed to {table.key} by "
                f"Database) already exists."
            )
        for fkSpec in spec.foreignKeys:
            table.append_constraint(database._convertForeignKeySpec(tableName, fkSpec, metadata))
        with database._connection() as connection:
            table.create(connection)
        # Remember the table so it is treated as writeable and droppable.
        database._tempTables.add(table.key)
        return table

    def dropTemporaryTable(self, table: sqlalchemy.schema.Table) -> None:
        """Drop a temporary table.

        Parameters
        ----------
        table : `sqlalchemy.schema.Table`
            A SQLAlchemy object returned by a previous call to
            `makeTemporaryTable`.

        Raises
        ------
        TypeError
            Raised if the table is not registered as a temporary table of
            this database.
        """
        if table.key not in self._db._tempTables:
            raise TypeError(f"Table {table.key} was not created by makeTemporaryTable.")
        with self._db._connection() as connection:
            table.drop(connection)
        self._db._tempTables.remove(table.key)
267class Database(ABC):
268 """An abstract interface that represents a particular database engine's
269 representation of a single schema/namespace/database.
271 Parameters
272 ----------
273 origin : `int`
274 An integer ID that should be used as the default for any datasets,
275 quanta, or other entities that use a (autoincrement, origin) compound
276 primary key.
277 engine : `sqlalchemy.engine.Engine`
278 The SQLAlchemy engine for this `Database`.
279 namespace : `str`, optional
280 Name of the schema or namespace this instance is associated with.
281 This is passed as the ``schema`` argument when constructing a
282 `sqlalchemy.schema.MetaData` instance. We use ``namespace`` instead to
283 avoid confusion between "schema means namespace" and "schema means
284 table definitions".
286 Notes
287 -----
288 `Database` requires all write operations to go through its special named
289 methods. Our write patterns are sufficiently simple that we don't really
290 need the full flexibility of SQL insert/update/delete syntax, and we need
291 non-standard (but common) functionality in these operations sufficiently
292 often that it seems worthwhile to provide our own generic API.
294 In contrast, `Database.query` allows arbitrary ``SELECT`` queries (via
295 their SQLAlchemy representation) to be run, as we expect these to require
296 significantly more sophistication while still being limited to standard
297 SQL.
299 `Database` itself has several underscore-prefixed attributes:
301 - ``_engine``: SQLAlchemy object representing its engine.
302 - ``_connection``: method returning a context manager for
303 `sqlalchemy.engine.Connection` object.
304 - ``_metadata``: the `sqlalchemy.schema.MetaData` object representing
305 the tables and other schema entities.
307 These are considered protected (derived classes may access them, but other
308 code should not), and read-only, aside from executing SQL via
309 ``_connection``.
310 """
def __init__(self, *, origin: int, engine: sqlalchemy.engine.Engine, namespace: Optional[str] = None):
    # Connection-related state: no session connection is open and no
    # static schema has been declared when the object is first built.
    self._engine = engine
    self._session_connection: Optional[sqlalchemy.engine.Connection] = None
    self._metadata: Optional[sqlalchemy.schema.MetaData] = None
    # Keys of temporary tables created through `Session`.
    self._tempTables: Set[str] = set()
    # Public identification of this database instance.
    self.origin = origin
    self.namespace = namespace
def __repr__(self) -> str:
    # Reporting the connection URL is more useful than trying to reproduce
    # every constructor argument; any password is masked first.
    url = self._engine.url
    if url.password is not None:
        url = url.set(password="***")
    uri = str(url)
    if self.namespace:
        uri = f"{uri}#{self.namespace}"
    return f'{type(self).__name__}("{uri}")'
@classmethod
def makeDefaultUri(cls, root: str) -> Optional[str]:
    """Return a default connection URI for the given root directory.

    This base implementation always returns `None`, meaning that there is
    no such default.
    """
    return None
@classmethod
def fromUri(
    cls, uri: str, *, origin: int, namespace: Optional[str] = None, writeable: bool = True
) -> Database:
    """Build a database object from a SQLAlchemy URI.

    The engine is created with `makeEngine` and then wrapped by
    `fromEngine`.

    Parameters
    ----------
    uri : `str`
        A SQLAlchemy URI connection string.
    origin : `int`
        An integer ID that should be used as the default for any datasets,
        quanta, or other entities that use a (autoincrement, origin)
        compound primary key.
    namespace : `str`, optional
        A database namespace (i.e. schema) the new instance should be
        associated with. If `None` (default), the namespace (if any) is
        inferred from the URI.
    writeable : `bool`, optional
        If `True`, allow write operations on the database, including
        ``CREATE TABLE``.

    Returns
    -------
    db : `Database`
        A new `Database` instance.
    """
    engine = cls.makeEngine(uri, writeable=writeable)
    return cls.fromEngine(engine, origin=origin, namespace=namespace, writeable=writeable)
@classmethod
@abstractmethod
def makeEngine(cls, uri: str, *, writeable: bool = True) -> sqlalchemy.engine.Engine:
    """Build a `sqlalchemy.engine.Engine` from a SQLAlchemy URI.

    Parameters
    ----------
    uri : `str`
        A SQLAlchemy URI connection string.
    writeable : `bool`, optional
        If `True`, allow write operations on the database, including
        ``CREATE TABLE``.

    Returns
    -------
    engine : `sqlalchemy.engine.Engine`
        A database engine.

    Notes
    -----
    Subclasses supporting other ways of connecting to a database are
    encouraged to add optional arguments to their implementation, provided
    they stay compatible with the base class call signature.
    """
    raise NotImplementedError()
@classmethod
@abstractmethod
def fromEngine(
    cls,
    engine: sqlalchemy.engine.Engine,
    *,
    origin: int,
    namespace: Optional[str] = None,
    writeable: bool = True,
) -> Database:
    """Build a new `Database` around an existing
    `sqlalchemy.engine.Engine`.

    Parameters
    ----------
    engine : `sqlalchemy.engine.Engine`
        The engine for the database. May be shared between `Database`
        instances.
    origin : `int`
        An integer ID that should be used as the default for any datasets,
        quanta, or other entities that use a (autoincrement, origin)
        compound primary key.
    namespace : `str`, optional
        A different database namespace (i.e. schema) the new instance
        should be associated with. If `None` (default), the namespace
        (if any) is inferred from the connection.
    writeable : `bool`, optional
        If `True`, allow write operations on the database, including
        ``CREATE TABLE``.

    Returns
    -------
    db : `Database`
        A new `Database` instance.

    Notes
    -----
    This method lets several `Database` instances share one engine, which
    is desirable when they represent different namespaces that can be
    queried together.
    """
    raise NotImplementedError()
@contextmanager
def session(self) -> Iterator:
    """Return a context manager that represents a session (persistent
    connection to a database).

    If a session is already active its connection is reused and left open
    on exit; otherwise a new connection is opened here and closed when the
    context ends.
    """
    if self._session_connection is not None:
        # Nested use: the outer session owns the connection.
        yield Session(self)
        return
    try:
        # Outermost session: open the connection and own its lifetime.
        self._session_connection = self._engine.connect()
        yield Session(self)
    finally:
        if self._session_connection is not None:
            self._session_connection.close()
            self._session_connection = None
            # Temporary tables only live within session
            self._tempTables = set()
@contextmanager
def transaction(
    self,
    *,
    interrupting: bool = False,
    savepoint: bool = False,
    lock: Iterable[sqlalchemy.schema.Table] = (),
) -> Iterator:
    """Return a context manager that represents a transaction.

    Parameters
    ----------
    interrupting : `bool`, optional
        If `True` (`False` is default), this transaction block may not be
        nested without an outer one, and attempting to do so is a logic
        (i.e. assertion) error.
    savepoint : `bool`, optional
        If `True` (`False` is default), create a `SAVEPOINT`, allowing
        exceptions raised by the database (e.g. due to constraint
        violations) during this transaction's context to be caught outside
        it without also rolling back all operations in an outer transaction
        block. If `False`, transactions may still be nested, but a
        rollback may be generated at any level and affects all levels, and
        commits are deferred until the outermost block completes. If any
        outer transaction block was created with ``savepoint=True``, all
        inner blocks will be as well (regardless of the actual value
        passed). This has no effect if this is the outermost transaction.
    lock : `Iterable` [ `sqlalchemy.schema.Table` ], optional
        A list of tables to lock for the duration of this transaction.
        These locks are guaranteed to prevent concurrent writes and allow
        this transaction (only) to acquire the same locks (others should
        block), but only prevent concurrent reads if the database engine
        requires that in order to block concurrent writes.

    Notes
    -----
    All transactions on a connection managed by one or more `Database`
    instances _must_ go through this method, or transaction state will not
    be correctly managed.

    If starting the transaction or acquiring the requested locks fails,
    any transaction begun by this call is rolled back and the exception is
    re-raised; the managed block is not entered in that case.
    """
    # need a connection, use session to manage it
    with self.session():
        assert self._session_connection is not None
        connection = self._session_connection
        assert not (interrupting and connection.in_transaction()), (
            "Logic error in transaction nesting: an operation that would "
            "interrupt the active transaction context has been requested."
        )
        # We remember whether we are already in a SAVEPOINT transaction via
        # the connection object's 'info' dict, which is explicitly for user
        # information like this. This is safer than a regular `Database`
        # instance attribute, because it guards against multiple `Database`
        # instances sharing the same connection. The need to use our own
        # flag here to track whether we're in a nested transaction should
        # go away in SQLAlchemy 1.4, which seems to have a
        # `Connection.in_nested_transaction()` method.
        savepoint = savepoint or connection.info.get(_IN_SAVEPOINT_TRANSACTION, False)
        connection.info[_IN_SAVEPOINT_TRANSACTION] = savepoint
        # `None` means this block did not start a transaction of its own
        # (nested, non-savepoint), so it must neither commit nor roll back.
        trans: Optional[sqlalchemy.engine.Transaction] = None
        try:
            # Starting the transaction and taking the locks happen inside
            # the ``try`` so that a failure in either is cleaned up exactly
            # like a failure in the managed block; otherwise a transaction
            # could be left open on a connection that an outer session
            # keeps alive.
            if connection.in_transaction() and savepoint:
                trans = connection.begin_nested()
            elif not connection.in_transaction():
                # Use a regular (non-savepoint) transaction always for the
                # outermost context.
                trans = connection.begin()
            self._lockTables(connection, lock)
            yield
            if trans is not None:
                trans.commit()
        except BaseException:
            if trans is not None:
                trans.rollback()
            raise
        finally:
            # Forget the savepoint flag once no transaction remains active.
            if not connection.in_transaction():
                connection.info.pop(_IN_SAVEPOINT_TRANSACTION, None)
@contextmanager
def _connection(self) -> Iterator[sqlalchemy.engine.Connection]:
    """Return context manager for Connection.

    Inside a session the session's connection is yielded, wrapped in a
    short transaction if none is active. Outside a session a new
    connection with its own transaction is opened for the duration of the
    context.
    """
    active = self._session_connection
    if active is None:
        # No session: make a new connection and transaction; the
        # transaction is committed on context exit.
        with self._engine.begin() as fresh:
            yield fresh
    elif active.in_transaction():
        # Session with a transaction already in progress: just reuse it.
        yield active
    else:
        # Session without a transaction: start a short one.
        with active.begin():
            yield active
@abstractmethod
def _lockTables(
    self, connection: sqlalchemy.engine.Connection, tables: Iterable[sqlalchemy.schema.Table] = ()
) -> None:
    """Acquire locks on the given tables.

    Subclasses implement this hook; it is invoked by `transaction` and
    should not be called directly by other code.

    Parameters
    ----------
    connection : `sqlalchemy.engine.Connection`
        Database connection object. A transaction is guaranteed to be in
        progress already for this connection.
    tables : `Iterable` [ `sqlalchemy.schema.Table` ], optional
        A list of tables to lock for the duration of this transaction.
        These locks are guaranteed to prevent concurrent writes and allow
        this transaction (only) to acquire the same locks (others should
        block), but only prevent concurrent reads if the database engine
        requires that in order to block concurrent writes.
    """
    raise NotImplementedError()
def isTableWriteable(self, table: sqlalchemy.schema.Table) -> bool:
    """Report whether a table can be written to.

    A table is writeable when the database itself is writeable or when the
    table is one of the registered temporary tables.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy table object to check.

    Returns
    -------
    writeable : `bool`
        Whether this table is writeable.
    """
    databaseWriteable = self.isWriteable()
    if databaseWriteable:
        return databaseWriteable
    # Temporary tables stay writeable even in a read-only database.
    return table.key in self._tempTables
def assertTableWriteable(self, table: sqlalchemy.schema.Table, msg: str) -> None:
    """Raise unless the given table is writeable.

    A table counts as writeable when `isTableWriteable` returns a true
    value for it.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy table object to check.
    msg : `str`
        Message for the exception raised when the table is not writeable.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if the table is not writeable.
    """
    if self.isTableWriteable(table):
        return
    raise ReadOnlyDatabaseError(msg)
@contextmanager
def declareStaticTables(self, *, create: bool) -> Iterator[StaticTablesContext]:
    """Return a context manager in which the database's static DDL schema
    can be declared.

    Parameters
    ----------
    create : `bool`
        If `True`, attempt to create all tables at the end of the context.
        If `False`, they will be assumed to already exist.

    Returns
    -------
    schema : `StaticTablesContext`
        A helper object that is used to add new tables.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if ``create`` is `True` and `Database.isWriteable` is
        `False`.
    SchemaAlreadyDefinedError
        Raised if ``create`` is `True` and the database already contains
        tables.

    Examples
    --------
    Given a `Database` instance ``db``::

        with db.declareStaticTables(create=True) as schema:
            schema.addTable("table1", TableSpec(...))
            schema.addTable("table2", TableSpec(...))

    Notes
    -----
    A database's static DDL schema must be declared before any dynamic
    tables are managed via calls to `ensureTableExists` or
    `getExistingTable`. The order in which static schema tables are added
    inside the context block is unimportant; they will automatically be
    sorted and added in an order consistent with their foreign key
    relationships.

    If anything fails, the metadata set up by this call is discarded again
    before the exception propagates.
    """
    if create and not self.isWriteable():
        raise ReadOnlyDatabaseError(f"Cannot create tables in read-only database {self}.")
    self._metadata = sqlalchemy.MetaData(schema=self.namespace)
    try:
        schemaContext = StaticTablesContext(self)
        if create and schemaContext._tableNames:
            # Looks like the database is already initialized; to avoid the
            # danger of modifying/destroying a valid schema we refuse to
            # do anything in this case.
            raise SchemaAlreadyDefinedError(f"Cannot create tables in non-empty database {self}.")
        yield schemaContext
        # All tables are declared now, so the deferred foreign keys can be
        # attached.
        for declaredTable, constraint in schemaContext._foreignKeys:
            declaredTable.append_constraint(constraint)
        if not create:
            return
        namespace = self.namespace
        if namespace is not None and namespace not in schemaContext._inspector.get_schema_names():
            with self._connection() as connection:
                connection.execute(sqlalchemy.schema.CreateSchema(namespace))
        # In our tables we have columns that make use of sqlalchemy
        # Sequence objects. There is currently a bug in sqlalchemy that
        # causes a deprecation warning to be thrown on a property of the
        # Sequence object when the repr for the sequence is created. Here a
        # filter is used to catch these deprecation warnings when tables
        # are created.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=sqlalchemy.exc.SADeprecationWarning)
            self._metadata.create_all(self._engine)
        # Run the one-time initializers in registration order.
        for initializer in schemaContext._initializers:
            initializer(self)
    except BaseException:
        self._metadata = None
        raise
@abstractmethod
def isWriteable(self) -> bool:
    """Report whether this client is able to modify the database.

    Returns
    -------
    writeable : `bool`
        `True` if this database can be modified by this client.
    """
    raise NotImplementedError()
@abstractmethod
def __str__(self) -> str:
    """Return a human-readable identifier for this `Database`.

    The identifier includes any namespace or schema that identifies its
    names within a `Registry`.
    """
    raise NotImplementedError()
@property
def dialect(self) -> sqlalchemy.engine.Dialect:
    """The SQLAlchemy dialect of the engine behind this database
    (`sqlalchemy.engine.Dialect`).
    """
    engine = self._engine
    return engine.dialect
def shrinkDatabaseEntityName(self, original: str) -> str:
    """Return a version of the given name that fits within this database
    engine's length limits for table, constraint, index, and sequence
    names.

    Implementations should not assume that simple truncation is safe,
    because multiple long names often begin with the same prefix.

    This base implementation returns the name unchanged.

    Parameters
    ----------
    original : `str`
        The original name.

    Returns
    -------
    shrunk : `str`
        The new, possibly shortened name.
    """
    return original
def expandDatabaseEntityName(self, shrunk: str) -> str:
    """Retrieve the original name for a database entity that was too long
    to fit within the database engine's limits.

    This base implementation returns the name unchanged.

    Parameters
    ----------
    shrunk : `str`
        The possibly shortened name.

    Returns
    -------
    original : `str`
        The original name.
    """
    return shrunk
def _mangleTableName(self, name: str) -> str:
    """Translate a logical, user-visible table name into the true table
    name used in the database.

    This base implementation returns the name unchanged.

    Parameters
    ----------
    name : `str`
        Input table name. Should not include a namespace (i.e. schema)
        prefix.

    Returns
    -------
    mangled : `str`
        Mangled version of the table name (still with no namespace prefix).

    Notes
    -----
    Reimplementations of this method must be idempotent: mangling a name
    that is already mangled must have no effect.
    """
    return name
def _makeColumnConstraints(self, table: str, spec: ddl.FieldSpec) -> List[sqlalchemy.CheckConstraint]:
    """Build the check constraints implied by a field specification.

    Parameters
    ----------
    table : `str`
        Name of the table this column is being added to.
    spec : `FieldSpec`
        Specification for the field to be added.

    Returns
    -------
    constraint : `list` of `sqlalchemy.CheckConstraint`
        Constraints added for this column; this base implementation adds
        none.
    """
    noConstraints: List[sqlalchemy.CheckConstraint] = []
    return noConstraints
def _convertFieldSpec(
    self, table: str, spec: ddl.FieldSpec, metadata: sqlalchemy.MetaData, **kwargs: Any
) -> sqlalchemy.schema.Column:
    """Convert a `FieldSpec` to a `sqlalchemy.schema.Column`.

    Parameters
    ----------
    table : `str`
        Name of the table this column is being added to.
    spec : `FieldSpec`
        Specification for the field to be added.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema this field's table is
        being added to.
    **kwargs
        Additional keyword arguments to forward to the
        `sqlalchemy.schema.Column` constructor. This is provided to make
        it easier for derived classes to delegate to ``super()`` while
        making only minor changes.

    Returns
    -------
    column : `sqlalchemy.schema.Column`
        SQLAlchemy representation of the field.
    """
    columnArgs = [spec.name, spec.getSizedColumnType()]
    if spec.autoincrement:
        # Generate a sequence to use for auto incrementing for databases
        # that do not support it natively. This will be ignored by
        # sqlalchemy for databases that do support it.
        sequenceName = self.shrinkDatabaseEntityName(f"{table}_seq_{spec.name}")
        columnArgs.append(sqlalchemy.Sequence(sequenceName, metadata=metadata))
    assert spec.doc is None or isinstance(spec.doc, str), f"Bad doc for {table}.{spec.name}."
    return sqlalchemy.schema.Column(
        *columnArgs,
        nullable=spec.nullable,
        primary_key=spec.primaryKey,
        comment=spec.doc,
        server_default=spec.default,
        **kwargs,
    )
def _convertForeignKeySpec(
    self, table: str, spec: ddl.ForeignKeySpec, metadata: sqlalchemy.MetaData, **kwargs: Any
) -> sqlalchemy.schema.ForeignKeyConstraint:
    """Convert a `ForeignKeySpec` to a
    `sqlalchemy.schema.ForeignKeyConstraint`.

    Parameters
    ----------
    table : `str`
        Name of the table this foreign key is being added to.
    spec : `ForeignKeySpec`
        Specification for the foreign key to be added.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema this constraint is
        being added to.
    **kwargs
        Additional keyword arguments to forward to the
        `sqlalchemy.schema.ForeignKeyConstraint` constructor. This is
        provided to make it easier for derived classes to delegate to
        ``super()`` while making only minor changes. ``name`` and
        ``ondelete`` are always supplied by this method and must not be
        passed here.

    Returns
    -------
    constraint : `sqlalchemy.schema.ForeignKeyConstraint`
        SQLAlchemy representation of the constraint.
    """
    # The referenced table name is mangled once and used both in the
    # constraint name and in the referenced column paths.
    targetTable = self._mangleTableName(spec.table)
    name = self.shrinkDatabaseEntityName("_".join(["fkey", table, targetTable, *spec.target, *spec.source]))
    return sqlalchemy.schema.ForeignKeyConstraint(
        spec.source,
        [f"{targetTable}.{col}" for col in spec.target],
        name=name,
        ondelete=spec.onDelete,
        # Forward the extra arguments as documented, the same way
        # `_convertFieldSpec` does.
        **kwargs,
    )
def _convertExclusionConstraintSpec(
    self,
    table: str,
    spec: Tuple[Union[str, Type[TimespanDatabaseRepresentation]], ...],
    metadata: sqlalchemy.MetaData,
) -> sqlalchemy.schema.Constraint:
    """Convert a `tuple` from `ddl.TableSpec.exclusion` into a SQLAlchemy
    constraint representation.

    This base implementation always raises; it does not build a
    constraint.

    Parameters
    ----------
    table : `str`
        Name of the table this constraint is being added to.
    spec : `tuple` [ `str` or `type` ]
        A tuple of `str` column names and the `type` object returned by
        `getTimespanRepresentation` (which must appear exactly once),
        indicating the order of the columns in the index used to back the
        constraint.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema this constraint is
        being added to.

    Returns
    -------
    constraint : `sqlalchemy.schema.Constraint`
        SQLAlchemy representation of the constraint.

    Raises
    ------
    NotImplementedError
        Raised if this database does not support exclusion constraints.
    """
    message = f"Database {self} does not support exclusion constraints."
    raise NotImplementedError(message)
def _convertTableSpec(
    self, name: str, spec: ddl.TableSpec, metadata: sqlalchemy.MetaData, **kwargs: Any
) -> sqlalchemy.schema.Table:
    """Build a `sqlalchemy.schema.Table` from a `TableSpec`.

    Parameters
    ----------
    name : `str`
        Name of the table; it is passed through `_mangleTableName` before
        being used for the table and for derived constraint/index names.
    spec : `TableSpec`
        Specification of the table to be converted.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy schema container the table is being added to.
    **kwargs
        Extra keyword arguments forwarded to the `sqlalchemy.schema.Table`
        constructor, so that derived classes can delegate to ``super()``
        while changing only small details.

    Returns
    -------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy representation of the table.

    Notes
    -----
    Foreign key *constraints* from ``spec.foreignKeys`` are deliberately not
    created here, to avoid circular dependencies; higher-level logic in
    `ensureTableExists`, `getExistingTable`, and `declareStaticTables` adds
    them. Only the optional indexes on foreign key source columns are
    created by this method.
    """
    name = self._mangleTableName(name)
    shrink = self.shrinkDatabaseEntityName

    # Columns first, then any per-column constraints.
    args = [self._convertFieldSpec(name, fieldSpec, metadata) for fieldSpec in spec.fields]
    args.extend(
        constraint
        for fieldSpec in spec.fields
        for constraint in self._makeColumnConstraints(name, fieldSpec)
    )

    # Column tuples that already get an index (primary key, unique
    # constraints, explicit indexes); used to skip redundant explicit or
    # foreign key indexes.
    primaryKey = tuple(fieldSpec.name for fieldSpec in spec.fields if fieldSpec.primaryKey)
    allIndexes = {primaryKey}

    for columns in spec.unique:
        constraintName = shrink("_".join([name, "unq"] + list(columns)))
        args.append(sqlalchemy.schema.UniqueConstraint(*columns, name=constraintName))
    allIndexes.update(spec.unique)

    for columns in spec.indexes:
        if columns in allIndexes:
            continue
        indexName = shrink("_".join([name, "idx"] + list(columns)))
        args.append(sqlalchemy.schema.Index(indexName, *columns, unique=(columns in spec.unique)))
    allIndexes.update(spec.indexes)

    for fk in spec.foreignKeys:
        if not fk.addIndex or fk.source in allIndexes:
            continue
        indexName = shrink("_".join((name, "fkidx") + fk.source))
        args.append(sqlalchemy.schema.Index(indexName, *fk.source))

    for excl in spec.exclusion:
        args.append(self._convertExclusionConstraintSpec(name, excl, metadata))

    assert spec.doc is None or isinstance(spec.doc, str), f"Bad doc for {name}."
    return sqlalchemy.schema.Table(name, metadata, *args, comment=spec.doc, info=spec, **kwargs)
def ensureTableExists(self, name: str, spec: ddl.TableSpec) -> sqlalchemy.schema.Table:
    """Return the table with the given name, creating it from ``spec`` when
    it does not exist yet.

    Parameters
    ----------
    name : `str`
        Name of the table (not including namespace qualifiers).
    spec : `TableSpec`
        Specification for the table. It is used to create the table, and
        *may* be used to check an existing table for consistency, although
        such a check is not guaranteed.

    Returns
    -------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy representation of the table.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` and the table does not
        already exist.
    DatabaseConflictError
        Raised if the table exists but ``spec`` is inconsistent with its
        definition.

    Notes
    -----
    This method may not be called within transactions. It may be called on
    read-only databases if and only if the table already exists.

    Subclasses may override this method, but usually should not need to.
    """
    # TODO: if _engine is used to make a table then it uses separate
    # connection and should not interfere with current transaction
    assert not (
        self._session_connection is not None and self._session_connection.in_transaction()
    ), "Table creation interrupts transactions."
    assert self._metadata is not None, "Static tables must be declared before dynamic tables."

    existing = self.getExistingTable(name, spec)
    if existing is not None:
        return existing

    if not self.isWriteable():
        raise ReadOnlyDatabaseError(
            f"Table {name} does not exist, and cannot be created "
            f"because database {self} is read-only."
        )

    created = self._convertTableSpec(name, spec, self._metadata)
    for foreignKeySpec in spec.foreignKeys:
        constraint = self._convertForeignKeySpec(name, foreignKeySpec, self._metadata)
        created.append_constraint(constraint)

    try:
        with self._connection() as connection:
            created.create(connection)
    except sqlalchemy.exc.DatabaseError:
        # Another process may have created the table in the meantime, which
        # usually surfaces as OperationalError or ProgrammingError. An
        # IF NOT EXISTS clause is not an option because of a server-side
        # PostgreSQL race that yields IntegrityError. All of these inherit
        # from DatabaseError, so catch that and look for the table again.
        created = self.getExistingTable(name, spec)
        if created is None:
            raise
    return created
def getExistingTable(self, name: str, spec: ddl.TableSpec) -> Optional[sqlalchemy.schema.Table]:
    """Look up an existing table by name and validate it against ``spec``.

    Parameters
    ----------
    name : `str`
        Name of the table (not including namespace qualifiers).
    spec : `TableSpec`
        Specification for the table. It is used to build the SQLAlchemy
        representation of the table and to check that the actual table is
        consistent with it.

    Returns
    -------
    table : `sqlalchemy.schema.Table` or `None`
        SQLAlchemy representation of the table, or `None` if it does not
        exist.

    Raises
    ------
    DatabaseConflictError
        Raised if the table exists but ``spec`` is inconsistent with its
        definition.

    Notes
    -----
    This method can be called within transactions and never modifies the
    database.

    Subclasses may override this method, but usually should not need to.
    """
    assert self._metadata is not None, "Static tables must be declared before dynamic tables."
    mangledName = self._mangleTableName(name)
    if self.namespace is None:
        metadataKey = mangledName
    else:
        metadataKey = f"{self.namespace}.{mangledName}"

    # Case 1: the table is already registered in our in-memory metadata;
    # only the set of column names is compared.
    known = self._metadata.tables.get(metadataKey)
    if known is not None:
        if spec.fields.names != set(known.columns.keys()):
            raise DatabaseConflictError(
                f"Table '{mangledName}' has already been defined differently; the new "
                f"specification has columns {list(spec.fields.names)}, while "
                f"the previous definition has {list(known.columns.keys())}."
            )
        return known

    # Case 2: not in metadata; ask the database itself.
    inspector = sqlalchemy.inspect(self._engine)
    if mangledName not in inspector.get_table_names(schema=self.namespace):
        return None
    _checkExistingTableDefinition(
        mangledName, spec, inspector.get_columns(mangledName, schema=self.namespace)
    )
    reflected = self._convertTableSpec(mangledName, spec, self._metadata)
    for foreignKeySpec in spec.foreignKeys:
        constraint = self._convertForeignKeySpec(mangledName, foreignKeySpec, self._metadata)
        reflected.append_constraint(constraint)
    return reflected
@classmethod
def getTimespanRepresentation(cls) -> Type[TimespanDatabaseRepresentation]:
    """Return the `type` describing how `Timespan` objects are stored in
    this database.

    `Database` itself does not use the returned type anywhere else; callers
    are responsible for keeping their DDL and queries consistent with it.

    Returns
    -------
    TimespanReprClass : `type` (`TimespanDatabaseRepresentation` subclass)
        A type that encapsulates the way `Timespan` objects should be
        stored in this database.

    Notes
    -----
    Timespan-mangling logic is kept outside the `Database` implementations,
    even though the choice of representation ultimately belongs to them,
    for two main reasons:

    - Timespans show up in relatively few tables and queries in typical
      usage, and the code handling them already knows it is dealing with
      timespans. A timespan-representation-aware implementation of, say,
      `insert` would need extra logic to detect when mangling is required,
      which would usually be wasted overhead.

    - SQLAlchemy's SELECT expression system cannot wrap several columns in
      a single expression object (the ORM can, but it is not used here), so
      much more of that code would have to be wrapped in our own interfaces
      to hide timespan representations.
    """
    representation: Type[TimespanDatabaseRepresentation] = TimespanDatabaseRepresentation.Compound
    return representation
@classmethod
def getSpatialRegionRepresentation(cls) -> Type[SpatialRegionDatabaseRepresentation]:
    """Return the `type` describing how `lsst.sphgeom.Region` objects are
    stored in this database.

    `Database` itself does not use the returned type anywhere else; callers
    are responsible for keeping their DDL and queries consistent with it.

    Returns
    -------
    RegionReprClass : `type` (`SpatialRegionDatabaseRepresentation` subclass)
        A type that encapsulates the way `lsst.sphgeom.Region` objects
        should be stored in this database.

    Notes
    -----
    See `getTimespanRepresentation` for comments on why this method is not
    more tightly integrated with the rest of the `Database` interface.
    """
    representation: Type[SpatialRegionDatabaseRepresentation] = SpatialRegionDatabaseRepresentation
    return representation
def sync(
    self,
    table: sqlalchemy.schema.Table,
    *,
    keys: Dict[str, Any],
    compared: Optional[Dict[str, Any]] = None,
    extra: Optional[Dict[str, Any]] = None,
    returning: Optional[Sequence[str]] = None,
    update: bool = False,
) -> Tuple[Optional[Dict[str, Any]], Union[bool, Dict[str, Any]]]:
    """Make sure the table holds a row equivalent to the given values,
    inserting one when necessary.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table to be queried and possibly inserted into.
    keys : `dict`
        Column name-value pairs used to look for an existing row; they must
        identify at most one row. They are also part of the inserted row
        when no match exists.
    compared : `dict`, optional
        Column name-value pairs checked against an existing row. They are
        also part of the inserted row when no match exists.
    extra : `dict`, optional
        Column name-value pairs that are ignored when a matching row
        exists, but included in the inserted row otherwise.
    returning : `~collections.abc.Sequence` of `str`, optional
        Names of the columns whose values should be returned.
    update : `bool`, optional
        If `True` (`False` is default), overwrite mismatching ``compared``
        columns of an existing row instead of raising
        `DatabaseConflictError`.

    Returns
    -------
    row : `dict`, optional
        Values of the columns named in ``returning``, or `None` if
        ``returning`` is `None`.
    inserted_or_updated : `bool` or `dict`
        `True` if a new row was inserted, `False` if a matching row already
        existed. A `dict` (only possible with ``update=True``) means an
        existing row was updated; it maps the updated column names to their
        *old* values (the new values are those in ``compared``).

    Raises
    ------
    DatabaseConflictError
        Raised if the values in ``compared`` do not match the values in the
        database.
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False`, and no matching record
        already exists.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    It may be called on read-only databases if and only if the matching row
    does in fact already exist.
    """

    def keysClause() -> sqlalchemy.sql.ClauseElement:
        """Build the ``WHERE`` expression that selects the row identified
        by ``keys``.
        """
        return sqlalchemy.sql.and_(*[table.columns[k] == v for k, v in keys.items()])

    def differs(stored: Any, given: Any) -> bool:
        """Compare a stored value with a caller-provided one, delegating to
        `time_utils.TimeConverter` when the stored value is an
        `astropy.time.Time`.
        """
        if isinstance(stored, astropy.time.Time):
            return not time_utils.TimeConverter().times_equal(stored, given)
        return stored != given

    def check() -> Tuple[int, Optional[Dict[str, Any]], Optional[List]]:
        """Query for the row matching ``keys`` and compare it with what the
        caller provided.

        Returns
        -------
        n : `int`
            Number of matching rows. ``n != 1`` is always an error, but
            which kind depends on where `check` is called from.
        bad : `dict` or `None`
            The keys of ``compared`` whose stored values differ from the
            given ones, mapped to the stored values. A non-empty ``bad``
            is a conflict whose handling depends on context. `None` if
            ``n != 1``.
        result : `list` or `None`
            Stored values for the columns listed in ``returning``, or
            `None` if ``returning is None`` or ``n != 1``.
        """
        wanted: Set[str] = set()
        if compared is not None:
            wanted.update(compared.keys())
        if returning is not None:
            wanted.update(returning)
        if not wanted:
            # Something has to be selected, even when all we care about is
            # the number of rows that come back.
            wanted.add(next(iter(keys.keys())))
        selectRow = (
            sqlalchemy.sql.select(*[table.columns[k].label(k) for k in wanted])
            .select_from(table)
            .where(keysClause())
        )
        with self._connection() as connection:
            matches = list(connection.execute(selectRow).mappings())
        if len(matches) != 1:
            return len(matches), None, None
        existing = matches[0]
        mismatched: Dict[str, Any] = {}
        if compared is not None:
            for column, given in compared.items():
                stored = existing[column]
                if differs(stored, given):
                    mismatched[column] = stored
        found: Optional[list] = None if returning is None else [existing[k] for k in returning]
        return 1, mismatched, found

    def format_bad(inconsistencies: Dict[str, Any]) -> str:
        """Render the ``bad`` mapping produced by ``check`` as text for an
        error message.
        """
        assert compared is not None, "Should not be able to get inconsistencies without comparing."
        return ", ".join(f"{k}: {v!r} != {compared[k]!r}" for k, v in inconsistencies.items())

    inserted_or_updated: Union[bool, Dict[str, Any]]
    if self.isTableWriteable(table):
        # Attempt the insert first, tolerating only specific kinds of
        # failure.
        row = keys.copy()
        for supplement in (compared, extra):
            if supplement is not None:
                row.update(supplement)
        with self.transaction():
            inserted = bool(self.ensure(table, row))
            # check() has to run inside the transaction here so that an
            # insert that did not do what we expected gets rolled back.
            # That limits how much logic can be shared with the read-only
            # branch below.
            n, bad, result = check()
            if n < 1:
                raise ConflictingDefinitionError(
                    f"Attempted to ensure {row} exists by inserting it with ON CONFLICT IGNORE, "
                    f"but a post-insert query on {keys} returned no results. "
                    f"Insert was {'' if inserted else 'not '}reported as successful. "
                    "This can occur if the insert violated a database constraint other than the "
                    "unique constraint or primary key used to identify the row in this call."
                )
            if n > 1:
                raise RuntimeError(
                    f"Keys passed to sync {keys.keys()} do not comprise a "
                    f"unique constraint for table {table.name}."
                )
            if bad:
                assert (
                    compared is not None
                ), "Should not be able to get inconsistencies without comparing."
                if inserted:
                    raise RuntimeError(
                        f"Conflict ({bad}) in sync after successful insert; this is "
                        "possible if the same table is being updated by a concurrent "
                        "process that isn't using sync, but it may also be a bug in "
                        "daf_butler."
                    )
                if not update:
                    raise DatabaseConflictError(
                        f"Conflict in sync for table {table.name} on column(s) {format_bad(bad)}."
                    )
                with self._connection() as connection:
                    connection.execute(
                        table.update().where(keysClause()).values(**{k: compared[k] for k in bad.keys()})
                    )
                inserted_or_updated = bad
            else:
                inserted_or_updated = inserted
    else:
        # Table is not writeable; all we can do is look for the row.
        n, bad, result = check()
        if n < 1:
            raise ReadOnlyDatabaseError("sync needs to insert, but database is read-only.")
        if n > 1:
            raise RuntimeError("Keys passed to sync do not comprise a unique constraint.")
        if bad:
            if update:
                raise ReadOnlyDatabaseError("sync needs to update, but database is read-only.")
            raise DatabaseConflictError(
                f"Conflict in sync for table {table.name} on column(s) {format_bad(bad)}."
            )
        inserted_or_updated = False

    if returning is None:
        return None, inserted_or_updated
    assert result is not None
    return dict(zip(returning, result)), inserted_or_updated
def insert(
    self,
    table: sqlalchemy.schema.Table,
    *rows: dict,
    returnIds: bool = False,
    select: Optional[sqlalchemy.sql.Select] = None,
    names: Optional[Iterable[str]] = None,
) -> Optional[List[int]]:
    """Insert rows into a table, optionally reporting the generated
    autoincrement primary key values.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    returnIds: `bool`
        If `True` (`False` is default), return the values of the table's
        autoincrement primary key field (which must exist).
    select : `sqlalchemy.sql.Select`, optional
        A SELECT query expression to insert rows from. Cannot be combined
        with ``rows`` or with ``returnIds=True``.
    names : `Iterable` [ `str` ], optional
        Names of the columns of ``table`` to populate, in the order of the
        columns returned by ``select``. Ignored if ``select`` is `None`.
        When omitted, the column names of ``select`` are used, so they must
        match the desired columns of ``table``.
    *rows
        Positional arguments are the rows to be inserted, as dictionaries
        mapping column name to value. The keys in all dictionaries must
        be the same.

    Returns
    -------
    ids : `None`, or `list` of `int`
        If ``returnIds`` is `True`, a `list` containing the inserted
        values for the table's autoincrement primary key; `None` otherwise.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.
    TypeError
        Raised if ``select`` is combined with ``rows`` or
        ``returnIds=True``.

    Notes
    -----
    This implementation issues one bulk insert when ``returnIds`` is
    `False`, and one single-row insert per row when it is `True`.

    Derived classes should reimplement when they can provide a more
    efficient implementation (especially for the latter case).

    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.
    """
    self.assertTableWriteable(table, f"Cannot insert into read-only table {table}.")
    if select is not None and (rows or returnIds):
        raise TypeError("'select' is incompatible with passing value rows or returnIds=True.")
    if not rows and select is None:
        # Nothing to insert at all.
        return [] if returnIds else None

    with self._connection() as connection:
        if returnIds:
            statement = table.insert()
            ids = [connection.execute(statement, row).inserted_primary_key[0] for row in rows]
            return ids
        if select is None:
            connection.execute(table.insert(), rows)
            return None
        if names is None:
            # columns() is deprecated since 1.4, but
            # selected_columns() method did not exist in 1.3.
            if hasattr(select, "selected_columns"):
                selected = select.selected_columns
            else:
                selected = select.columns
            names = selected.keys()
        connection.execute(table.insert().from_select(names, select))
        return None
@abstractmethod
def replace(self, table: sqlalchemy.schema.Table, *rows: dict) -> None:
    """Insert rows into a table, overwriting any existing rows whose
    primary key would collide with a new row.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    *rows
        Positional arguments are the rows to be inserted, as dictionaries
        mapping column name to value. The keys in all dictionaries must
        be the same.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    Implementations should raise a `sqlalchemy.exc.IntegrityError`
    exception when a constraint other than the primary key would be
    violated.

    Implementations are not required to support `replace` on tables
    with autoincrement keys.

    This base implementation only raises `NotImplementedError`.
    """
    raise NotImplementedError
@abstractmethod
def ensure(self, table: sqlalchemy.schema.Table, *rows: dict, primary_key_only: bool = False) -> int:
    """Insert rows into a table, silently skipping those whose insertion
    would violate a unique constraint.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    *rows
        Positional arguments are the rows to be inserted, as dictionaries
        mapping column name to value. The keys in all dictionaries must
        be the same.
    primary_key_only : `bool`, optional
        If `True` (`False` is default), only skip rows that violate the
        primary key constraint, and raise an exception (and rollback
        transactions) for other constraint violations.

    Returns
    -------
    count : `int`
        The number of rows actually inserted.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.
        This is raised even if the operation would do nothing even on a
        writeable database.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    Implementations are not required to support `ensure` on tables
    with autoincrement keys.

    This base implementation only raises `NotImplementedError`.
    """
    raise NotImplementedError
def delete(self, table: sqlalchemy.schema.Table, columns: Iterable[str], *rows: dict) -> int:
    """Delete one or more rows from a table.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table that rows should be deleted from.
    columns: `~collections.abc.Iterable` of `str`
        The names of columns that will be used to constrain the rows to
        be deleted; these will be combined via ``AND`` to form the
        ``WHERE`` clause of the delete query. Any iterable is accepted,
        including one-shot iterators. If it is empty, the delete is
        unconstrained.
    *rows
        Positional arguments are the keys of rows to be deleted, as
        dictionaries mapping column name to value. The keys in all
        dictionaries must be exactly the names in ``columns``.

    Returns
    -------
    count : `int`
        Number of rows deleted.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    The default implementation should be sufficient for most derived
    classes.
    """
    self.assertTableWriteable(table, f"Cannot delete from read-only table {table}.")
    # Materialize ``columns`` *before* testing its truthiness: a one-shot
    # iterator or generator is always truthy, even when it yields nothing,
    # which would make the "no columns" case below take the early return.
    columns = list(columns)
    if columns and not rows:
        # If there are no columns, this operation is supposed to delete
        # everything (so we proceed as usual). But if there are columns,
        # but no rows, it was a constrained bulk operation where the
        # constraint is that no rows match, and we should short-circuit
        # while reporting that no rows were affected.
        return 0
    sql = table.delete()

    # More efficient to use IN operator if there is only one
    # variable changing across all rows.
    content: Dict[str, Set] = defaultdict(set)
    if len(columns) == 1:
        # Nothing to calculate since we can always use IN
        column = columns[0]
        changing_columns = [column]
        content[column] = {row[column] for row in rows}
    else:
        for row in rows:
            for k, v in row.items():
                content[k].add(v)
        changing_columns = [col for col, values in content.items() if len(values) > 1]

    if len(changing_columns) != 1:
        # More than one column changes each time so do explicit bind
        # parameters and have each row processed separately.
        whereTerms = [table.columns[name] == sqlalchemy.sql.bindparam(name) for name in columns]
        if whereTerms:
            sql = sql.where(sqlalchemy.sql.and_(*whereTerms))
        with self._connection() as connection:
            return connection.execute(sql, rows).rowcount

    # One of the columns has changing values but any others are
    # fixed. In this case we can use an IN operator and be more
    # efficient.
    name = changing_columns.pop()

    # Simple where clause for the unchanging columns
    clauses = []
    for k, v in content.items():
        if k == name:
            continue
        column = table.columns[k]
        # The set only has one element
        clauses.append(column == v.pop())

    # The IN operator will not work for "infinite" numbers of
    # rows so must batch it up into distinct calls.
    in_content = list(content[name])
    n_elements = len(in_content)

    rowcount = 0
    n_per_loop = 1_000  # Controls how many items to put in IN clause
    with self._connection() as connection:
        for iposn in range(0, n_elements, n_per_loop):
            endpos = iposn + n_per_loop
            in_clause = table.columns[name].in_(in_content[iposn:endpos])

            newsql = sql.where(sqlalchemy.sql.and_(*clauses, in_clause))
            rowcount += connection.execute(newsql).rowcount
    return rowcount
def deleteWhere(self, table: sqlalchemy.schema.Table, where: sqlalchemy.sql.ClauseElement) -> int:
    """Delete the rows of a table selected by a pre-built WHERE clause.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table that rows should be deleted from.
    where: `sqlalchemy.sql.ClauseElement`
        SQLAlchemy expression used, as given, as the ``WHERE`` clause of
        the delete query.

    Returns
    -------
    count : `int`
        Number of rows deleted.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    The default implementation should be sufficient for most derived
    classes.
    """
    self.assertTableWriteable(table, f"Cannot delete from read-only table {table}.")

    statement = table.delete().where(where)
    with self._connection() as connection:
        outcome = connection.execute(statement)
        return outcome.rowcount
def update(self, table: sqlalchemy.schema.Table, where: Dict[str, str], *rows: dict) -> int:
    """Update one or more rows in a table.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table containing the rows to be updated.
    where : `dict` [`str`, `str`]
        Maps each column used to locate existing rows to the key under
        which its value is stored in the ``rows`` dictionaries. The two
        names may differ due to SQLAlchemy limitations.
    *rows
        Positional arguments are the rows to be updated. The keys in all
        dictionaries must be the same, and may correspond to either a
        value in the ``where`` dictionary or the name of a column to be
        updated.

    Returns
    -------
    count : `int`
        Number of rows matched (regardless of whether the update actually
        modified them); ``0`` when no rows are given.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    The default implementation should be sufficient for most derived
    classes.
    """
    self.assertTableWriteable(table, f"Cannot update read-only table {table}.")
    if not rows:
        return 0
    statement = table.update()
    # One equality term per search column, each bound to the row key named
    # in ``where``.
    criteria = [
        table.columns[column] == sqlalchemy.sql.bindparam(rowKey) for column, rowKey in where.items()
    ]
    statement = statement.where(sqlalchemy.sql.and_(*criteria))
    with self._connection() as connection:
        outcome = connection.execute(statement, rows)
        return outcome.rowcount
def query(
    self, sql: sqlalchemy.sql.Selectable, *args: Any, **kwargs: Any
) -> sqlalchemy.engine.ResultProxy:
    """Execute a SELECT query and return its result object.

    Parameters
    ----------
    sql : `sqlalchemy.sql.Selectable`
        A SQLAlchemy representation of a ``SELECT`` query.
    *args
        Additional positional arguments are forwarded to
        `sqlalchemy.engine.Connection.execute`.
    **kwargs
        Additional keyword arguments are forwarded to
        `sqlalchemy.engine.Connection.execute`.

    Returns
    -------
    result : `sqlalchemy.engine.ResultProxy`
        Query results.

    Notes
    -----
    The default implementation should be sufficient for most derived
    classes.
    """
    # A Result object is handed back to the caller, so the lifetime of the
    # connection behind it matters. Reuse the session connection when one
    # exists; otherwise open a dedicated connection that is closed together
    # with the result.
    #
    # TODO: May be better approach would be to make this method return a
    # context manager, but this means big changes for callers of this
    # method.
    connection = self._session_connection
    if connection is None:
        connection = self._engine.connect(close_with_result=True)
    # TODO: should we guard against non-SELECT queries here?
    return connection.execute(sql, *args, **kwargs)
# Annotation-only declaration: this statement binds no value to ``origin``.
# The string literal that follows documents the attribute.
origin: int
"""An integer ID that should be used as the default for any datasets,
quanta, or other entities that use a (autoincrement, origin) compound
primary key (`int`).
"""
# Annotation-only declaration: this statement binds no value to
# ``namespace``. The string literal that follows documents the attribute.
namespace: Optional[str]
"""The schema or namespace this database instance is associated with
(`str` or `None`).
"""