Coverage for python/lsst/daf/butler/registry/interfaces/_database.py: 14%
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = [
24 "Database",
25 "ReadOnlyDatabaseError",
26 "DatabaseConflictError",
27 "SchemaAlreadyDefinedError",
28 "StaticTablesContext",
29]
31from abc import ABC, abstractmethod
32from collections import defaultdict
33from contextlib import contextmanager
34from typing import (
35 Any,
36 Callable,
37 Dict,
38 Iterable,
39 Iterator,
40 List,
41 Optional,
42 Sequence,
43 Set,
44 Tuple,
45 Type,
46 Union,
47)
48import uuid
49import warnings
51import astropy.time
52import sqlalchemy
54from ...core import SpatialRegionDatabaseRepresentation, TimespanDatabaseRepresentation, ddl, time_utils
55from .._exceptions import ConflictingDefinitionError
# Key under which `Database.transaction` records, in the ``info`` dict of the
# active `sqlalchemy.engine.Connection`, whether that connection is currently
# inside a SAVEPOINT-enabled transaction block.
_IN_SAVEPOINT_TRANSACTION = "IN_SAVEPOINT_TRANSACTION"
def _checkExistingTableDefinition(name: str, spec: ddl.TableSpec, inspection: List[Dict[str, Any]]) -> None:
    """Verify that a table specification agrees with database introspection.

    Only the set of column names is compared; column order is irrelevant.

    Parameters
    ----------
    name : `str`
        Name of the table (only used in error messages).
    spec : `ddl.TableSpec`
        Specification of the table.
    inspection : `list` [ `dict` ]
        Column descriptions, as returned by
        `sqlalchemy.engine.reflection.Inspector.get_columns`.

    Raises
    ------
    DatabaseConflictError
        Raised if the definitions are inconsistent.
    """
    columnNames = [column["name"] for column in inspection]
    definitionsDiffer = spec.fields.names != set(columnNames)
    if not definitionsDiffer:
        return
    raise DatabaseConflictError(
        f"Table '{name}' exists but is defined differently in the database; "
        f"specification has columns {list(spec.fields.names)}, while the "
        f"table in the database has {columnNames}."
    )
class ReadOnlyDatabaseError(RuntimeError):
    """Error signalling that a write operation was attempted on a
    `Database` that is read-only.
    """
class DatabaseConflictError(ConflictingDefinitionError):
    """Error signalling that what is stored in the database (row values or
    schema entities) disagrees with what this client expects.
    """
class SchemaAlreadyDefinedError(RuntimeError):
    """Error signalling an attempt to initialize a database schema in a
    database where some tables already exist.
    """
class StaticTablesContext:
    """Collector for the static table declarations of one registry layer in
    a database.

    Instances are handed out by `Database.declareStaticTables`; that is the
    only way they are expected to be constructed.
    """

    def __init__(self, db: Database):
        self._db = db
        # Snapshot of the tables that already exist in the target namespace,
        # used to validate re-declarations against the live schema.
        self._inspector = sqlalchemy.inspect(self._db._engine)
        self._tableNames = frozenset(self._inspector.get_table_names(schema=self._db.namespace))
        # Foreign keys are collected here rather than attached immediately, so
        # that tables can be declared in any order.
        self._foreignKeys: List[Tuple[sqlalchemy.schema.Table, sqlalchemy.schema.ForeignKeyConstraint]] = []
        self._initializers: List[Callable[[Database], None]] = []

    def addTable(self, name: str, spec: ddl.TableSpec) -> sqlalchemy.schema.Table:
        """Declare a table and return its SQLAlchemy representation.

        Actual creation of the table may be deferred until the context
        created by `Database.declareStaticTables` ends, which lets tables be
        declared in any order even when they reference each other through
        foreign keys.

        Parameters
        ----------
        name : `str`
            Logical name of the table; it is mangled by the database before
            use.
        spec : `ddl.TableSpec`
            Specification of the table.

        Returns
        -------
        table : `sqlalchemy.schema.Table`
            SQLAlchemy representation of the table.
        """
        db = self._db
        name = db._mangleTableName(name)
        if name in self._tableNames:
            # The table is already in the database: make sure its columns
            # match the specification before going any further.
            existingColumns = self._inspector.get_columns(name, schema=db.namespace)
            _checkExistingTableDefinition(name, spec, existingColumns)
        table = db._convertTableSpec(name, spec, db._metadata)
        for foreignKeySpec in spec.foreignKeys:
            constraint = db._convertForeignKeySpec(name, foreignKeySpec, db._metadata)
            self._foreignKeys.append((table, constraint))
        return table

    def addTableTuple(self, specs: Tuple[ddl.TableSpec, ...]) -> Tuple[sqlalchemy.schema.Table, ...]:
        """Declare a named tuple of tables, returning their SQLAlchemy
        representations in a named tuple of the same type.

        As with `addTable`, actual creation may be deferred until the end of
        the `Database.declareStaticTables` context, so declaration order does
        not matter even in the presence of foreign key relationships.

        Notes
        -----
        ``specs`` *must* be an instance of a type created by
        `collections.namedtuple`, not just a regular tuple, and the returned
        object is guaranteed to be of that same type. Because
        `~collections.namedtuple` is a factory for `type` objects rather than
        a type itself, this cannot be expressed with type annotations.
        """
        tables = (self.addTable(name, spec) for name, spec in zip(specs._fields, specs))  # type: ignore
        return specs._make(tables)  # type: ignore

    def addInitializer(self, initializer: Callable[[Database], None]) -> None:
        """Register a callable that performs one-time database initialization.

        Initialization can be anything that changes the state of the database
        and has to happen exactly once after the schema has been created;
        populating schema attributes is one example.

        Parameters
        ----------
        initializer : callable
            Callable taking a single argument, a `Database` instance.
        """
        self._initializers.append(initializer)
class Session:
    """Class representing a persistent connection to a database.

    Parameters
    ----------
    db : `Database`
        Database instance.

    Notes
    -----
    Client code should not instantiate this class directly; instead use
    `Database.session` to obtain a session context::

        with db.session() as session:
            session.method()
            db.method()

    In the current implementation sessions may be nested, and transactions
    may be nested within a session. All nested sessions and transactions
    share the same database connection.

    This class exposes the limited subset of the database API that requires
    a persistent connection (e.g. temporary tables, whose lifetime is that of
    a session). Potentially most of the database API could be associated
    with a Session class.
    """

    def __init__(self, db: Database):
        self._db = db

    def makeTemporaryTable(self, spec: ddl.TableSpec, name: Optional[str] = None) -> sqlalchemy.schema.Table:
        """Create a temporary table.

        Parameters
        ----------
        spec : `TableSpec`
            Specification for the table.
        name : `str`, optional
            A unique (within this session/connection) name for the table.
            Subclasses may override to modify the actual name used. If not
            provided, a unique name will be generated.

        Returns
        -------
        table : `sqlalchemy.schema.Table`
            SQLAlchemy representation of the table.

        Raises
        ------
        ValueError
            Raised if the name, after transformation by the database, matches
            an existing temporary table while differing from the requested
            name.

        Notes
        -----
        Temporary tables may be created, dropped, and written to even in
        read-only databases - at least according to the Python-level
        protections in the `Database` classes. Server permissions may say
        otherwise, but in that case they probably need to be modified to
        support the full range of expected read-only butler behavior.

        Temporary table rows are guaranteed to be dropped when a connection
        is closed. `Database` implementations are permitted to allow the
        table to remain as long as this is transparent to the user (i.e.
        "creating" the temporary table in a new session should not be an
        error, even if it does nothing).

        It may not be possible to use temporary tables within transactions
        with some database engines (or configurations thereof).
        """
        db = self._db
        if name is None:
            name = f"tmp_{uuid.uuid4().hex}"
        table = db._convertTableSpec(name, spec, db._metadata, prefixes=['TEMPORARY'],
                                     schema=sqlalchemy.schema.BLANK_SCHEMA)
        if table.key in db._tempTables and table.key != name:
            raise ValueError(f"A temporary table with name {name} (transformed to {table.key} by "
                             f"Database) already exists.")
        for foreignKeySpec in spec.foreignKeys:
            constraint = db._convertForeignKeySpec(name, foreignKeySpec, db._metadata)
            table.append_constraint(constraint)
        with db._connection() as connection:
            table.create(connection)
        db._tempTables.add(table.key)
        return table

    def dropTemporaryTable(self, table: sqlalchemy.schema.Table) -> None:
        """Drop a temporary table.

        Parameters
        ----------
        table : `sqlalchemy.schema.Table`
            A SQLAlchemy object returned by a previous call to
            `makeTemporaryTable`.

        Raises
        ------
        TypeError
            Raised if the table is not one of the temporary tables tracked by
            the database.
        """
        db = self._db
        if table.key not in db._tempTables:
            raise TypeError(f"Table {table.key} was not created by makeTemporaryTable.")
        with db._connection() as connection:
            table.drop(connection)
        db._tempTables.remove(table.key)
270class Database(ABC):
271 """An abstract interface that represents a particular database engine's
272 representation of a single schema/namespace/database.
274 Parameters
275 ----------
276 origin : `int`
277 An integer ID that should be used as the default for any datasets,
278 quanta, or other entities that use a (autoincrement, origin) compound
279 primary key.
280 engine : `sqlalchemy.engine.Engine`
281 The SQLAlchemy engine for this `Database`.
282 namespace : `str`, optional
283 Name of the schema or namespace this instance is associated with.
284 This is passed as the ``schema`` argument when constructing a
285 `sqlalchemy.schema.MetaData` instance. We use ``namespace`` instead to
286 avoid confusion between "schema means namespace" and "schema means
287 table definitions".
289 Notes
290 -----
291 `Database` requires all write operations to go through its special named
292 methods. Our write patterns are sufficiently simple that we don't really
293 need the full flexibility of SQL insert/update/delete syntax, and we need
294 non-standard (but common) functionality in these operations sufficiently
295 often that it seems worthwhile to provide our own generic API.
297 In contrast, `Database.query` allows arbitrary ``SELECT`` queries (via
298 their SQLAlchemy representation) to be run, as we expect these to require
299 significantly more sophistication while still being limited to standard
300 SQL.
302 `Database` itself has several underscore-prefixed attributes:
304 - ``_engine``: SQLAlchemy object representing its engine.
305 - ``_connection``: method returning a context manager for
306 `sqlalchemy.engine.Connection` object.
307 - ``_metadata``: the `sqlalchemy.schema.MetaData` object representing
308 the tables and other schema entities.
310 These are considered protected (derived classes may access them, but other
311 code should not), and read-only, aside from executing SQL via
312 ``_connection``.
313 """
315 def __init__(self, *, origin: int, engine: sqlalchemy.engine.Engine,
316 namespace: Optional[str] = None):
317 self.origin = origin
318 self.namespace = namespace
319 self._engine = engine
320 self._session_connection: Optional[sqlalchemy.engine.Connection] = None
321 self._metadata: Optional[sqlalchemy.schema.MetaData] = None
322 self._tempTables: Set[str] = set()
324 def __repr__(self) -> str:
325 # Rather than try to reproduce all the parameters used to create
326 # the object, instead report the more useful information of the
327 # connection URL.
328 if self._engine.url.password is not None:
329 uri = str(self._engine.url.set(password="***"))
330 else:
331 uri = str(self._engine.url)
332 if self.namespace:
333 uri += f"#{self.namespace}"
334 return f'{type(self).__name__}("{uri}")'
336 @classmethod
337 def makeDefaultUri(cls, root: str) -> Optional[str]:
338 """Create a default connection URI appropriate for the given root
339 directory, or `None` if there can be no such default.
340 """
341 return None
343 @classmethod
344 def fromUri(cls, uri: str, *, origin: int, namespace: Optional[str] = None,
345 writeable: bool = True) -> Database:
346 """Construct a database from a SQLAlchemy URI.
348 Parameters
349 ----------
350 uri : `str`
351 A SQLAlchemy URI connection string.
352 origin : `int`
353 An integer ID that should be used as the default for any datasets,
354 quanta, or other entities that use a (autoincrement, origin)
355 compound primary key.
356 namespace : `str`, optional
357 A database namespace (i.e. schema) the new instance should be
358 associated with. If `None` (default), the namespace (if any) is
359 inferred from the URI.
360 writeable : `bool`, optional
361 If `True`, allow write operations on the database, including
362 ``CREATE TABLE``.
364 Returns
365 -------
366 db : `Database`
367 A new `Database` instance.
368 """
369 return cls.fromEngine(cls.makeEngine(uri, writeable=writeable),
370 origin=origin,
371 namespace=namespace,
372 writeable=writeable)
374 @classmethod
375 @abstractmethod
376 def makeEngine(cls, uri: str, *, writeable: bool = True) -> sqlalchemy.engine.Engine:
377 """Create a `sqlalchemy.engine.Engine` from a SQLAlchemy URI.
379 Parameters
380 ----------
381 uri : `str`
382 A SQLAlchemy URI connection string.
383 writeable : `bool`, optional
384 If `True`, allow write operations on the database, including
385 ``CREATE TABLE``.
387 Returns
388 -------
389 engine : `sqlalchemy.engine.Engine`
390 A database engine.
392 Notes
393 -----
394 Subclasses that support other ways to connect to a database are
395 encouraged to add optional arguments to their implementation of this
396 method, as long as they maintain compatibility with the base class
397 call signature.
398 """
399 raise NotImplementedError()
401 @classmethod
402 @abstractmethod
403 def fromEngine(cls, engine: sqlalchemy.engine.Engine, *, origin: int,
404 namespace: Optional[str] = None, writeable: bool = True) -> Database:
405 """Create a new `Database` from an existing `sqlalchemy.engine.Engine`.
407 Parameters
408 ----------
409 engine : `sqlalchemy.engine.Engine`
410 The engine for the database. May be shared between `Database`
411 instances.
412 origin : `int`
413 An integer ID that should be used as the default for any datasets,
414 quanta, or other entities that use a (autoincrement, origin)
415 compound primary key.
416 namespace : `str`, optional
417 A different database namespace (i.e. schema) the new instance
418 should be associated with. If `None` (default), the namespace
419 (if any) is inferred from the connection.
420 writeable : `bool`, optional
421 If `True`, allow write operations on the database, including
422 ``CREATE TABLE``.
424 Returns
425 -------
426 db : `Database`
427 A new `Database` instance.
429 Notes
430 -----
431 This method allows different `Database` instances to share the same
432 engine, which is desirable when they represent different namespaces
433 can be queried together.
434 """
435 raise NotImplementedError()
437 @contextmanager
438 def session(self) -> Iterator:
439 """Return a context manager that represents a session (persistent
440 connection to a database).
441 """
442 if self._session_connection is not None:
443 # session already started, just reuse that
444 yield Session(self)
445 else:
446 try:
447 # open new connection and close it when done
448 self._session_connection = self._engine.connect()
449 yield Session(self)
450 finally:
451 if self._session_connection is not None:
452 self._session_connection.close()
453 self._session_connection = None
454 # Temporary tables only live within session
455 self._tempTables = set()
457 @contextmanager
458 def transaction(self, *, interrupting: bool = False, savepoint: bool = False,
459 lock: Iterable[sqlalchemy.schema.Table] = ()) -> Iterator:
460 """Return a context manager that represents a transaction.
462 Parameters
463 ----------
464 interrupting : `bool`, optional
465 If `True` (`False` is default), this transaction block may not be
466 nested without an outer one, and attempting to do so is a logic
467 (i.e. assertion) error.
468 savepoint : `bool`, optional
469 If `True` (`False` is default), create a `SAVEPOINT`, allowing
470 exceptions raised by the database (e.g. due to constraint
471 violations) during this transaction's context to be caught outside
472 it without also rolling back all operations in an outer transaction
473 block. If `False`, transactions may still be nested, but a
474 rollback may be generated at any level and affects all levels, and
475 commits are deferred until the outermost block completes. If any
476 outer transaction block was created with ``savepoint=True``, all
477 inner blocks will be as well (regardless of the actual value
478 passed). This has no effect if this is the outermost transaction.
479 lock : `Iterable` [ `sqlalchemy.schema.Table` ], optional
480 A list of tables to lock for the duration of this transaction.
481 These locks are guaranteed to prevent concurrent writes and allow
482 this transaction (only) to acquire the same locks (others should
483 block), but only prevent concurrent reads if the database engine
484 requires that in order to block concurrent writes.
486 Notes
487 -----
488 All transactions on a connection managed by one or more `Database`
489 instances _must_ go through this method, or transaction state will not
490 be correctly managed.
491 """
492 # need a connection, use session to manage it
493 with self.session():
494 assert self._session_connection is not None
495 connection = self._session_connection
496 assert not (interrupting and connection.in_transaction()), (
497 "Logic error in transaction nesting: an operation that would "
498 "interrupt the active transaction context has been requested."
499 )
500 # We remember whether we are already in a SAVEPOINT transaction via
501 # the connection object's 'info' dict, which is explicitly for user
502 # information like this. This is safer than a regular `Database`
503 # instance attribute, because it guards against multiple `Database`
504 # instances sharing the same connection. The need to use our own
505 # flag here to track whether we're in a nested transaction should
506 # go away in SQLAlchemy 1.4, which seems to have a
507 # `Connection.in_nested_transaction()` method.
508 savepoint = savepoint or connection.info.get(_IN_SAVEPOINT_TRANSACTION, False)
509 connection.info[_IN_SAVEPOINT_TRANSACTION] = savepoint
510 if connection.in_transaction() and savepoint:
511 trans = connection.begin_nested()
512 elif not connection.in_transaction():
513 # Use a regular (non-savepoint) transaction always for the
514 # outermost context.
515 trans = connection.begin()
516 else:
517 # Nested non-savepoint transactions, don't do anything.
518 trans = None
519 self._lockTables(connection, lock)
520 try:
521 yield
522 if trans is not None:
523 trans.commit()
524 except BaseException:
525 if trans is not None:
526 trans.rollback()
527 raise
528 finally:
529 if not connection.in_transaction():
530 connection.info.pop(_IN_SAVEPOINT_TRANSACTION, None)
532 @contextmanager
533 def _connection(self) -> Iterator[sqlalchemy.engine.Connection]:
534 """Return context manager for Connection.
535 """
536 if self._session_connection is not None:
537 # It means that we are in Session context, but we may not be in
538 # transaction context. Start a short transaction in that case.
539 if self._session_connection.in_transaction():
540 yield self._session_connection
541 else:
542 with self._session_connection.begin():
543 yield self._session_connection
544 else:
545 # Make new connection and transaction, transaction will be
546 # committed on context exit.
547 with self._engine.begin() as connection:
548 yield connection
550 @abstractmethod
551 def _lockTables(self, connection: sqlalchemy.engine.Connection,
552 tables: Iterable[sqlalchemy.schema.Table] = ()) -> None:
553 """Acquire locks on the given tables.
555 This is an implementation hook for subclasses, called by `transaction`.
556 It should not be called directly by other code.
558 Parameters
559 ----------
560 connection : `sqlalchemy.engine.Connection`
561 Database connection object. It is guaranteed that transaction is
562 already in a progress for this connection.
563 tables : `Iterable` [ `sqlalchemy.schema.Table` ], optional
564 A list of tables to lock for the duration of this transaction.
565 These locks are guaranteed to prevent concurrent writes and allow
566 this transaction (only) to acquire the same locks (others should
567 block), but only prevent concurrent reads if the database engine
568 requires that in order to block concurrent writes.
569 """
570 raise NotImplementedError()
572 def isTableWriteable(self, table: sqlalchemy.schema.Table) -> bool:
573 """Check whether a table is writeable, either because the database
574 connection is read-write or the table is a temporary table.
576 Parameters
577 ----------
578 table : `sqlalchemy.schema.Table`
579 SQLAlchemy table object to check.
581 Returns
582 -------
583 writeable : `bool`
584 Whether this table is writeable.
585 """
586 return self.isWriteable() or table.key in self._tempTables
588 def assertTableWriteable(self, table: sqlalchemy.schema.Table, msg: str) -> None:
589 """Raise if the given table is not writeable, either because the
590 database connection is read-write or the table is a temporary table.
592 Parameters
593 ----------
594 table : `sqlalchemy.schema.Table`
595 SQLAlchemy table object to check.
596 msg : `str`, optional
597 If provided, raise `ReadOnlyDatabaseError` instead of returning
598 `False`, with this message.
599 """
600 if not self.isTableWriteable(table):
601 raise ReadOnlyDatabaseError(msg)
603 @contextmanager
604 def declareStaticTables(self, *, create: bool) -> Iterator[StaticTablesContext]:
605 """Return a context manager in which the database's static DDL schema
606 can be declared.
608 Parameters
609 ----------
610 create : `bool`
611 If `True`, attempt to create all tables at the end of the context.
612 If `False`, they will be assumed to already exist.
614 Returns
615 -------
616 schema : `StaticTablesContext`
617 A helper object that is used to add new tables.
619 Raises
620 ------
621 ReadOnlyDatabaseError
622 Raised if ``create`` is `True`, `Database.isWriteable` is `False`,
623 and one or more declared tables do not already exist.
625 Examples
626 --------
627 Given a `Database` instance ``db``::
629 with db.declareStaticTables(create=True) as schema:
630 schema.addTable("table1", TableSpec(...))
631 schema.addTable("table2", TableSpec(...))
633 Notes
634 -----
635 A database's static DDL schema must be declared before any dynamic
636 tables are managed via calls to `ensureTableExists` or
637 `getExistingTable`. The order in which static schema tables are added
638 inside the context block is unimportant; they will automatically be
639 sorted and added in an order consistent with their foreign key
640 relationships.
641 """
642 if create and not self.isWriteable():
643 raise ReadOnlyDatabaseError(f"Cannot create tables in read-only database {self}.")
644 self._metadata = sqlalchemy.MetaData(schema=self.namespace)
645 try:
646 context = StaticTablesContext(self)
647 if create and context._tableNames:
648 # Looks like database is already initalized, to avoid danger
649 # of modifying/destroying valid schema we refuse to do
650 # anything in this case
651 raise SchemaAlreadyDefinedError(f"Cannot create tables in non-empty database {self}.")
652 yield context
653 for table, foreignKey in context._foreignKeys:
654 table.append_constraint(foreignKey)
655 if create:
656 if self.namespace is not None:
657 if self.namespace not in context._inspector.get_schema_names():
658 with self._connection() as connection:
659 connection.execute(sqlalchemy.schema.CreateSchema(self.namespace))
660 # In our tables we have columns that make use of sqlalchemy
661 # Sequence objects. There is currently a bug in sqlalchemy that
662 # causes a deprecation warning to be thrown on a property of
663 # the Sequence object when the repr for the sequence is
664 # created. Here a filter is used to catch these deprecation
665 # warnings when tables are created.
666 with warnings.catch_warnings():
667 warnings.simplefilter("ignore", category=sqlalchemy.exc.SADeprecationWarning)
668 self._metadata.create_all(self._engine)
669 # call all initializer methods sequentially
670 for init in context._initializers:
671 init(self)
672 except BaseException:
673 self._metadata = None
674 raise
676 @abstractmethod
677 def isWriteable(self) -> bool:
678 """Return `True` if this database can be modified by this client.
679 """
680 raise NotImplementedError()
682 @abstractmethod
683 def __str__(self) -> str:
684 """Return a human-readable identifier for this `Database`, including
685 any namespace or schema that identifies its names within a `Registry`.
686 """
687 raise NotImplementedError()
689 @property
690 def dialect(self) -> sqlalchemy.engine.Dialect:
691 """The SQLAlchemy dialect for this database engine
692 (`sqlalchemy.engine.Dialect`).
693 """
694 return self._engine.dialect
696 def shrinkDatabaseEntityName(self, original: str) -> str:
697 """Return a version of the given name that fits within this database
698 engine's length limits for table, constraint, indexes, and sequence
699 names.
701 Implementations should not assume that simple truncation is safe,
702 because multiple long names often begin with the same prefix.
704 The default implementation simply returns the given name.
706 Parameters
707 ----------
708 original : `str`
709 The original name.
711 Returns
712 -------
713 shrunk : `str`
714 The new, possibly shortened name.
715 """
716 return original
718 def expandDatabaseEntityName(self, shrunk: str) -> str:
719 """Retrieve the original name for a database entity that was too long
720 to fit within the database engine's limits.
722 Parameters
723 ----------
724 original : `str`
725 The original name.
727 Returns
728 -------
729 shrunk : `str`
730 The new, possibly shortened name.
731 """
732 return shrunk
734 def _mangleTableName(self, name: str) -> str:
735 """Map a logical, user-visible table name to the true table name used
736 in the database.
738 The default implementation returns the given name unchanged.
740 Parameters
741 ----------
742 name : `str`
743 Input table name. Should not include a namespace (i.e. schema)
744 prefix.
746 Returns
747 -------
748 mangled : `str`
749 Mangled version of the table name (still with no namespace prefix).
751 Notes
752 -----
753 Reimplementations of this method must be idempotent - mangling an
754 already-mangled name must have no effect.
755 """
756 return name
758 def _makeColumnConstraints(self, table: str, spec: ddl.FieldSpec) -> List[sqlalchemy.CheckConstraint]:
759 """Create constraints based on this spec.
761 Parameters
762 ----------
763 table : `str`
764 Name of the table this column is being added to.
765 spec : `FieldSpec`
766 Specification for the field to be added.
768 Returns
769 -------
770 constraint : `list` of `sqlalchemy.CheckConstraint`
771 Constraint added for this column.
772 """
773 # By default we return no additional constraints
774 return []
776 def _convertFieldSpec(self, table: str, spec: ddl.FieldSpec, metadata: sqlalchemy.MetaData,
777 **kwargs: Any) -> sqlalchemy.schema.Column:
778 """Convert a `FieldSpec` to a `sqlalchemy.schema.Column`.
780 Parameters
781 ----------
782 table : `str`
783 Name of the table this column is being added to.
784 spec : `FieldSpec`
785 Specification for the field to be added.
786 metadata : `sqlalchemy.MetaData`
787 SQLAlchemy representation of the DDL schema this field's table is
788 being added to.
789 **kwargs
790 Additional keyword arguments to forward to the
791 `sqlalchemy.schema.Column` constructor. This is provided to make
792 it easier for derived classes to delegate to ``super()`` while
793 making only minor changes.
795 Returns
796 -------
797 column : `sqlalchemy.schema.Column`
798 SQLAlchemy representation of the field.
799 """
800 args = [spec.name, spec.getSizedColumnType()]
801 if spec.autoincrement:
802 # Generate a sequence to use for auto incrementing for databases
803 # that do not support it natively. This will be ignored by
804 # sqlalchemy for databases that do support it.
805 args.append(sqlalchemy.Sequence(self.shrinkDatabaseEntityName(f"{table}_seq_{spec.name}"),
806 metadata=metadata))
807 assert spec.doc is None or isinstance(spec.doc, str), f"Bad doc for {table}.{spec.name}."
808 return sqlalchemy.schema.Column(*args, nullable=spec.nullable, primary_key=spec.primaryKey,
809 comment=spec.doc, server_default=spec.default, **kwargs)
811 def _convertForeignKeySpec(self, table: str, spec: ddl.ForeignKeySpec, metadata: sqlalchemy.MetaData,
812 **kwargs: Any) -> sqlalchemy.schema.ForeignKeyConstraint:
813 """Convert a `ForeignKeySpec` to a
814 `sqlalchemy.schema.ForeignKeyConstraint`.
816 Parameters
817 ----------
818 table : `str`
819 Name of the table this foreign key is being added to.
820 spec : `ForeignKeySpec`
821 Specification for the foreign key to be added.
822 metadata : `sqlalchemy.MetaData`
823 SQLAlchemy representation of the DDL schema this constraint is
824 being added to.
825 **kwargs
826 Additional keyword arguments to forward to the
827 `sqlalchemy.schema.ForeignKeyConstraint` constructor. This is
828 provided to make it easier for derived classes to delegate to
829 ``super()`` while making only minor changes.
831 Returns
832 -------
833 constraint : `sqlalchemy.schema.ForeignKeyConstraint`
834 SQLAlchemy representation of the constraint.
835 """
836 name = self.shrinkDatabaseEntityName(
837 "_".join(["fkey", table, self._mangleTableName(spec.table)]
838 + list(spec.target) + list(spec.source))
839 )
840 return sqlalchemy.schema.ForeignKeyConstraint(
841 spec.source,
842 [f"{self._mangleTableName(spec.table)}.{col}" for col in spec.target],
843 name=name,
844 ondelete=spec.onDelete
845 )
847 def _convertExclusionConstraintSpec(self, table: str,
848 spec: Tuple[Union[str, Type[TimespanDatabaseRepresentation]], ...],
849 metadata: sqlalchemy.MetaData) -> sqlalchemy.schema.Constraint:
850 """Convert a `tuple` from `ddl.TableSpec.exclusion` into a SQLAlchemy
851 constraint representation.
853 Parameters
854 ----------
855 table : `str`
856 Name of the table this constraint is being added to.
857 spec : `tuple` [ `str` or `type` ]
858 A tuple of `str` column names and the `type` object returned by
859 `getTimespanRepresentation` (which must appear exactly once),
860 indicating the order of the columns in the index used to back the
861 constraint.
862 metadata : `sqlalchemy.MetaData`
863 SQLAlchemy representation of the DDL schema this constraint is
864 being added to.
866 Returns
867 -------
868 constraint : `sqlalchemy.schema.Constraint`
869 SQLAlchemy representation of the constraint.
871 Raises
872 ------
873 NotImplementedError
874 Raised if this database does not support exclusion constraints.
875 """
876 raise NotImplementedError(f"Database {self} does not support exclusion constraints.")
def _convertTableSpec(self, name: str, spec: ddl.TableSpec, metadata: sqlalchemy.MetaData,
                      **kwargs: Any) -> sqlalchemy.schema.Table:
    """Build a `sqlalchemy.schema.Table` from a `TableSpec`.

    Parameters
    ----------
    name : `str`
        Name of the table; it is passed through `_mangleTableName` before
        being used for the table and for derived constraint/index names.
    spec : `TableSpec`
        Specification for the table to be converted.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema this table is being
        added to.
    **kwargs
        Additional keyword arguments forwarded to the
        `sqlalchemy.schema.Table` constructor, so that derived classes can
        delegate to ``super()`` while making only minor changes.

    Returns
    -------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy representation of the table.

    Notes
    -----
    ``spec.foreignKeys`` constraints are deliberately *not* created here,
    in order to avoid circular dependencies (only the indexes backing them
    are).  The constraints themselves are added by higher-level logic in
    `ensureTableExists`, `getExistingTable`, and `declareStaticTables`.
    """
    name = self._mangleTableName(name)

    # Columns first, then any per-column constraints.
    args = []
    for fieldSpec in spec.fields:
        args.append(self._convertFieldSpec(name, fieldSpec, metadata))
    for fieldSpec in spec.fields:
        args.extend(self._makeColumnConstraints(name, fieldSpec))

    # Column tuples that already have (or will get) an index, starting with
    # the primary key; used to avoid emitting duplicate explicit or
    # foreign-key indexes.
    primaryKeyColumns = tuple(fieldSpec.name for fieldSpec in spec.fields if fieldSpec.primaryKey)
    allIndexes = {primaryKeyColumns}

    for columns in spec.unique:
        constraintName = self.shrinkDatabaseEntityName("_".join([name, "unq"] + list(columns)))
        args.append(sqlalchemy.schema.UniqueConstraint(*columns, name=constraintName))
    allIndexes.update(spec.unique)

    for columns in spec.indexes:
        if columns in allIndexes:
            continue
        indexName = self.shrinkDatabaseEntityName("_".join([name, "idx"] + list(columns)))
        args.append(sqlalchemy.schema.Index(indexName, *columns, unique=(columns in spec.unique)))
    allIndexes.update(spec.indexes)

    for fk in spec.foreignKeys:
        if not fk.addIndex or fk.source in allIndexes:
            continue
        fkIndexName = self.shrinkDatabaseEntityName("_".join((name, "fkidx") + fk.source))
        args.append(sqlalchemy.schema.Index(fkIndexName, *fk.source))

    for excl in spec.exclusion:
        args.append(self._convertExclusionConstraintSpec(name, excl, metadata))

    assert spec.doc is None or isinstance(spec.doc, str), f"Bad doc for {name}."
    return sqlalchemy.schema.Table(name, metadata, *args, comment=spec.doc, info=spec, **kwargs)
def ensureTableExists(self, name: str, spec: ddl.TableSpec) -> sqlalchemy.schema.Table:
    """Return the table with the given name and specification, creating it
    in the database first if it does not exist yet.

    Parameters
    ----------
    name : `str`
        Name of the table (not including namespace qualifiers).
    spec : `TableSpec`
        Specification for the table.  It is used when the table has to be
        created, and *may* be used to check an existing table for
        consistency, but no such check is guaranteed.

    Returns
    -------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy representation of the table.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False`, and the table does not
        already exist.
    DatabaseConflictError
        Raised if the table exists but ``spec`` is inconsistent with its
        definition.

    Notes
    -----
    This method may not be called within transactions.  It may be called
    on read-only databases if and only if the table does in fact already
    exist.

    Subclasses may override this method, but usually should not need to.
    """
    # TODO: if _engine is used to make a table then it uses separate
    # connection and should not interfere with current transaction
    sessionConnection = self._session_connection
    assert sessionConnection is None or not sessionConnection.in_transaction(), \
        "Table creation interrupts transactions."
    assert self._metadata is not None, "Static tables must be declared before dynamic tables."

    existing = self.getExistingTable(name, spec)
    if existing is not None:
        return existing

    if not self.isWriteable():
        raise ReadOnlyDatabaseError(
            f"Table {name} does not exist, and cannot be created "
            f"because database {self} is read-only."
        )

    # Build the in-memory representation, attach foreign keys (which
    # _convertTableSpec leaves out), then emit the CREATE TABLE.
    created = self._convertTableSpec(name, spec, self._metadata)
    for foreignKeySpec in spec.foreignKeys:
        constraint = self._convertForeignKeySpec(name, foreignKeySpec, self._metadata)
        created.append_constraint(constraint)
    with self._connection() as connection:
        created.create(connection)
    return created
def getExistingTable(self, name: str, spec: ddl.TableSpec) -> Optional[sqlalchemy.schema.Table]:
    """Look up an existing table with the given name and specification.

    Parameters
    ----------
    name : `str`
        Name of the table (not including namespace qualifiers).
    spec : `TableSpec`
        Specification for the table.  It is used to build the SQLAlchemy
        representation of the table and to check that the actual table
        in the database is consistent.

    Returns
    -------
    table : `sqlalchemy.schema.Table` or `None`
        SQLAlchemy representation of the table, or `None` if it does not
        exist.

    Raises
    ------
    DatabaseConflictError
        Raised if the table exists but ``spec`` is inconsistent with its
        definition.

    Notes
    -----
    This method can be called within transactions and never modifies the
    database.

    Subclasses may override this method, but usually should not need to.
    """
    assert self._metadata is not None, "Static tables must be declared before dynamic tables."
    name = self._mangleTableName(name)
    qualifiedName = name if self.namespace is None else f"{self.namespace}.{name}"
    table = self._metadata.tables.get(qualifiedName)

    if table is None:
        # Not known to our metadata yet; ask the database itself.
        inspector = sqlalchemy.inspect(self._engine)
        if name not in inspector.get_table_names(schema=self.namespace):
            return None
        _checkExistingTableDefinition(name, spec, inspector.get_columns(name, schema=self.namespace))
        table = self._convertTableSpec(name, spec, self._metadata)
        for foreignKeySpec in spec.foreignKeys:
            table.append_constraint(self._convertForeignKeySpec(name, foreignKeySpec, self._metadata))
        return table

    # Already registered in metadata: only the column names are compared.
    if spec.fields.names != set(table.columns.keys()):
        raise DatabaseConflictError(f"Table '{name}' has already been defined differently; the new "
                                    f"specification has columns {list(spec.fields.names)}, while "
                                    f"the previous definition has {list(table.columns.keys())}.")
    return table
@classmethod
def getTimespanRepresentation(cls) -> Type[TimespanDatabaseRepresentation]:
    """Return the `type` describing how `Timespan` objects are stored in
    this database.

    `Database` itself never consults the returned type; calling code is
    responsible for keeping its DDL and queries consistent with it.

    Returns
    -------
    TimespanReprClass : `type` (`TimespanDatabaseRepresentation` subclass)
        A type that encapsulates the way `Timespan` objects should be
        stored in this database.

    Notes
    -----
    The choice of representation belongs to the `Database`
    implementation, but the timespan-mangling logic is intentionally kept
    outside of it, for two main reasons:

    - Timespans show up in relatively few tables and queries in typical
      usage, and the code touching them already knows it is dealing with
      timespans.  A representation-aware `insert`, for example, would
      need extra logic to detect when mangling is required, which would
      usually be wasted overhead.

    - SQLAlchemy's SELECT expression system cannot wrap multiple columns
      in a single expression object (the ORM can, but it is not used
      here), so encapsulating timespan representations there would mean
      wrapping _much_ more of that code in our own interfaces.
    """
    reprClass = TimespanDatabaseRepresentation.Compound
    return reprClass
@classmethod
def getSpatialRegionRepresentation(cls) -> Type[SpatialRegionDatabaseRepresentation]:
    """Return the `type` describing how `lsst.sphgeom.Region` objects are
    stored in this database.

    `Database` itself never consults the returned type; calling code is
    responsible for keeping its DDL and queries consistent with it.

    Returns
    -------
    RegionReprClass : `type` (`SpatialRegionDatabaseRepresentation` subclass)
        A type that encapsulates the way `lsst.sphgeom.Region` objects
        should be stored in this database.

    Notes
    -----
    See `getTimespanRepresentation` for comments on why this method is
    not more tightly integrated with the rest of the `Database`
    interface.
    """
    reprClass = SpatialRegionDatabaseRepresentation
    return reprClass
def sync(self, table: sqlalchemy.schema.Table, *,
         keys: Dict[str, Any],
         compared: Optional[Dict[str, Any]] = None,
         extra: Optional[Dict[str, Any]] = None,
         returning: Optional[Sequence[str]] = None,
         update: bool = False,
         ) -> Tuple[Optional[Dict[str, Any]], Union[bool, Dict[str, Any]]]:
    """Insert into a table as necessary to ensure database contains
    values equivalent to the given ones.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table to be queried and possibly inserted into.
    keys : `dict`
        Column name-value pairs used to search for an existing row; must
        be a combination that can be used to select a single row if one
        exists.  If such a row does not exist, these values are used in
        the insert.
    compared : `dict`, optional
        Column name-value pairs that are compared to those in any existing
        row.  If such a row does not exist, these values are used in the
        insert.
    extra : `dict`, optional
        Column name-value pairs that are ignored if a matching row exists,
        but used in an insert if one is necessary.
    returning : `~collections.abc.Sequence` of `str`, optional
        The names of columns whose values should be returned.
    update : `bool`, optional
        If `True` (`False` is default), update the existing row with the
        values in ``compared`` instead of raising `DatabaseConflictError`.

    Returns
    -------
    row : `dict`, optional
        The value of the fields indicated by ``returning``, or `None` if
        ``returning`` is `None`.
    inserted_or_updated : `bool` or `dict`
        If `True`, a new row was inserted; if `False`, a matching row
        already existed.  If a `dict` (only possible if ``update=True``),
        then an existing row was updated, and the dict maps the names of
        the updated columns to their *old* values (new values can be
        obtained from ``compared``).

    Raises
    ------
    DatabaseConflictError
        Raised if the values in ``compared`` do not match the values in the
        database (and ``update`` is `False`).
    ReadOnlyDatabaseError
        Raised if the table is not writeable and a row would have to be
        inserted or updated.
    ConflictingDefinitionError
        Raised if, on a writeable table, the query made after the insert
        attempt finds no row matching ``keys``.
    RuntimeError
        Raised if more than one row matches ``keys``, or if a conflict
        with ``compared`` is found even though the insert was reported as
        successful.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    It may be called on read-only databases if and only if the matching row
    does in fact already exist.
    """

    def check() -> Tuple[int, Optional[Dict[str, Any]], Optional[List]]:
        """Query for a row that matches the ``keys`` argument, and compare
        to what was given by the caller.

        Returns
        -------
        n : `int`
            Number of matching rows.  ``n != 1`` is always an error, but
            it's a different kind of error depending on where `check` is
            being called.
        bad : `dict` or `None`
            The subset of the keys of ``compared`` for which the existing
            values did not match the given one, mapped to the existing
            values in the database.  A non-empty ``bad`` is always an
            error, but a different kind depending on context.  `None` if
            ``n != 1``.
        result : `list` or `None`
            Results in the database that correspond to the columns given
            in ``returning``, or `None` if ``returning is None`` or
            ``n != 1``.
        """
        toSelect: Set[str] = set()
        if compared is not None:
            toSelect.update(compared.keys())
        if returning is not None:
            toSelect.update(returning)
        if not toSelect:
            # Need to select some column, even if we just want to see
            # how many rows we get back.
            toSelect.add(next(iter(keys.keys())))
        # Columns are labeled with their own names so the fetched mappings
        # can be indexed by the same keys as ``compared``/``returning``.
        selectSql = sqlalchemy.sql.select(
            *[table.columns[k].label(k) for k in toSelect]
        ).select_from(table).where(
            sqlalchemy.sql.and_(*[table.columns[k] == v for k, v in keys.items()])
        )
        with self._connection() as connection:
            fetched = list(connection.execute(selectSql).mappings())
        if len(fetched) != 1:
            return len(fetched), None, None
        existing = fetched[0]
        if compared is not None:

            def safeNotEqual(a: Any, b: Any) -> bool:
                """Return `True` if ``a`` and ``b`` differ, comparing via
                `time_utils.TimeConverter.times_equal` when ``a`` is an
                `astropy.time.Time` and via ``!=`` otherwise.
                """
                if isinstance(a, astropy.time.Time):
                    return not time_utils.TimeConverter().times_equal(a, b)
                return a != b

            inconsistencies = {
                k: existing[k]
                for k, v in compared.items()
                if safeNotEqual(existing[k], v)
            }
        else:
            inconsistencies = {}
        if returning is not None:
            toReturn: Optional[list] = [existing[k] for k in returning]
        else:
            toReturn = None
        return 1, inconsistencies, toReturn

    def format_bad(inconsistencies: Dict[str, Any]) -> str:
        """Format the 'bad' dictionary of existing values returned by
        ``check`` into a string suitable for an error message.
        """
        assert compared is not None, "Should not be able to get inconsistencies without comparing."
        return ", ".join(f"{k}: {v!r} != {compared[k]!r}" for k, v in inconsistencies.items())

    if self.isTableWriteable(table):
        # Try an insert first, but allow it to fail (in only specific
        # ways).
        row = keys.copy()
        if compared is not None:
            row.update(compared)
        if extra is not None:
            row.update(extra)
        with self.transaction():
            inserted = bool(self.ensure(table, row))
            inserted_or_updated: Union[bool, Dict[str, Any]]
            # Need to perform check() for this branch inside the
            # transaction, so we roll back an insert that didn't do
            # what we expected.  That limits the extent to which we
            # can reduce duplication between this block and the other
            # ones that perform similar logic.
            n, bad, result = check()
            if n < 1:
                raise ConflictingDefinitionError(
                    f"Attempted to ensure {row} exists by inserting it with ON CONFLICT IGNORE, "
                    f"but a post-insert query on {keys} returned no results. "
                    f"Insert was {'' if inserted else 'not '}reported as successful. "
                    "This can occur if the insert violated a database constraint other than the "
                    "unique constraint or primary key used to identify the row in this call."
                )
            elif n > 1:
                raise RuntimeError(f"Keys passed to sync {keys.keys()} do not comprise a "
                                   f"unique constraint for table {table.name}.")
            elif bad:
                assert compared is not None, \
                    "Should not be able to get inconsistencies without comparing."
                if inserted:
                    raise RuntimeError(
                        f"Conflict ({bad}) in sync after successful insert; this is "
                        "possible if the same table is being updated by a concurrent "
                        "process that isn't using sync, but it may also be a bug in "
                        "daf_butler."
                    )
                elif update:
                    # Overwrite only the columns that differed; ``bad``
                    # (the old values) is what gets reported back.
                    with self._connection() as connection:
                        connection.execute(
                            table.update().where(
                                sqlalchemy.sql.and_(*[table.columns[k] == v for k, v in keys.items()])
                            ).values(
                                **{k: compared[k] for k in bad.keys()}
                            )
                        )
                    inserted_or_updated = bad
                else:
                    raise DatabaseConflictError(
                        f"Conflict in sync for table {table.name} on column(s) {format_bad(bad)}."
                    )
            else:
                inserted_or_updated = inserted
    else:
        # Database is not writeable; just see if the row exists.
        n, bad, result = check()
        if n < 1:
            raise ReadOnlyDatabaseError("sync needs to insert, but database is read-only.")
        elif n > 1:
            raise RuntimeError("Keys passed to sync do not comprise a unique constraint.")
        elif bad:
            if update:
                raise ReadOnlyDatabaseError("sync needs to update, but database is read-only.")
            else:
                raise DatabaseConflictError(
                    f"Conflict in sync for table {table.name} on column(s) {format_bad(bad)}."
                )
        inserted_or_updated = False
    if returning is None:
        return None, inserted_or_updated
    else:
        # check() returned n == 1 on every path that reaches here, so
        # ``result`` was populated from ``returning``.
        assert result is not None
        return {k: v for k, v in zip(returning, result)}, inserted_or_updated
def insert(self, table: sqlalchemy.schema.Table, *rows: dict, returnIds: bool = False,
           select: Optional[sqlalchemy.sql.Select] = None,
           names: Optional[Iterable[str]] = None,
           ) -> Optional[List[int]]:
    """Insert rows into a table, either from explicit values or from a
    SELECT query, optionally returning autoincrement primary key values.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    returnIds: `bool`
        If `True` (`False` is default), return the values of the table's
        autoincrement primary key field (which must exist).
    select : `sqlalchemy.sql.Select`, optional
        A SELECT query expression to insert rows from.  Cannot be provided
        with either ``rows`` or ``returnIds=True``.
    names : `Iterable` [ `str` ], optional
        Names of columns in ``table`` to be populated, ordered to match the
        columns returned by ``select``.  Ignored if ``select`` is `None`.
        If not provided, the columns returned by ``select`` must be named
        to match the desired columns of ``table``.
    *rows
        Positional arguments are the rows to be inserted, as dictionaries
        mapping column name to value.  The keys in all dictionaries must
        be the same.

    Returns
    -------
    ids : `None`, or `list` of `int`
        If ``returnIds`` is `True`, a `list` containing the inserted
        values for the table's autoincrement primary key.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.
    TypeError
        Raised if ``select`` is combined with ``rows`` or
        ``returnIds=True``.

    Notes
    -----
    The default implementation uses bulk insert syntax when ``returnIds``
    is `False`, and a loop over single-row insert operations when it is
    `True`.

    Derived classes should reimplement when they can provide a more
    efficient implementation (especially for the latter case).

    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.
    """
    self.assertTableWriteable(table, f"Cannot insert into read-only table {table}.")
    if select is not None and (rows or returnIds):
        raise TypeError("'select' is incompatible with passing value rows or returnIds=True.")
    if select is None and not rows:
        # Nothing to insert at all.
        return [] if returnIds else None

    with self._connection() as connection:
        if returnIds:
            # One statement per row so each generated key can be read back.
            statement = table.insert()
            return [connection.execute(statement, row).inserted_primary_key[0] for row in rows]

        if select is None:
            connection.execute(table.insert(), rows)
            return None

        if names is None:
            # columns() is deprecated since 1.4, but
            # selected_columns() method did not exist in 1.3.
            if hasattr(select, "selected_columns"):
                names = select.selected_columns.keys()
            else:
                names = select.columns.keys()
        connection.execute(table.insert().from_select(names, select))
        return None
@abstractmethod
def replace(self, table: sqlalchemy.schema.Table, *rows: dict) -> None:
    """Insert rows into a table, overwriting any existing rows whose
    primary key would otherwise conflict with a new row.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    *rows
        Positional arguments are the rows to be inserted, as dictionaries
        mapping column name to value.  The keys in all dictionaries must
        be the same.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    Implementations should raise a `sqlalchemy.exc.IntegrityError`
    exception when a constraint other than the primary key would be
    violated.

    Implementations are not required to support `replace` on tables
    with autoincrement keys.
    """
    # Abstract: concrete Database subclasses provide the implementation.
    raise NotImplementedError()
@abstractmethod
def ensure(self, table: sqlalchemy.schema.Table, *rows: dict) -> int:
    """Insert rows into a table, silently skipping every row whose
    insertion would violate any constraint.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    *rows
        Positional arguments are the rows to be inserted, as dictionaries
        mapping column name to value.  The keys in all dictionaries must
        be the same.

    Returns
    -------
    count : `int`
        The number of rows actually inserted.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.
        This is raised even if the operation would do nothing even on a
        writeable database.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    Implementations are not required to support `ensure` on tables
    with autoincrement keys.
    """
    # Abstract: concrete Database subclasses provide the implementation.
    raise NotImplementedError()
def delete(self, table: sqlalchemy.schema.Table, columns: Iterable[str], *rows: dict) -> int:
    """Delete one or more rows from a table.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table that rows should be deleted from.
    columns: `~collections.abc.Iterable` of `str`
        The names of columns that will be used to constrain the rows to
        be deleted; these will be combined via ``AND`` to form the
        ``WHERE`` clause of the delete query.  May be any iterable,
        including a one-shot iterator; if it yields no names the delete
        is unconstrained.
    *rows
        Positional arguments are the keys of rows to be deleted, as
        dictionaries mapping column name to value.  The keys in all
        dictionaries must be exactly the names in ``columns``.

    Returns
    -------
    count : `int`
        Number of rows deleted.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    When exactly one column varies across ``rows`` the delete is issued
    with an ``IN`` clause, in batches of at most 1000 values; otherwise
    one parameterized statement is executed with all rows.

    The default implementation should be sufficient for most derived
    classes.
    """
    self.assertTableWriteable(table, f"Cannot delete from read-only table {table}.")
    # Materialize before testing emptiness: iterators and generators are
    # always truthy, so testing the raw argument would misclassify an
    # empty iterator as "has columns" and skip the delete-everything case.
    columns = list(columns)
    if columns and not rows:
        # If there are no columns, this operation is supposed to delete
        # everything (so we proceed as usual).  But if there are columns,
        # but no rows, it was a constrained bulk operation where the
        # constraint is that no rows match, and we should short-circuit
        # while reporting that no rows were affected.
        return 0
    sql = table.delete()

    # More efficient to use IN operator if there is only one
    # variable changing across all rows.
    content: Dict[str, Set] = defaultdict(set)
    if len(columns) == 1:
        # Nothing to calculate since we can always use IN
        column = columns[0]
        changing_columns = [column]
        content[column] = set(row[column] for row in rows)
    else:
        for row in rows:
            for k, v in row.items():
                content[k].add(v)
        changing_columns = [col for col, values in content.items() if len(values) > 1]

    if len(changing_columns) != 1:
        # More than one column changes each time so do explicit bind
        # parameters and have each row processed separately.
        whereTerms = [table.columns[name] == sqlalchemy.sql.bindparam(name) for name in columns]
        if whereTerms:
            sql = sql.where(sqlalchemy.sql.and_(*whereTerms))
        with self._connection() as connection:
            return connection.execute(sql, rows).rowcount
    else:
        # One of the columns has changing values but any others are
        # fixed. In this case we can use an IN operator and be more
        # efficient.
        name = changing_columns.pop()

        # Simple where clause for the unchanging columns
        clauses = []
        for k, v in content.items():
            if k == name:
                continue
            column = table.columns[k]
            # The set only has one element
            clauses.append(column == v.pop())

        # The IN operator will not work for "infinite" numbers of
        # rows so must batch it up into distinct calls.
        in_content = list(content[name])
        n_elements = len(in_content)

        rowcount = 0
        n_per_loop = 1_000  # Controls how many items to put in IN clause
        with self._connection() as connection:
            for iposn in range(0, n_elements, n_per_loop):
                endpos = iposn + n_per_loop
                in_clause = table.columns[name].in_(in_content[iposn:endpos])

                newsql = sql.where(sqlalchemy.sql.and_(*clauses, in_clause))
                rowcount += connection.execute(newsql).rowcount
        return rowcount
def deleteWhere(self, table: sqlalchemy.schema.Table, where: sqlalchemy.sql.ClauseElement) -> int:
    """Delete the rows of a table that match a pre-constructed WHERE
    clause.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table that rows should be deleted from.
    where: `sqlalchemy.sql.ClauseElement`
        Expression used, as given, as the ``WHERE`` clause of the delete
        query.

    Returns
    -------
    count : `int`
        Number of rows deleted.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    The default implementation should be sufficient for most derived
    classes.
    """
    self.assertTableWriteable(table, f"Cannot delete from read-only table {table}.")

    statement = table.delete().where(where)
    with self._connection() as connection:
        outcome = connection.execute(statement)
        return outcome.rowcount
def update(self, table: sqlalchemy.schema.Table, where: Dict[str, str], *rows: dict) -> int:
    """Update one or more rows in a table.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table containing the rows to be updated.
    where : `dict` [`str`, `str`]
        Maps the name of each column used to find existing rows to the
        key under which the value to match is stored in the ``rows``
        dictionaries.  Note that these may not be the same due to
        SQLAlchemy limitations.
    *rows
        Positional arguments are the rows to be updated.  The keys in all
        dictionaries must be the same, and may correspond to either a
        value in the ``where`` dictionary or the name of a column to be
        updated.

    Returns
    -------
    count : `int`
        Number of rows matched (regardless of whether the update actually
        modified them).

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    The default implementation should be sufficient for most derived
    classes.
    """
    self.assertTableWriteable(table, f"Cannot update read-only table {table}.")
    if not rows:
        return 0
    # One bound parameter per search column; values come from ``rows``.
    conditions = [
        table.columns[columnName] == sqlalchemy.sql.bindparam(paramName)
        for columnName, paramName in where.items()
    ]
    sql = table.update().where(sqlalchemy.sql.and_(*conditions))
    with self._connection() as connection:
        outcome = connection.execute(sql, rows)
        return outcome.rowcount
def query(self, sql: sqlalchemy.sql.FromClause,
          *args: Any, **kwargs: Any) -> sqlalchemy.engine.ResultProxy:
    """Execute a SELECT query against the database and return its result.

    Parameters
    ----------
    sql : `sqlalchemy.sql.FromClause`
        A SQLAlchemy representation of a ``SELECT`` query.
    *args
        Additional positional arguments are forwarded to
        `sqlalchemy.engine.Connection.execute`.
    **kwargs
        Additional keyword arguments are forwarded to
        `sqlalchemy.engine.Connection.execute`.

    Returns
    -------
    result : `sqlalchemy.engine.ResultProxy`
        Query results.

    Notes
    -----
    The default implementation should be sufficient for most derived
    classes.
    """
    # The returned Result object outlives this call, so connection
    # lifetime has to be handled here: reuse the session connection when
    # there is one (i.e. in transaction context), otherwise open a
    # dedicated connection that is closed when the result is closed.
    #
    # TODO: May be better approach would be to make this method return a
    # context manager, but this means big changes for callers of this
    # method.
    connection = self._session_connection
    if connection is None:
        connection = self._engine.connect(close_with_result=True)
    # TODO: should we guard against non-SELECT queries here?
    return connection.execute(sql, *args, **kwargs)
# Annotation-only declaration: no value is assigned at this point.
origin: int
"""An integer ID that should be used as the default for any datasets,
quanta, or other entities that use a (autoincrement, origin) compound
primary key (`int`).
"""

# Annotation-only declaration: no value is assigned at this point.  `None`
# means no namespace, in which case table names are used unqualified.
namespace: Optional[str]
"""The schema or namespace this database instance is associated with
(`str` or `None`).
"""