Coverage for python/lsst/daf/butler/registry/interfaces/_database.py : 15%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = [
24 "Database",
25 "ReadOnlyDatabaseError",
26 "DatabaseConflictError",
27 "SchemaAlreadyDefinedError",
28 "StaticTablesContext",
29]
31from abc import ABC, abstractmethod
32from collections import defaultdict
33from contextlib import contextmanager
34from typing import (
35 Any,
36 Callable,
37 Dict,
38 Iterable,
39 Iterator,
40 List,
41 Optional,
42 Sequence,
43 Set,
44 Tuple,
45 Type,
46 Union,
47)
48import uuid
49import warnings
51import astropy.time
52import sqlalchemy
54from ...core import SpatialRegionDatabaseRepresentation, TimespanDatabaseRepresentation, ddl, time_utils
55from .._exceptions import ConflictingDefinitionError
57_IN_SAVEPOINT_TRANSACTION = "IN_SAVEPOINT_TRANSACTION"
60def _checkExistingTableDefinition(name: str, spec: ddl.TableSpec, inspection: List[Dict[str, Any]]) -> None:
61 """Test that the definition of a table in a `ddl.TableSpec` and from
62 database introspection are consistent.
64 Parameters
65 ----------
66 name : `str`
67 Name of the table (only used in error messages).
68 spec : `ddl.TableSpec`
69 Specification of the table.
70 inspection : `dict`
71 Dictionary returned by
72 `sqlalchemy.engine.reflection.Inspector.get_columns`.
74 Raises
75 ------
76 DatabaseConflictError
77 Raised if the definitions are inconsistent.
78 """
79 columnNames = [c["name"] for c in inspection]
80 if spec.fields.names != set(columnNames):
81 raise DatabaseConflictError(f"Table '{name}' exists but is defined differently in the database; "
82 f"specification has columns {list(spec.fields.names)}, while the "
83 f"table in the database has {columnNames}.")
class ReadOnlyDatabaseError(RuntimeError):
    """Error signalling that a write operation was attempted on a `Database`
    that is read-only.
    """
class DatabaseConflictError(ConflictingDefinitionError):
    """Error signalling that what is stored in the database (row values or
    schema entities) does not match what this client expects.
    """
class SchemaAlreadyDefinedError(RuntimeError):
    """Error signalling an attempt to initialize a database schema when
    some tables already exist.
    """
class StaticTablesContext:
    """Helper used to declare the static schema for a registry layer in a
    database.

    Instances are returned by `Database.declareStaticTables`, which should
    be the only code that constructs them.
    """

    def __init__(self, db: Database):
        self._db = db
        # (table, constraint) pairs collected by `addTable`; they are only
        # attached to their tables by `Database.declareStaticTables`.
        self._foreignKeys: List[Tuple[sqlalchemy.schema.Table, sqlalchemy.schema.ForeignKeyConstraint]] = []
        self._inspector = sqlalchemy.inspect(self._db._connection)
        # Names of the tables that exist in the namespace at construction.
        self._tableNames = frozenset(self._inspector.get_table_names(schema=self._db.namespace))
        self._initializers: List[Callable[[Database], None]] = []

    def addTable(self, name: str, spec: ddl.TableSpec) -> sqlalchemy.schema.Table:
        """Add a new table to the schema, returning its sqlalchemy
        representation.

        The new table may not actually be created until the end of the
        context created by `Database.declareStaticTables`, allowing tables
        to be declared in any order even in the presence of foreign key
        relationships.

        Parameters
        ----------
        name : `str`
            Logical name of the table; it is passed through
            `Database._mangleTableName` before use.
        spec : `ddl.TableSpec`
            Specification of the table.

        Returns
        -------
        table : `sqlalchemy.schema.Table`
            SQLAlchemy representation of the table.
        """
        mangled = self._db._mangleTableName(name)
        if mangled in self._tableNames:
            # The table is already in the database; make sure its columns
            # match the specification.
            existingColumns = self._inspector.get_columns(mangled, schema=self._db.namespace)
            _checkExistingTableDefinition(mangled, spec, existingColumns)
        table = self._db._convertTableSpec(mangled, spec, self._db._metadata)
        # Foreign keys are only recorded here, not attached to the table.
        self._foreignKeys.extend(
            (table, self._db._convertForeignKeySpec(mangled, foreignKeySpec, self._db._metadata))
            for foreignKeySpec in spec.foreignKeys
        )
        return table

    def addTableTuple(self, specs: Tuple[ddl.TableSpec, ...]) -> Tuple[sqlalchemy.schema.Table, ...]:
        """Add a named tuple of tables to the schema, returning their
        SQLAlchemy representations in a named tuple of the same type.

        The new tables may not actually be created until the end of the
        context created by `Database.declareStaticTables`, allowing tables
        to be declared in any order even in the presence of foreign key
        relationships.

        Notes
        -----
        ``specs`` *must* be an instance of a type created by
        `collections.namedtuple`, not just a regular tuple, and the returned
        object is guaranteed to be of the same type. Because
        `~collections.namedtuple` is just a factory for `type` objects, not
        an actual type itself, we cannot represent this with type
        annotations.
        """
        # Each field name of the named tuple doubles as the table name.
        tables = (self.addTable(name, spec) for name, spec in zip(specs._fields, specs))  # type: ignore
        return specs._make(tables)  # type: ignore

    def addInitializer(self, initializer: Callable[[Database], None]) -> None:
        """Register a callable that performs one-time initialization of a
        database.

        Initialization can mean anything that changes the state of a
        database and needs to be done exactly once after the database schema
        was created. An example could be the population of schema
        attributes.

        Parameters
        ----------
        initializer : callable
            Callable taking a single argument, which is a `Database`
            instance.
        """
        self._initializers.append(initializer)
class Session:
    """Class representing a persistent connection to a database.

    Parameters
    ----------
    db : `Database`
        Database instance.

    Notes
    -----
    Client code should not create instances of this class directly;
    `Database.session` should be used to create a context for a session::

        with db.session() as session:
            session.method()
            db.method()

    In the current implementation sessions can be nested and transactions
    can be nested within a session. All nested sessions and transactions
    share the same database connection.

    The Session class represents a limited subset of the database API that
    requires a persistent connection to a database (e.g. temporary tables,
    which have the lifetime of a session). Potentially most of the database
    API could be associated with a Session class.
    """

    def __init__(self, db: Database):
        self._db = db

    def makeTemporaryTable(self, spec: ddl.TableSpec, name: Optional[str] = None) -> sqlalchemy.schema.Table:
        """Create a temporary table.

        Parameters
        ----------
        spec : `TableSpec`
            Specification for the table.
        name : `str`, optional
            A unique (within this session/connection) name for the table.
            Subclasses may override to modify the actual name used. If not
            provided, a unique name will be generated.

        Returns
        -------
        table : `sqlalchemy.schema.Table`
            SQLAlchemy representation of the table.

        Raises
        ------
        ValueError
            Raised if the name, after transformation by the `Database`,
            differs from the given one and matches a temporary table that
            already exists.

        Notes
        -----
        Temporary tables may be created, dropped, and written to even in
        read-only databases - at least according to the Python-level
        protections in the `Database` classes. Server permissions may say
        otherwise, but in that case they probably need to be modified to
        support the full range of expected read-only butler behavior.

        Temporary table rows are guaranteed to be dropped when a connection
        is closed. `Database` implementations are permitted to allow the
        table to remain as long as this is transparent to the user (i.e.
        "creating" the temporary table in a new session should not be an
        error, even if it does nothing).

        It may not be possible to use temporary tables within transactions
        with some database engines (or configurations thereof).
        """
        db = self._db
        if name is None:
            name = f"tmp_{uuid.uuid4().hex}"
        table = db._convertTableSpec(name, spec, db._metadata, prefixes=['TEMPORARY'],
                                     schema=sqlalchemy.schema.BLANK_SCHEMA)
        if table.key in db._tempTables and table.key != name:
            raise ValueError(f"A temporary table with name {name} (transformed to {table.key} by "
                             f"Database) already exists.")
        # `_convertTableSpec` does not handle foreign keys; add them here.
        for foreignKeySpec in spec.foreignKeys:
            constraint = db._convertForeignKeySpec(name, foreignKeySpec, db._metadata)
            table.append_constraint(constraint)
        table.create(db._session_connection)
        db._tempTables.add(table.key)
        return table

    def dropTemporaryTable(self, table: sqlalchemy.schema.Table) -> None:
        """Drop a temporary table.

        Parameters
        ----------
        table : `sqlalchemy.schema.Table`
            A SQLAlchemy object returned by a previous call to
            `makeTemporaryTable`.

        Raises
        ------
        TypeError
            Raised if the table is not a known temporary table.
        """
        if table.key not in self._db._tempTables:
            raise TypeError(f"Table {table.key} was not created by makeTemporaryTable.")
        table.drop(self._db._session_connection)
        self._db._tempTables.remove(table.key)
268class Database(ABC):
269 """An abstract interface that represents a particular database engine's
270 representation of a single schema/namespace/database.
272 Parameters
273 ----------
274 origin : `int`
275 An integer ID that should be used as the default for any datasets,
276 quanta, or other entities that use a (autoincrement, origin) compound
277 primary key.
278 engine : `sqlalchemy.engine.Engine`
279 The SQLAlchemy engine for this `Database`.
280 namespace : `str`, optional
281 Name of the schema or namespace this instance is associated with.
282 This is passed as the ``schema`` argument when constructing a
283 `sqlalchemy.schema.MetaData` instance. We use ``namespace`` instead to
284 avoid confusion between "schema means namespace" and "schema means
285 table definitions".
287 Notes
288 -----
289 `Database` requires all write operations to go through its special named
290 methods. Our write patterns are sufficiently simple that we don't really
291 need the full flexibility of SQL insert/update/delete syntax, and we need
292 non-standard (but common) functionality in these operations sufficiently
293 often that it seems worthwhile to provide our own generic API.
295 In contrast, `Database.query` allows arbitrary ``SELECT`` queries (via
296 their SQLAlchemy representation) to be run, as we expect these to require
297 significantly more sophistication while still being limited to standard
298 SQL.
300 `Database` itself has several underscore-prefixed attributes:
302 - ``_engine``: SQLAlchemy object representing its engine.
303 - ``_connection``: the `sqlalchemy.engine.Connectable` object which can
304 be either an Engine or Connection if a session is active.
305 - ``_metadata``: the `sqlalchemy.schema.MetaData` object representing
306 the tables and other schema entities.
308 These are considered protected (derived classes may access them, but other
309 code should not), and read-only, aside from executing SQL via
310 ``_connection``.
311 """
313 def __init__(self, *, origin: int, engine: sqlalchemy.engine.Engine,
314 namespace: Optional[str] = None):
315 self.origin = origin
316 self.namespace = namespace
317 self._engine = engine
318 self._session_connection: Optional[sqlalchemy.engine.Connection] = None
319 self._metadata: Optional[sqlalchemy.schema.MetaData] = None
320 self._tempTables: Set[str] = set()
322 def __repr__(self) -> str:
323 # Rather than try to reproduce all the parameters used to create
324 # the object, instead report the more useful information of the
325 # connection URL.
326 if self._engine.url.password is not None:
327 uri = str(self._engine.url.set(password="***"))
328 else:
329 uri = str(self._engine.url)
330 if self.namespace:
331 uri += f"#{self.namespace}"
332 return f'{type(self).__name__}("{uri}")'
334 @classmethod
335 def makeDefaultUri(cls, root: str) -> Optional[str]:
336 """Create a default connection URI appropriate for the given root
337 directory, or `None` if there can be no such default.
338 """
339 return None
341 @classmethod
342 def fromUri(cls, uri: str, *, origin: int, namespace: Optional[str] = None,
343 writeable: bool = True) -> Database:
344 """Construct a database from a SQLAlchemy URI.
346 Parameters
347 ----------
348 uri : `str`
349 A SQLAlchemy URI connection string.
350 origin : `int`
351 An integer ID that should be used as the default for any datasets,
352 quanta, or other entities that use a (autoincrement, origin)
353 compound primary key.
354 namespace : `str`, optional
355 A database namespace (i.e. schema) the new instance should be
356 associated with. If `None` (default), the namespace (if any) is
357 inferred from the URI.
358 writeable : `bool`, optional
359 If `True`, allow write operations on the database, including
360 ``CREATE TABLE``.
362 Returns
363 -------
364 db : `Database`
365 A new `Database` instance.
366 """
367 return cls.fromEngine(cls.makeEngine(uri, writeable=writeable),
368 origin=origin,
369 namespace=namespace,
370 writeable=writeable)
372 @classmethod
373 @abstractmethod
374 def makeEngine(cls, uri: str, *, writeable: bool = True) -> sqlalchemy.engine.Engine:
375 """Create a `sqlalchemy.engine.Engine` from a SQLAlchemy URI.
377 Parameters
378 ----------
379 uri : `str`
380 A SQLAlchemy URI connection string.
381 writeable : `bool`, optional
382 If `True`, allow write operations on the database, including
383 ``CREATE TABLE``.
385 Returns
386 -------
387 engine : `sqlalchemy.engine.Engine`
388 A database engine.
390 Notes
391 -----
392 Subclasses that support other ways to connect to a database are
393 encouraged to add optional arguments to their implementation of this
394 method, as long as they maintain compatibility with the base class
395 call signature.
396 """
397 raise NotImplementedError()
399 @classmethod
400 @abstractmethod
401 def fromEngine(cls, engine: sqlalchemy.engine.Engine, *, origin: int,
402 namespace: Optional[str] = None, writeable: bool = True) -> Database:
403 """Create a new `Database` from an existing `sqlalchemy.engine.Engine`.
405 Parameters
406 ----------
407 engine : `sqllachemy.engine.Engine`
408 The engine for the database. May be shared between `Database`
409 instances.
410 origin : `int`
411 An integer ID that should be used as the default for any datasets,
412 quanta, or other entities that use a (autoincrement, origin)
413 compound primary key.
414 namespace : `str`, optional
415 A different database namespace (i.e. schema) the new instance
416 should be associated with. If `None` (default), the namespace
417 (if any) is inferred from the connection.
418 writeable : `bool`, optional
419 If `True`, allow write operations on the database, including
420 ``CREATE TABLE``.
422 Returns
423 -------
424 db : `Database`
425 A new `Database` instance.
427 Notes
428 -----
429 This method allows different `Database` instances to share the same
430 engine, which is desirable when they represent different namespaces
431 can be queried together.
432 """
433 raise NotImplementedError()
435 @contextmanager
436 def session(self) -> Iterator:
437 """Return a context manager that represents a session (persistent
438 connection to a database).
439 """
440 if self._session_connection is not None:
441 # session already started, just reuse that
442 yield Session(self)
443 else:
444 # open new connection and close it when done
445 self._session_connection = self._engine.connect()
446 yield Session(self)
447 self._session_connection.close()
448 self._session_connection = None
449 # Temporary tables only live within session
450 self._tempTables = set()
452 @contextmanager
453 def transaction(self, *, interrupting: bool = False, savepoint: bool = False,
454 lock: Iterable[sqlalchemy.schema.Table] = ()) -> Iterator:
455 """Return a context manager that represents a transaction.
457 Parameters
458 ----------
459 interrupting : `bool`, optional
460 If `True` (`False` is default), this transaction block may not be
461 nested without an outer one, and attempting to do so is a logic
462 (i.e. assertion) error.
463 savepoint : `bool`, optional
464 If `True` (`False` is default), create a `SAVEPOINT`, allowing
465 exceptions raised by the database (e.g. due to constraint
466 violations) during this transaction's context to be caught outside
467 it without also rolling back all operations in an outer transaction
468 block. If `False`, transactions may still be nested, but a
469 rollback may be generated at any level and affects all levels, and
470 commits are deferred until the outermost block completes. If any
471 outer transaction block was created with ``savepoint=True``, all
472 inner blocks will be as well (regardless of the actual value
473 passed). This has no effect if this is the outermost transaction.
474 lock : `Iterable` [ `sqlalchemy.schema.Table` ], optional
475 A list of tables to lock for the duration of this transaction.
476 These locks are guaranteed to prevent concurrent writes and allow
477 this transaction (only) to acquire the same locks (others should
478 block), but only prevent concurrent reads if the database engine
479 requires that in order to block concurrent writes.
481 Notes
482 -----
483 All transactions on a connection managed by one or more `Database`
484 instances _must_ go through this method, or transaction state will not
485 be correctly managed.
486 """
487 # need a connection, use session to manage it
488 with self.session():
489 assert self._session_connection is not None
490 connection = self._session_connection
491 assert not (interrupting and connection.in_transaction()), (
492 "Logic error in transaction nesting: an operation that would "
493 "interrupt the active transaction context has been requested."
494 )
495 # We remember whether we are already in a SAVEPOINT transaction via
496 # the connection object's 'info' dict, which is explicitly for user
497 # information like this. This is safer than a regular `Database`
498 # instance attribute, because it guards against multiple `Database`
499 # instances sharing the same connection. The need to use our own
500 # flag here to track whether we're in a nested transaction should
501 # go away in SQLAlchemy 1.4, which seems to have a
502 # `Connection.in_nested_transaction()` method.
503 savepoint = savepoint or connection.info.get(_IN_SAVEPOINT_TRANSACTION, False)
504 connection.info[_IN_SAVEPOINT_TRANSACTION] = savepoint
505 if connection.in_transaction() and savepoint:
506 trans = connection.begin_nested()
507 else:
508 # Use a regular (non-savepoint) transaction always for the
509 # outermost context, as well as when a savepoint was not
510 # requested.
511 trans = connection.begin()
512 self._lockTables(lock)
513 try:
514 yield
515 trans.commit()
516 except BaseException:
517 trans.rollback()
518 raise
519 finally:
520 if not connection.in_transaction():
521 connection.info.pop(_IN_SAVEPOINT_TRANSACTION, None)
523 @property
524 def _connection(self) -> sqlalchemy.engine.Connectable:
525 """Object that can be used to execute queries
526 (`sqlalchemy.engine.Connectable`)
527 """
528 return self._session_connection or self._engine
530 @abstractmethod
531 def _lockTables(self, tables: Iterable[sqlalchemy.schema.Table] = ()) -> None:
532 """Acquire locks on the given tables.
534 This is an implementation hook for subclasses, called by `transaction`.
535 It should not be called directly by other code.
537 Parameters
538 ----------
539 tables : `Iterable` [ `sqlalchemy.schema.Table` ], optional
540 A list of tables to lock for the duration of this transaction.
541 These locks are guaranteed to prevent concurrent writes and allow
542 this transaction (only) to acquire the same locks (others should
543 block), but only prevent concurrent reads if the database engine
544 requires that in order to block concurrent writes.
545 """
546 raise NotImplementedError()
548 def isTableWriteable(self, table: sqlalchemy.schema.Table) -> bool:
549 """Check whether a table is writeable, either because the database
550 connection is read-write or the table is a temporary table.
552 Parameters
553 ----------
554 table : `sqlalchemy.schema.Table`
555 SQLAlchemy table object to check.
557 Returns
558 -------
559 writeable : `bool`
560 Whether this table is writeable.
561 """
562 return self.isWriteable() or table.key in self._tempTables
564 def assertTableWriteable(self, table: sqlalchemy.schema.Table, msg: str) -> None:
565 """Raise if the given table is not writeable, either because the
566 database connection is read-write or the table is a temporary table.
568 Parameters
569 ----------
570 table : `sqlalchemy.schema.Table`
571 SQLAlchemy table object to check.
572 msg : `str`, optional
573 If provided, raise `ReadOnlyDatabaseError` instead of returning
574 `False`, with this message.
575 """
576 if not self.isTableWriteable(table):
577 raise ReadOnlyDatabaseError(msg)
579 @contextmanager
580 def declareStaticTables(self, *, create: bool) -> Iterator[StaticTablesContext]:
581 """Return a context manager in which the database's static DDL schema
582 can be declared.
584 Parameters
585 ----------
586 create : `bool`
587 If `True`, attempt to create all tables at the end of the context.
588 If `False`, they will be assumed to already exist.
590 Returns
591 -------
592 schema : `StaticTablesContext`
593 A helper object that is used to add new tables.
595 Raises
596 ------
597 ReadOnlyDatabaseError
598 Raised if ``create`` is `True`, `Database.isWriteable` is `False`,
599 and one or more declared tables do not already exist.
601 Examples
602 --------
603 Given a `Database` instance ``db``::
605 with db.declareStaticTables(create=True) as schema:
606 schema.addTable("table1", TableSpec(...))
607 schema.addTable("table2", TableSpec(...))
609 Notes
610 -----
611 A database's static DDL schema must be declared before any dynamic
612 tables are managed via calls to `ensureTableExists` or
613 `getExistingTable`. The order in which static schema tables are added
614 inside the context block is unimportant; they will automatically be
615 sorted and added in an order consistent with their foreign key
616 relationships.
617 """
618 if create and not self.isWriteable():
619 raise ReadOnlyDatabaseError(f"Cannot create tables in read-only database {self}.")
620 self._metadata = sqlalchemy.MetaData(schema=self.namespace)
621 try:
622 context = StaticTablesContext(self)
623 if create and context._tableNames:
624 # Looks like database is already initalized, to avoid danger
625 # of modifying/destroying valid schema we refuse to do
626 # anything in this case
627 raise SchemaAlreadyDefinedError(f"Cannot create tables in non-empty database {self}.")
628 yield context
629 for table, foreignKey in context._foreignKeys:
630 table.append_constraint(foreignKey)
631 if create:
632 if self.namespace is not None:
633 if self.namespace not in context._inspector.get_schema_names():
634 self._connection.execute(sqlalchemy.schema.CreateSchema(self.namespace))
635 # In our tables we have columns that make use of sqlalchemy
636 # Sequence objects. There is currently a bug in sqlalchemy that
637 # causes a deprecation warning to be thrown on a property of
638 # the Sequence object when the repr for the sequence is
639 # created. Here a filter is used to catch these deprecation
640 # warnings when tables are created.
641 with warnings.catch_warnings():
642 warnings.simplefilter("ignore", category=sqlalchemy.exc.SADeprecationWarning)
643 self._metadata.create_all(self._connection)
644 # call all initializer methods sequentially
645 for init in context._initializers:
646 init(self)
647 except BaseException:
648 self._metadata = None
649 raise
651 @abstractmethod
652 def isWriteable(self) -> bool:
653 """Return `True` if this database can be modified by this client.
654 """
655 raise NotImplementedError()
657 @abstractmethod
658 def __str__(self) -> str:
659 """Return a human-readable identifier for this `Database`, including
660 any namespace or schema that identifies its names within a `Registry`.
661 """
662 raise NotImplementedError()
664 @property
665 def dialect(self) -> sqlalchemy.engine.Dialect:
666 """The SQLAlchemy dialect for this database engine
667 (`sqlalchemy.engine.Dialect`).
668 """
669 return self._engine.dialect
671 def shrinkDatabaseEntityName(self, original: str) -> str:
672 """Return a version of the given name that fits within this database
673 engine's length limits for table, constraint, indexes, and sequence
674 names.
676 Implementations should not assume that simple truncation is safe,
677 because multiple long names often begin with the same prefix.
679 The default implementation simply returns the given name.
681 Parameters
682 ----------
683 original : `str`
684 The original name.
686 Returns
687 -------
688 shrunk : `str`
689 The new, possibly shortened name.
690 """
691 return original
693 def expandDatabaseEntityName(self, shrunk: str) -> str:
694 """Retrieve the original name for a database entity that was too long
695 to fit within the database engine's limits.
697 Parameters
698 ----------
699 original : `str`
700 The original name.
702 Returns
703 -------
704 shrunk : `str`
705 The new, possibly shortened name.
706 """
707 return shrunk
709 def _mangleTableName(self, name: str) -> str:
710 """Map a logical, user-visible table name to the true table name used
711 in the database.
713 The default implementation returns the given name unchanged.
715 Parameters
716 ----------
717 name : `str`
718 Input table name. Should not include a namespace (i.e. schema)
719 prefix.
721 Returns
722 -------
723 mangled : `str`
724 Mangled version of the table name (still with no namespace prefix).
726 Notes
727 -----
728 Reimplementations of this method must be idempotent - mangling an
729 already-mangled name must have no effect.
730 """
731 return name
733 def _makeColumnConstraints(self, table: str, spec: ddl.FieldSpec) -> List[sqlalchemy.CheckConstraint]:
734 """Create constraints based on this spec.
736 Parameters
737 ----------
738 table : `str`
739 Name of the table this column is being added to.
740 spec : `FieldSpec`
741 Specification for the field to be added.
743 Returns
744 -------
745 constraint : `list` of `sqlalchemy.CheckConstraint`
746 Constraint added for this column.
747 """
748 # By default we return no additional constraints
749 return []
751 def _convertFieldSpec(self, table: str, spec: ddl.FieldSpec, metadata: sqlalchemy.MetaData,
752 **kwargs: Any) -> sqlalchemy.schema.Column:
753 """Convert a `FieldSpec` to a `sqlalchemy.schema.Column`.
755 Parameters
756 ----------
757 table : `str`
758 Name of the table this column is being added to.
759 spec : `FieldSpec`
760 Specification for the field to be added.
761 metadata : `sqlalchemy.MetaData`
762 SQLAlchemy representation of the DDL schema this field's table is
763 being added to.
764 **kwargs
765 Additional keyword arguments to forward to the
766 `sqlalchemy.schema.Column` constructor. This is provided to make
767 it easier for derived classes to delegate to ``super()`` while
768 making only minor changes.
770 Returns
771 -------
772 column : `sqlalchemy.schema.Column`
773 SQLAlchemy representation of the field.
774 """
775 args = [spec.name, spec.getSizedColumnType()]
776 if spec.autoincrement:
777 # Generate a sequence to use for auto incrementing for databases
778 # that do not support it natively. This will be ignored by
779 # sqlalchemy for databases that do support it.
780 args.append(sqlalchemy.Sequence(self.shrinkDatabaseEntityName(f"{table}_seq_{spec.name}"),
781 metadata=metadata))
782 assert spec.doc is None or isinstance(spec.doc, str), f"Bad doc for {table}.{spec.name}."
783 return sqlalchemy.schema.Column(*args, nullable=spec.nullable, primary_key=spec.primaryKey,
784 comment=spec.doc, server_default=spec.default, **kwargs)
786 def _convertForeignKeySpec(self, table: str, spec: ddl.ForeignKeySpec, metadata: sqlalchemy.MetaData,
787 **kwargs: Any) -> sqlalchemy.schema.ForeignKeyConstraint:
788 """Convert a `ForeignKeySpec` to a
789 `sqlalchemy.schema.ForeignKeyConstraint`.
791 Parameters
792 ----------
793 table : `str`
794 Name of the table this foreign key is being added to.
795 spec : `ForeignKeySpec`
796 Specification for the foreign key to be added.
797 metadata : `sqlalchemy.MetaData`
798 SQLAlchemy representation of the DDL schema this constraint is
799 being added to.
800 **kwargs
801 Additional keyword arguments to forward to the
802 `sqlalchemy.schema.ForeignKeyConstraint` constructor. This is
803 provided to make it easier for derived classes to delegate to
804 ``super()`` while making only minor changes.
806 Returns
807 -------
808 constraint : `sqlalchemy.schema.ForeignKeyConstraint`
809 SQLAlchemy representation of the constraint.
810 """
811 name = self.shrinkDatabaseEntityName(
812 "_".join(["fkey", table, self._mangleTableName(spec.table)]
813 + list(spec.target) + list(spec.source))
814 )
815 return sqlalchemy.schema.ForeignKeyConstraint(
816 spec.source,
817 [f"{self._mangleTableName(spec.table)}.{col}" for col in spec.target],
818 name=name,
819 ondelete=spec.onDelete
820 )
822 def _convertExclusionConstraintSpec(self, table: str,
823 spec: Tuple[Union[str, Type[TimespanDatabaseRepresentation]], ...],
824 metadata: sqlalchemy.MetaData) -> sqlalchemy.schema.Constraint:
825 """Convert a `tuple` from `ddl.TableSpec.exclusion` into a SQLAlchemy
826 constraint representation.
828 Parameters
829 ----------
830 table : `str`
831 Name of the table this constraint is being added to.
832 spec : `tuple` [ `str` or `type` ]
833 A tuple of `str` column names and the `type` object returned by
834 `getTimespanRepresentation` (which must appear exactly once),
835 indicating the order of the columns in the index used to back the
836 constraint.
837 metadata : `sqlalchemy.MetaData`
838 SQLAlchemy representation of the DDL schema this constraint is
839 being added to.
841 Returns
842 -------
843 constraint : `sqlalchemy.schema.Constraint`
844 SQLAlchemy representation of the constraint.
846 Raises
847 ------
848 NotImplementedError
849 Raised if this database does not support exclusion constraints.
850 """
851 raise NotImplementedError(f"Database {self} does not support exclusion constraints.")
def _convertTableSpec(self, name: str, spec: ddl.TableSpec, metadata: sqlalchemy.MetaData,
                      **kwargs: Any) -> sqlalchemy.schema.Table:
    """Build the `sqlalchemy.schema.Table` described by a `TableSpec`.

    Parameters
    ----------
    name : `str`
        Name of the table; it is passed through `_mangleTableName` before
        being used for the table and for derived constraint/index names.
    spec : `TableSpec`
        Specification for the table to be converted.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema this table is being
        added to.
    **kwargs
        Extra keyword arguments forwarded unchanged to the
        `sqlalchemy.schema.Table` constructor, so that derived classes can
        delegate to ``super()`` while making only minor changes.

    Returns
    -------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy representation of the table.

    Notes
    -----
    Foreign key *constraints* in ``spec.foreignKeys`` are deliberately not
    created here, to avoid circular dependencies; only the indexes that
    back them are.  The constraints are added by higher-level logic in
    `ensureTableExists`, `getExistingTable`, and `declareStaticTables`.
    """
    name = self._mangleTableName(name)

    # Columns first, then per-column constraints, in field order.
    tableArgs: list = []
    for field in spec.fields:
        tableArgs.append(self._convertFieldSpec(name, field, metadata))
    for field in spec.fields:
        tableArgs += self._makeColumnConstraints(name, field)

    # Column tuples that already have an index (via the primary key, a
    # unique constraint, or an explicit index), so that no duplicate
    # explicit or foreign-key index is created for them.
    primaryKey = tuple(field.name for field in spec.fields if field.primaryKey)
    covered = {primaryKey}

    for columns in spec.unique:
        constraintName = self.shrinkDatabaseEntityName("_".join([name, "unq"] + list(columns)))
        tableArgs.append(sqlalchemy.schema.UniqueConstraint(*columns, name=constraintName))
    covered.update(spec.unique)

    for columns in spec.indexes:
        if columns in covered:
            continue
        indexName = self.shrinkDatabaseEntityName("_".join([name, "idx"] + list(columns)))
        tableArgs.append(
            sqlalchemy.schema.Index(indexName, *columns, unique=(columns in spec.unique))
        )
    covered.update(spec.indexes)

    for fk in spec.foreignKeys:
        if fk.addIndex and fk.source not in covered:
            fkIndexName = self.shrinkDatabaseEntityName("_".join((name, "fkidx") + fk.source))
            tableArgs.append(sqlalchemy.schema.Index(fkIndexName, *fk.source))

    for excl in spec.exclusion:
        tableArgs.append(self._convertExclusionConstraintSpec(name, excl, metadata))

    assert spec.doc is None or isinstance(spec.doc, str), f"Bad doc for {name}."
    return sqlalchemy.schema.Table(name, metadata, *tableArgs, comment=spec.doc, info=spec, **kwargs)
def ensureTableExists(self, name: str, spec: ddl.TableSpec) -> sqlalchemy.schema.Table:
    """Return the table with the given name, creating it first if needed.

    Parameters
    ----------
    name : `str`
        Name of the table (not including namespace qualifiers).
    spec : `TableSpec`
        Specification for the table.  It is used to create the table when
        it is missing, and *may* be used to check an existing table for
        consistency, but no such check is guaranteed.

    Returns
    -------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy representation of the table.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False`, and the table does not
        already exist.
    DatabaseConflictError
        Raised if the table exists but ``spec`` is inconsistent with its
        definition.

    Notes
    -----
    This method may not be called within transactions.  It may be called
    on read-only databases if and only if the table does in fact already
    exist.

    Subclasses may override this method, but usually should not need to.
    """
    # TODO: if _engine is used to make a table then it uses separate
    # connection and should not interfere with current transaction
    assert self._session_connection is None or not self._session_connection.in_transaction(), \
        "Table creation interrupts transactions."
    assert self._metadata is not None, "Static tables must be declared before dynamic tables."

    existing = self.getExistingTable(name, spec)
    if existing is not None:
        return existing

    if not self.isWriteable():
        raise ReadOnlyDatabaseError(
            f"Table {name} does not exist, and cannot be created "
            f"because database {self} is read-only."
        )

    # Foreign key constraints are attached here because _convertTableSpec
    # intentionally leaves them out.
    created = self._convertTableSpec(name, spec, self._metadata)
    for fkSpec in spec.foreignKeys:
        created.append_constraint(self._convertForeignKeySpec(name, fkSpec, self._metadata))
    created.create(self._connection)
    return created
def getExistingTable(self, name: str, spec: ddl.TableSpec) -> Optional[sqlalchemy.schema.Table]:
    """Look up an existing table by name and check it against a spec.

    Parameters
    ----------
    name : `str`
        Name of the table (not including namespace qualifiers).
    spec : `TableSpec`
        Specification for the table.  It is used to build the SQLAlchemy
        representation when the table is found in the database but not yet
        in the in-memory metadata, and to check that the existing
        definition is consistent.

    Returns
    -------
    table : `sqlalchemy.schema.Table` or `None`
        SQLAlchemy representation of the table, or `None` if it does not
        exist.

    Raises
    ------
    DatabaseConflictError
        Raised if the table exists but ``spec`` is inconsistent with its
        definition.

    Notes
    -----
    This method can be called within transactions and never modifies the
    database.

    Subclasses may override this method, but usually should not need to.
    """
    assert self._metadata is not None, "Static tables must be declared before dynamic tables."
    name = self._mangleTableName(name)
    metadataKey = name if self.namespace is None else f"{self.namespace}.{name}"
    table = self._metadata.tables.get(metadataKey)

    if table is not None:
        # Already known to the in-memory metadata: only column names are
        # compared.
        if spec.fields.names != set(table.columns.keys()):
            raise DatabaseConflictError(f"Table '{name}' has already been defined differently; the new "
                                        f"specification has columns {list(spec.fields.names)}, while "
                                        f"the previous definition has {list(table.columns.keys())}.")
        return table

    # Not in the metadata; ask the database itself.
    inspector = sqlalchemy.inspect(self._connection)
    if name not in inspector.get_table_names(schema=self.namespace):
        return None

    _checkExistingTableDefinition(name, spec, inspector.get_columns(name, schema=self.namespace))
    table = self._convertTableSpec(name, spec, self._metadata)
    for fkSpec in spec.foreignKeys:
        table.append_constraint(self._convertForeignKeySpec(name, fkSpec, self._metadata))
    return table
@classmethod
def getTimespanRepresentation(cls) -> Type[TimespanDatabaseRepresentation]:
    """Return the `type` describing how `Timespan` objects are stored in
    this database.

    Nothing else in `Database` consults the returned type automatically;
    calling code must keep its DDL and queries consistent with it.

    Returns
    -------
    TimespanReprClass : `type` (`TimespanDatabaseRepresentation` subclass)
        A type that encapsulates the way `Timespan` objects should be
        stored in this database.

    Notes
    -----
    Although the choice of representation belongs to the `Database`
    implementation, the timespan-mangling logic itself is kept outside of
    it for two reasons:

    - Timespans appear in relatively few tables and queries in typical
      usage, and the code touching them already knows it is dealing with
      timespans.  A representation-aware `insert`, for example, would
      need extra logic to detect when mangling is required, which would
      usually be useless overhead.

    - SQLAlchemy's SELECT expression system cannot wrap several columns in
      a single expression object (the ORM can, but it is not used here),
      so encapsulating the representation would mean wrapping _much_ more
      of that code in our own interfaces.
    """
    return TimespanDatabaseRepresentation.Compound
@classmethod
def getSpatialRegionRepresentation(cls) -> Type[SpatialRegionDatabaseRepresentation]:
    """Return the `type` describing how `lsst.sphgeom.Region` objects are
    stored in this database.

    Nothing else in `Database` consults the returned type automatically;
    calling code must keep its DDL and queries consistent with it.

    Returns
    -------
    RegionReprClass : `type` (`SpatialRegionDatabaseRepresentation` subclass)
        A type that encapsulates the way `lsst.sphgeom.Region` objects
        should be stored in this database.

    Notes
    -----
    See `getTimespanRepresentation` for comments on why this method is not
    more tightly integrated with the rest of the `Database` interface.
    """
    return SpatialRegionDatabaseRepresentation
def sync(self, table: sqlalchemy.schema.Table, *,
         keys: Dict[str, Any],
         compared: Optional[Dict[str, Any]] = None,
         extra: Optional[Dict[str, Any]] = None,
         returning: Optional[Sequence[str]] = None,
         update: bool = False,
         ) -> Tuple[Optional[Dict[str, Any]], Union[bool, Dict[str, Any]]]:
    """Make sure the table holds a row equivalent to the given values,
    inserting (or optionally updating) as necessary.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table to be queried and possibly inserted into.
    keys : `dict`
        Column name-value pairs used to search for an existing row; must
        be a combination that can be used to select a single row if one
        exists.  If such a row does not exist, these values are used in
        the insert.
    compared : `dict`, optional
        Column name-value pairs that are compared to those in any existing
        row.  If such a row does not exist, these values are used in the
        insert.
    extra : `dict`, optional
        Column name-value pairs that are ignored if a matching row exists,
        but used in an insert if one is necessary.
    returning : `~collections.abc.Sequence` of `str`, optional
        The names of columns whose values should be returned.
    update : `bool`, optional
        If `True` (`False` is default), update the existing row with the
        values in ``compared`` instead of raising `DatabaseConflictError`.

    Returns
    -------
    row : `dict`, optional
        The value of the fields indicated by ``returning``, or `None` if
        ``returning`` is `None`.
    inserted_or_updated : `bool` or `dict`
        If `True`, a new row was inserted; if `False`, a matching row
        already existed.  If a `dict` (only possible if ``update=True``),
        then an existing row was updated, and the dict maps the names of
        the updated columns to their *old* values (new values can be
        obtained from ``compared``).

    Raises
    ------
    DatabaseConflictError
        Raised if the values in ``compared`` do not match the values in the
        database.
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False`, and no matching record
        already exists.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    It may be called on read-only databases if and only if the matching row
    does in fact already exist.
    """

    def keysMatch() -> Any:
        """Build the WHERE expression that selects rows by ``keys``."""
        return sqlalchemy.sql.and_(*[table.columns[k] == v for k, v in keys.items()])

    def differs(a: Any, b: Any) -> bool:
        """Report whether an existing value ``a`` differs from ``b``.

        `astropy.time.Time` values are compared through
        ``time_utils.TimeConverter().times_equal`` rather than ``!=``.
        """
        if isinstance(a, astropy.time.Time):
            return not time_utils.TimeConverter().times_equal(a, b)
        return a != b

    def check() -> Tuple[int, Optional[Dict[str, Any]], Optional[List]]:
        """Query for rows matching ``keys`` and compare against the
        caller's values.

        Returns
        -------
        n : `int`
            Number of matching rows.  ``n != 1`` is always an error, but
            which kind depends on where `check` is called.
        bad : `dict` or `None`
            The subset of the keys of ``compared`` whose existing values
            did not match the given ones, mapped to the existing values in
            the database.  `None` if ``n != 1``.
        result : `list` or `None`
            Existing values for the columns named in ``returning``, or
            `None` if ``returning is None`` or ``n != 1``.
        """
        wanted: Set[str] = set()
        if compared is not None:
            wanted.update(compared.keys())
        if returning is not None:
            wanted.update(returning)
        if not wanted:
            # Some column has to be selected, even when only the number
            # of rows matters.
            wanted.add(next(iter(keys)))
        labeled = [table.columns[k].label(k) for k in wanted]
        selectSql = sqlalchemy.sql.select(labeled).select_from(table).where(keysMatch())
        matches = list(self._connection.execute(selectSql).fetchall())
        if len(matches) != 1:
            return len(matches), None, None
        existing = matches[0]
        inconsistencies: Dict[str, Any] = {}
        if compared is not None:
            inconsistencies = {
                k: existing[k]
                for k, v in compared.items()
                if differs(existing[k], v)
            }
        toReturn: Optional[list] = None if returning is None else [existing[k] for k in returning]
        return 1, inconsistencies, toReturn

    def format_bad(inconsistencies: Dict[str, Any]) -> str:
        """Render the 'bad' dictionary from ``check`` for an error
        message.
        """
        assert compared is not None, "Should not be able to get inconsistencies without comparing."
        return ", ".join(f"{k}: {v!r} != {compared[k]!r}" for k, v in inconsistencies.items())

    inserted_or_updated: Union[bool, Dict[str, Any]]
    if not self.isTableWriteable(table):
        # Table is not writeable; all we can do is see whether the row
        # is already there.
        n, bad, result = check()
        if n < 1:
            raise ReadOnlyDatabaseError("sync needs to insert, but database is read-only.")
        if n > 1:
            raise RuntimeError("Keys passed to sync do not comprise a unique constraint.")
        if bad:
            if update:
                raise ReadOnlyDatabaseError("sync needs to update, but database is read-only.")
            raise DatabaseConflictError(
                f"Conflict in sync for table {table.name} on column(s) {format_bad(bad)}."
            )
        inserted_or_updated = False
    else:
        # Attempt the insert first, tolerating failure in specific ways
        # only.
        row = keys.copy()
        if compared is not None:
            row.update(compared)
        if extra is not None:
            row.update(extra)
        with self.transaction(lock=[table]):
            inserted = bool(self.ensure(table, row))
            # check() has to run inside the transaction so that an insert
            # that did not do what was expected gets rolled back when one
            # of the exceptions below propagates.
            n, bad, result = check()
            if n < 1:
                raise ConflictingDefinitionError(
                    f"Attempted to ensure {row} exists by inserting it with ON CONFLICT IGNORE, "
                    f"but a post-insert query on {keys} returned no results. "
                    f"Insert was {'' if inserted else 'not '}reported as successful. "
                    "This can occur if the insert violated a database constraint other than the "
                    "unique constraint or primary key used to identify the row in this call."
                )
            if n > 1:
                raise RuntimeError(f"Keys passed to sync {keys.keys()} do not comprise a "
                                   f"unique constraint for table {table.name}.")
            if not bad:
                inserted_or_updated = inserted
            else:
                assert compared is not None, \
                    "Should not be able to get inconsistencies without comparing."
                if inserted:
                    raise RuntimeError(
                        f"Conflict ({bad}) in sync after successful insert; this is "
                        "possible if the same table is being updated by a concurrent "
                        "process that isn't using sync, but it may also be a bug in "
                        "daf_butler."
                    )
                if not update:
                    raise DatabaseConflictError(
                        f"Conflict in sync for table {table.name} on column(s) {format_bad(bad)}."
                    )
                # Overwrite only the columns that were found to differ.
                self._connection.execute(
                    table.update().where(keysMatch()).values(
                        **{k: compared[k] for k in bad.keys()}
                    )
                )
                inserted_or_updated = bad

    if returning is None:
        return None, inserted_or_updated
    assert result is not None
    return dict(zip(returning, result)), inserted_or_updated
def insert(self, table: sqlalchemy.schema.Table, *rows: dict, returnIds: bool = False,
           select: Optional[sqlalchemy.sql.Select] = None,
           names: Optional[Iterable[str]] = None,
           ) -> Optional[List[int]]:
    """Insert rows into a table, optionally reporting the autoincrement
    primary key values that were generated.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    returnIds: `bool`
        If `True` (`False` is default), return the values of the table's
        autoincrement primary key field (which must exist).
    select : `sqlalchemy.sql.Select`, optional
        A SELECT query expression to insert rows from.  Cannot be provided
        with either ``rows`` or ``returnIds=True``.
    names : `Iterable` [ `str` ], optional
        Names of columns in ``table`` to be populated, ordered to match the
        columns returned by ``select``.  Ignored if ``select`` is `None`.
        If not provided, the columns returned by ``select`` must be named
        to match the desired columns of ``table``.
    *rows
        Positional arguments are the rows to be inserted, as dictionaries
        mapping column name to value.  The keys in all dictionaries must
        be the same.

    Returns
    -------
    ids : `None`, or `list` of `int`
        If ``returnIds`` is `True`, a `list` containing the inserted
        values for the table's autoincrement primary key.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.
    TypeError
        Raised if ``select`` is combined with ``rows`` or
        ``returnIds=True``.

    Notes
    -----
    The default implementation issues one bulk insert when ``returnIds``
    is `False`, and loops over single-row inserts when it is `True`.

    Derived classes should reimplement when they can provide a more
    efficient implementation (especially for the latter case).

    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.
    """
    self.assertTableWriteable(table, f"Cannot insert into read-only table {table}.")
    if select is not None and (rows or returnIds):
        raise TypeError("'select' is incompatible with passing value rows or returnIds=True.")

    # Nothing at all to insert.
    if select is None and not rows:
        return [] if returnIds else None

    if returnIds:
        # One statement per row, so each generated key can be read back.
        statement = table.insert()
        generated = []
        for row in rows:
            generated.append(self._connection.execute(statement, row).inserted_primary_key[0])
        return generated

    if select is None:
        self._connection.execute(table.insert(), *rows)
        return None

    if names is None:
        # columns() is deprecated since 1.4, but
        # selected_columns() method did not exist in 1.3.
        if hasattr(select, "selected_columns"):
            names = select.selected_columns.keys()
        else:
            names = select.columns.keys()
    self._connection.execute(table.insert().from_select(names, select))
    return None
@abstractmethod
def replace(self, table: sqlalchemy.schema.Table, *rows: dict) -> None:
    """Insert rows into a table, overwriting any existing row whose
    primary key the new row would otherwise collide with.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    *rows
        Positional arguments are the rows to be inserted, as dictionaries
        mapping column name to value.  The keys in all dictionaries must
        be the same.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    Implementations should raise a `sqlalchemy.exc.IntegrityError`
    exception when a constraint other than the primary key would be
    violated.

    Implementations are not required to support `replace` on tables
    with autoincrement keys.
    """
    # Abstract: concrete databases must supply the implementation.
    raise NotImplementedError()
@abstractmethod
def ensure(self, table: sqlalchemy.schema.Table, *rows: dict) -> int:
    """Insert rows into a table, silently skipping every row whose
    insertion would violate any constraint.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    *rows
        Positional arguments are the rows to be inserted, as dictionaries
        mapping column name to value.  The keys in all dictionaries must
        be the same.

    Returns
    -------
    count : `int`
        The number of rows actually inserted.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.
        This is raised even if the operation would do nothing even on a
        writeable database.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    Implementations are not required to support `ensure` on tables
    with autoincrement keys.
    """
    # Abstract: concrete databases must supply the implementation.
    raise NotImplementedError()
def delete(self, table: sqlalchemy.schema.Table, columns: Iterable[str], *rows: dict) -> int:
    """Delete one or more rows from a table.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table that rows should be deleted from.
    columns: `~collections.abc.Iterable` of `str`
        The names of columns that will be used to constrain the rows to
        be deleted; these will be combined via ``AND`` to form the
        ``WHERE`` clause of the delete query.  Any iterable is accepted,
        including one-shot iterators and generators.  If it is empty, the
        delete is issued without a ``WHERE`` clause.
    *rows
        Positional arguments are the keys of rows to be deleted, as
        dictionaries mapping column name to value.  The keys in all
        dictionaries must be exactly the names in ``columns``.

    Returns
    -------
    count : `int`
        Number of rows deleted.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    When exactly one column varies across ``rows`` the delete is issued
    with an ``IN`` clause, split into batches of 1000 values; otherwise
    one parameterized statement is executed with all rows.

    The default implementation should be sufficient for most derived
    classes.
    """
    self.assertTableWriteable(table, f"Cannot delete from read-only table {table}.")
    # Materialize before testing truthiness: an iterator or generator is
    # always truthy, so an empty one would otherwise be mistaken for a
    # non-empty set of constraint columns and the unconstrained delete
    # would be skipped.
    columns = list(columns)
    if columns and not rows:
        # If there are no columns, this operation is supposed to delete
        # everything (so we proceed as usual).  But if there are columns,
        # but no rows, it was a constrained bulk operation where the
        # constraint is that no rows match, and we should short-circuit
        # while reporting that no rows were affected.
        return 0
    sql = table.delete()

    # Collect the distinct values seen per column; it is more efficient to
    # use the IN operator if only one column changes across all rows.
    content: Dict[str, Set] = defaultdict(set)
    if len(columns) == 1:
        # Nothing to calculate since we can always use IN.
        column = columns[0]
        changing_columns = [column]
        content[column] = {row[column] for row in rows}
    else:
        for row in rows:
            for k, v in row.items():
                content[k].add(v)
        changing_columns = [col for col, values in content.items() if len(values) > 1]

    if len(changing_columns) != 1:
        # Zero or several columns change from row to row, so use explicit
        # bind parameters and have each row processed separately.
        whereTerms = [table.columns[name] == sqlalchemy.sql.bindparam(name) for name in columns]
        if whereTerms:
            sql = sql.where(sqlalchemy.sql.and_(*whereTerms))
        return self._connection.execute(sql, *rows).rowcount

    # Exactly one column has changing values and any others are fixed, so
    # an IN operator can be used.
    name = changing_columns[0]

    # Simple equality clauses for the unchanging columns; each of their
    # value sets holds a single element.
    clauses = [
        table.columns[k] == next(iter(values))
        for k, values in content.items()
        if k != name
    ]

    # The IN operator will not work for "infinite" numbers of rows, so
    # the values are split across several statements.
    in_content = list(content[name])
    n_per_loop = 1_000  # Controls how many items to put in IN clause
    rowcount = 0
    for iposn in range(0, len(in_content), n_per_loop):
        in_clause = table.columns[name].in_(in_content[iposn:iposn + n_per_loop])
        newsql = sql.where(sqlalchemy.sql.and_(*clauses, in_clause))
        rowcount += self._connection.execute(newsql).rowcount
    return rowcount
def update(self, table: sqlalchemy.schema.Table, where: Dict[str, str], *rows: dict) -> int:
    """Update one or more rows in a table.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table containing the rows to be updated.
    where : `dict` [`str`, `str`]
        A mapping from the names of columns that will be used to search for
        existing rows to the keys that will hold these values in the
        ``rows`` dictionaries.  Note that these may not be the same due to
        SQLAlchemy limitations.
    *rows
        Positional arguments are the rows to be updated.  The keys in all
        dictionaries must be the same, and may correspond to either a
        value in the ``where`` dictionary or the name of a column to be
        updated.

    Returns
    -------
    count : `int`
        Number of rows matched (regardless of whether the update actually
        modified them).  Zero is returned without touching the database
        when no rows are given.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    The default implementation should be sufficient for most derived
    classes.
    """
    self.assertTableWriteable(table, f"Cannot update read-only table {table}.")
    if not rows:
        return 0
    statement = table.update()
    # Each search column is matched against a bind parameter named by the
    # corresponding ``where`` value; the values come from ``rows``.
    conditions = [
        table.columns[columnName] == sqlalchemy.sql.bindparam(paramName)
        for columnName, paramName in where.items()
    ]
    statement = statement.where(sqlalchemy.sql.and_(*conditions))
    return self._connection.execute(statement, *rows).rowcount
def query(self, sql: sqlalchemy.sql.FromClause,
          *args: Any, **kwargs: Any) -> sqlalchemy.engine.ResultProxy:
    """Execute a SELECT query and hand back its result object.

    Parameters
    ----------
    sql : `sqlalchemy.sql.FromClause`
        A SQLAlchemy representation of a ``SELECT`` query.
    *args
        Additional positional arguments are forwarded to
        `sqlalchemy.engine.Connection.execute`.
    **kwargs
        Additional keyword arguments are forwarded to
        `sqlalchemy.engine.Connection.execute`.

    Returns
    -------
    result : `sqlalchemy.engine.ResultProxy`
        Query results.

    Notes
    -----
    The statement is passed to the connection as-is; nothing here checks
    that it really is a ``SELECT``.

    The default implementation should be sufficient for most derived
    classes.
    """
    # TODO: should we guard against non-SELECT queries here?
    connection = self._connection
    return connection.execute(sql, *args, **kwargs)
# Annotation only: no value is assigned at this point, so the attribute
# has to be set elsewhere before it is read.
origin: int
"""An integer ID that should be used as the default for any datasets,
quanta, or other entities that use a (autoincrement, origin) compound
primary key (`int`).
"""

# Annotation only: no value is assigned at this point.  `getExistingTable`
# reads this attribute to qualify table names in the metadata lookup and
# passes it as ``schema=`` when inspecting the database.
namespace: Optional[str]
"""The schema or namespace this database instance is associated with
(`str` or `None`).
"""