Coverage for python/lsst/daf/butler/registry/interfaces/_database.py : 14%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = [
24 "Database",
25 "ReadOnlyDatabaseError",
26 "DatabaseConflictError",
27 "SchemaAlreadyDefinedError",
28 "StaticTablesContext",
29]
31from abc import ABC, abstractmethod
32from contextlib import contextmanager
33from typing import (
34 Any,
35 Callable,
36 Dict,
37 Iterable,
38 Iterator,
39 List,
40 Optional,
41 Sequence,
42 Set,
43 Tuple,
44 Type,
45 Union,
46)
47import uuid
48import warnings
50import astropy.time
51import sqlalchemy
53from ...core import DatabaseTimespanRepresentation, ddl, time_utils
54from .._exceptions import ConflictingDefinitionError
# Key used in a SQLAlchemy connection's ``info`` dict by
# `Database.transaction` to remember whether a SAVEPOINT transaction is
# currently active on that connection.
_IN_SAVEPOINT_TRANSACTION = "IN_SAVEPOINT_TRANSACTION"
def _checkExistingTableDefinition(name: str, spec: ddl.TableSpec, inspection: List[Dict[str, Any]]) -> None:
    """Check that a `ddl.TableSpec` agrees with the columns reported by
    database introspection.

    Only the set of column names is compared.

    Parameters
    ----------
    name : `str`
        Name of the table (only used in error messages).
    spec : `ddl.TableSpec`
        Specification of the table.
    inspection : `list` [ `dict` ]
        Column descriptions as returned by
        `sqlalchemy.engine.reflection.Inspector.get_columns`; only the
        ``"name"`` entry of each is used.

    Raises
    ------
    DatabaseConflictError
        Raised if the definitions are inconsistent.
    """
    reflected = [column["name"] for column in inspection]
    mismatch = spec.fields.names != set(reflected)
    if mismatch:
        raise DatabaseConflictError(
            f"Table '{name}' exists but is defined differently in the database; "
            f"specification has columns {list(spec.fields.names)}, while the "
            f"table in the database has {reflected}."
        )
class ReadOnlyDatabaseError(RuntimeError):
    """Error signalling that a write operation was requested on a
    `Database` that is read-only.
    """
class DatabaseConflictError(ConflictingDefinitionError):
    """Error signalling that what is stored in the database (row values or
    schema entities) does not match what this client expects.
    """
class SchemaAlreadyDefinedError(RuntimeError):
    """Error signalling an attempt to initialize a database schema in a
    database where some tables already exist.
    """
class StaticTablesContext:
    """Collector used to declare the static schema for a registry layer in
    a database.

    Instances are created and yielded by `Database.declareStaticTables`;
    no other code should construct one.
    """

    def __init__(self, db: Database):
        self._db = db
        # Foreign key constraints are collected here, paired with the table
        # they belong to, rather than being attached immediately.
        self._foreignKeys: List[Tuple[sqlalchemy.schema.Table, sqlalchemy.schema.ForeignKeyConstraint]] = []
        self._inspector = sqlalchemy.engine.reflection.Inspector(self._db._connection)
        # Names of the tables present in the namespace at construction time.
        self._tableNames = frozenset(self._inspector.get_table_names(schema=self._db.namespace))
        self._initializers: List[Callable[[Database], None]] = []

    def addTable(self, name: str, spec: ddl.TableSpec) -> sqlalchemy.schema.Table:
        """Declare a table and return its SQLAlchemy representation.

        Actual creation of the table may be deferred until the context
        opened by `Database.declareStaticTables` ends, so tables can be
        declared in any order even when they reference each other through
        foreign keys.

        Parameters
        ----------
        name : `str`
            Logical name of the table.
        spec : `ddl.TableSpec`
            Specification of the table.

        Returns
        -------
        table : `sqlalchemy.schema.Table`
            SQLAlchemy representation of the table.
        """
        db = self._db
        name = db._mangleTableName(name)
        if name in self._tableNames:
            # The table is already in the database: make sure its columns
            # agree with the specification.
            reflected = self._inspector.get_columns(name, schema=db.namespace)
            _checkExistingTableDefinition(name, spec, reflected)
        table = db._convertTableSpec(name, spec, db._metadata)
        self._foreignKeys.extend(
            (table, db._convertForeignKeySpec(name, foreignKeySpec, db._metadata))
            for foreignKeySpec in spec.foreignKeys
        )
        return table

    def addTableTuple(self, specs: Tuple[ddl.TableSpec, ...]) -> Tuple[sqlalchemy.schema.Table, ...]:
        """Declare every table in a named tuple of specifications and
        return their SQLAlchemy representations in a named tuple of the
        same type.

        As with `addTable`, actual creation may be deferred until the
        context opened by `Database.declareStaticTables` ends.

        Notes
        -----
        ``specs`` *must* be an instance of a type created by
        `collections.namedtuple`, not just a regular tuple: the field names
        are used as table names and the result is built with ``_make``.
        Because `~collections.namedtuple` is a factory for `type` objects
        rather than a type itself, this cannot be expressed in the
        annotations.
        """
        tables = [
            self.addTable(name, spec)
            for name, spec in zip(specs._fields, specs)  # type: ignore
        ]
        return specs._make(tables)  # type: ignore

    def addInitializer(self, initializer: Callable[[Database], None]) -> None:
        """Register a callable that performs one-time initialization of a
        database.

        Initialization can be anything that changes the state of a database
        and must happen exactly once after the schema has been created,
        e.g. populating schema attributes.

        Parameters
        ----------
        initializer : callable
            Callable taking a single `Database` argument.
        """
        self._initializers.append(initializer)
173class Database(ABC):
174 """An abstract interface that represents a particular database engine's
175 representation of a single schema/namespace/database.
177 Parameters
178 ----------
179 origin : `int`
180 An integer ID that should be used as the default for any datasets,
181 quanta, or other entities that use a (autoincrement, origin) compound
182 primary key.
183 connection : `sqlalchemy.engine.Connection`
184 The SQLAlchemy connection this `Database` wraps.
185 namespace : `str`, optional
186 Name of the schema or namespace this instance is associated with.
187 This is passed as the ``schema`` argument when constructing a
188 `sqlalchemy.schema.MetaData` instance. We use ``namespace`` instead to
189 avoid confusion between "schema means namespace" and "schema means
190 table definitions".
192 Notes
193 -----
194 `Database` requires all write operations to go through its special named
195 methods. Our write patterns are sufficiently simple that we don't really
196 need the full flexibility of SQL insert/update/delete syntax, and we need
197 non-standard (but common) functionality in these operations sufficiently
198 often that it seems worthwhile to provide our own generic API.
200 In contrast, `Database.query` allows arbitrary ``SELECT`` queries (via
201 their SQLAlchemy representation) to be run, as we expect these to require
202 significantly more sophistication while still being limited to standard
203 SQL.
205 `Database` itself has several underscore-prefixed attributes:
207 - ``_connection``: SQLAlchemy object representing the connection.
208 - ``_metadata``: the `sqlalchemy.schema.MetaData` object representing
209 the tables and other schema entities.
211 These are considered protected (derived classes may access them, but other
212 code should not), and read-only, aside from executing SQL via
213 ``_connection``.
214 """
def __init__(self, *, origin: int, connection: sqlalchemy.engine.Connection,
             namespace: Optional[str] = None):
    self._connection = connection
    # No schema metadata exists yet; `declareStaticTables` assigns it.
    self._metadata: Optional[sqlalchemy.schema.MetaData] = None
    # Keys of the temporary tables created through this instance.
    self._tempTables: Set[str] = set()
    self.origin = origin
    self.namespace = namespace
def __repr__(self) -> str:
    # The connection URL (plus the namespace, when one is set) is more
    # useful here than an attempt to reproduce the constructor arguments.
    location = str(self._connection.engine.url)
    suffix = f"#{self.namespace}" if self.namespace else ""
    return f'{type(self).__name__}("{location}{suffix}")'
@classmethod
def makeDefaultUri(cls, root: str) -> Optional[str]:
    """Return a default connection URI suitable for the given root
    directory, or `None` when no such default can exist.

    This base implementation always returns `None`.
    """
    return None
@classmethod
def fromUri(cls, uri: str, *, origin: int, namespace: Optional[str] = None,
            writeable: bool = True) -> Database:
    """Build a `Database` from a SQLAlchemy URI.

    The URI is first turned into a connection by `connect`, which is then
    handed to `fromConnection`.

    Parameters
    ----------
    uri : `str`
        A SQLAlchemy URI connection string.
    origin : `int`
        An integer ID that should be used as the default for any datasets,
        quanta, or other entities that use a (autoincrement, origin)
        compound primary key.
    namespace : `str`, optional
        A database namespace (i.e. schema) the new instance should be
        associated with.  If `None` (default), the namespace (if any) is
        inferred from the URI.
    writeable : `bool`, optional
        If `True`, allow write operations on the database, including
        ``CREATE TABLE``.

    Returns
    -------
    db : `Database`
        A new `Database` instance.
    """
    connection = cls.connect(uri, writeable=writeable)
    return cls.fromConnection(connection, origin=origin, namespace=namespace, writeable=writeable)
@classmethod
@abstractmethod
def connect(cls, uri: str, *, writeable: bool = True) -> sqlalchemy.engine.Connection:
    """Open a `sqlalchemy.engine.Connection` for a SQLAlchemy URI.

    Parameters
    ----------
    uri : `str`
        A SQLAlchemy URI connection string.
    writeable : `bool`, optional
        If `True`, allow write operations on the database, including
        ``CREATE TABLE``.

    Returns
    -------
    connection : `sqlalchemy.engine.Connection`
        A database connection.

    Notes
    -----
    Subclasses that support other ways to connect to a database are
    encouraged to add optional arguments to their implementation of this
    method, as long as they maintain compatibility with the base class
    call signature.
    """
    raise NotImplementedError()
@classmethod
@abstractmethod
def fromConnection(cls, connection: sqlalchemy.engine.Connection, *, origin: int,
                   namespace: Optional[str] = None, writeable: bool = True) -> Database:
    """Build a new `Database` around an existing
    `sqlalchemy.engine.Connection`.

    Parameters
    ----------
    connection : `sqlalchemy.engine.Connection`
        The connection for the database.  May be shared between
        `Database` instances.
    origin : `int`
        An integer ID that should be used as the default for any datasets,
        quanta, or other entities that use a (autoincrement, origin)
        compound primary key.
    namespace : `str`, optional
        A different database namespace (i.e. schema) the new instance
        should be associated with.  If `None` (default), the namespace
        (if any) is inferred from the connection.
    writeable : `bool`, optional
        If `True`, allow write operations on the database, including
        ``CREATE TABLE``.

    Returns
    -------
    db : `Database`
        A new `Database` instance.

    Notes
    -----
    This method allows different `Database` instances to share the same
    connection, which is desirable when they represent different
    namespaces that can be queried together.  This also ties their
    transaction state together, however: starting a transaction in any
    database automatically starts one in all other databases.
    """
    raise NotImplementedError()
@contextmanager
def transaction(self, *, interrupting: bool = False, savepoint: bool = False,
                lock: Iterable[sqlalchemy.schema.Table] = ()) -> Iterator:
    """Return a context manager that represents a transaction.

    Parameters
    ----------
    interrupting : `bool`, optional
        If `True` (`False` is default), this transaction block may not be
        nested within an outer one, and attempting to do so is a logic
        (i.e. assertion) error.
    savepoint : `bool`, optional
        If `True` (`False` is default), create a `SAVEPOINT`, allowing
        exceptions raised by the database (e.g. due to constraint
        violations) during this transaction's context to be caught outside
        it without also rolling back all operations in an outer transaction
        block.  If `False`, transactions may still be nested, but a
        rollback may be generated at any level and affects all levels, and
        commits are deferred until the outermost block completes.  If any
        outer transaction block was created with ``savepoint=True``, all
        inner blocks will be as well (regardless of the actual value
        passed).  This has no effect if this is the outermost transaction.
    lock : `Iterable` [ `sqlalchemy.schema.Table` ], optional
        A list of tables to lock for the duration of this transaction.
        These locks are guaranteed to prevent concurrent writes, but only
        prevent concurrent reads if the database engine requires that in
        order to block concurrent writes.

    Notes
    -----
    All transactions on a connection managed by one or more `Database`
    instances _must_ go through this method, or transaction state will not
    be correctly managed.

    A failure while acquiring the requested locks rolls back the
    transaction that was just begun, exactly as a failure inside the
    ``with`` body does.
    """
    connection = self._connection
    assert not (interrupting and connection.in_transaction()), (
        "Logic error in transaction nesting: an operation that would "
        "interrupt the active transaction context has been requested."
    )
    # We remember whether we are already in a SAVEPOINT transaction via the
    # connection object's 'info' dict, which is explicitly for user
    # information like this.  This is safer than a regular `Database`
    # instance attribute, because it guards against multiple `Database`
    # instances sharing the same connection.  The need to use our own flag
    # here to track whether we're in a nested transaction should go away in
    # SQLAlchemy 1.4, which seems to have a
    # `Connection.in_nested_transaction()` method.
    inherited = connection.info.get(_IN_SAVEPOINT_TRANSACTION, False)
    savepoint = savepoint or inherited
    connection.info[_IN_SAVEPOINT_TRANSACTION] = savepoint
    try:
        if connection.in_transaction() and savepoint:
            trans = connection.begin_nested()
        else:
            # Use a regular (non-savepoint) transaction always for the
            # outermost context, as well as when a savepoint was not
            # requested.
            trans = connection.begin()
        try:
            # Locking happens inside the guarded region so that a failure
            # to lock does not leave the new transaction open.
            self._lockTables(lock)
            yield
            trans.commit()
        except BaseException:
            trans.rollback()
            raise
    finally:
        if connection.in_transaction():
            # Still inside an outer block: hand back the flag value that
            # block saw, so a finished inner savepoint does not turn later
            # sibling blocks into savepoints.
            connection.info[_IN_SAVEPOINT_TRANSACTION] = inherited
        else:
            connection.info.pop(_IN_SAVEPOINT_TRANSACTION, None)
@abstractmethod
def _lockTables(self, tables: Iterable[sqlalchemy.schema.Table] = ()) -> None:
    """Lock the given tables.

    Subclasses implement this hook; it is invoked by `transaction` and
    should not be called directly by other code.

    Parameters
    ----------
    tables : `Iterable` [ `sqlalchemy.schema.Table` ], optional
        A list of tables to lock for the duration of this transaction.
        These locks are guaranteed to prevent concurrent writes, but only
        prevent concurrent reads if the database engine requires that in
        order to block concurrent writes.
    """
    raise NotImplementedError()
@contextmanager
def declareStaticTables(self, *, create: bool) -> Iterator[StaticTablesContext]:
    """Return a context manager in which the database's static DDL schema
    can be declared.

    Parameters
    ----------
    create : `bool`
        If `True`, attempt to create all tables at the end of the context.
        If `False`, they will be assumed to already exist.

    Returns
    -------
    schema : `StaticTablesContext`
        A helper object that is used to add new tables.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if ``create`` is `True` and `Database.isWriteable` returns
        `False`.
    SchemaAlreadyDefinedError
        Raised if ``create`` is `True` and the namespace already contains
        tables.

    Examples
    --------
    Given a `Database` instance ``db``::

        with db.declareStaticTables(create=True) as schema:
            schema.addTable("table1", TableSpec(...))
            schema.addTable("table2", TableSpec(...))

    Notes
    -----
    A database's static DDL schema must be declared before any dynamic
    tables are managed via calls to `ensureTableExists` or
    `getExistingTable`.  The order in which static schema tables are added
    inside the context block is unimportant; they will automatically be
    sorted and added in an order consistent with their foreign key
    relationships.

    If anything fails, including code inside the ``with`` block, the
    schema metadata held by this instance is reset to `None` before the
    exception propagates.
    """
    if create and not self.isWriteable():
        raise ReadOnlyDatabaseError(f"Cannot create tables in read-only database {self}.")
    self._metadata = sqlalchemy.MetaData(schema=self.namespace)
    try:
        context = StaticTablesContext(self)
        if create and context._tableNames:
            # The database looks initialized already; refuse to do anything
            # rather than risk modifying or destroying a valid schema.
            raise SchemaAlreadyDefinedError(f"Cannot create tables in non-empty database {self}.")
        yield context
        # Every table has been declared by now, so the deferred foreign
        # keys can be attached.
        for table, foreignKey in context._foreignKeys:
            table.append_constraint(foreignKey)
        if create:
            namespace = self.namespace
            if namespace is not None and namespace not in context._inspector.get_schema_names():
                self._connection.execute(sqlalchemy.schema.CreateSchema(namespace))
            # Our tables have columns that use sqlalchemy Sequence objects.
            # A sqlalchemy bug currently makes building the repr of a
            # Sequence emit a deprecation warning from one of its
            # properties, so those warnings are filtered out while the
            # tables are created.
            with warnings.catch_warnings():
                warnings.simplefilter("ignore", category=sqlalchemy.exc.SADeprecationWarning)
                self._metadata.create_all(self._connection)
            # Run the registered initializers one after another.
            for initializer in context._initializers:
                initializer(self)
    except BaseException:
        self._metadata = None
        raise
@abstractmethod
def isWriteable(self) -> bool:
    """Report whether this client is able to modify the database
    (`True`) or not (`False`).
    """
    raise NotImplementedError()
@abstractmethod
def __str__(self) -> str:
    """Return a human-readable identifier for this `Database`, including
    any namespace or schema that identifies its names within a
    `Registry`.
    """
    raise NotImplementedError()
@property
def dialect(self) -> sqlalchemy.engine.Dialect:
    """The SQLAlchemy dialect of the wrapped connection
    (`sqlalchemy.engine.Dialect`).
    """
    connection = self._connection
    return connection.dialect
def shrinkDatabaseEntityName(self, original: str) -> str:
    """Return a form of the given name that fits within this database
    engine's length limits for table, constraint, index, and sequence
    names.

    Implementations should not assume that simple truncation is safe,
    because multiple long names often begin with the same prefix.

    This base implementation returns the name it was given.

    Parameters
    ----------
    original : `str`
        The original name.

    Returns
    -------
    shrunk : `str`
        The new, possibly shortened name.
    """
    return original
def expandDatabaseEntityName(self, shrunk: str) -> str:
    """Recover the original name of a database entity whose name was too
    long to fit within the database engine's limits.

    This base implementation returns the name it was given.

    Parameters
    ----------
    shrunk : `str`
        The possibly shortened name.

    Returns
    -------
    original : `str`
        The original name.
    """
    return shrunk
def _mangleTableName(self, name: str) -> str:
    """Translate a logical, user-visible table name into the true table
    name used in the database.

    This base implementation returns the name it was given.

    Parameters
    ----------
    name : `str`
        Input table name.  Should not include a namespace (i.e. schema)
        prefix.

    Returns
    -------
    mangled : `str`
        Mangled version of the table name (still with no namespace prefix).

    Notes
    -----
    Reimplementations of this method must be idempotent - mangling an
    already-mangled name must have no effect.
    """
    return name
def _makeColumnConstraints(self, table: str, spec: ddl.FieldSpec) -> List[sqlalchemy.CheckConstraint]:
    """Build the extra constraints implied by a field specification.

    This base implementation adds none and returns an empty list.

    Parameters
    ----------
    table : `str`
        Name of the table this column is being added to.
    spec : `FieldSpec`
        Specification for the field to be added.

    Returns
    -------
    constraint : `list` of `sqlalchemy.CheckConstraint`
        Constraints added for this column.
    """
    return list()
def _convertFieldSpec(self, table: str, spec: ddl.FieldSpec, metadata: sqlalchemy.MetaData,
                      **kwds: Any) -> sqlalchemy.schema.Column:
    """Build the `sqlalchemy.schema.Column` described by a `FieldSpec`.

    Parameters
    ----------
    table : `str`
        Name of the table this column is being added to.
    spec : `FieldSpec`
        Specification for the field to be added.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema this field's table is
        being added to.
    **kwds
        Additional keyword arguments to forward to the
        `sqlalchemy.schema.Column` constructor.  This is provided to make
        it easier for derived classes to delegate to ``super()`` while
        making only minor changes.

    Returns
    -------
    column : `sqlalchemy.schema.Column`
        SQLAlchemy representation of the field.
    """
    columnArgs = [spec.name, spec.getSizedColumnType()]
    if spec.autoincrement:
        # Provide a sequence for databases that cannot autoincrement
        # natively; sqlalchemy ignores it for databases that can.
        sequenceName = self.shrinkDatabaseEntityName(f"{table}_seq_{spec.name}")
        columnArgs.append(sqlalchemy.Sequence(sequenceName, metadata=metadata))
    assert spec.doc is None or isinstance(spec.doc, str), f"Bad doc for {table}.{spec.name}."
    return sqlalchemy.schema.Column(
        *columnArgs,
        nullable=spec.nullable,
        primary_key=spec.primaryKey,
        comment=spec.doc,
        server_default=spec.default,
        **kwds,
    )
def _convertForeignKeySpec(self, table: str, spec: ddl.ForeignKeySpec, metadata: sqlalchemy.MetaData,
                           **kwds: Any) -> sqlalchemy.schema.ForeignKeyConstraint:
    """Convert a `ForeignKeySpec` to a
    `sqlalchemy.schema.ForeignKeyConstraint`.

    Parameters
    ----------
    table : `str`
        Name of the table this foreign key is being added to.
    spec : `ForeignKeySpec`
        Specification for the foreign key to be added.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema this constraint is
        being added to.
    **kwds
        Additional keyword arguments to forward to the
        `sqlalchemy.schema.ForeignKeyConstraint` constructor.  This is
        provided to make it easier for derived classes to delegate to
        ``super()`` while making only minor changes.

    Returns
    -------
    constraint : `sqlalchemy.schema.ForeignKeyConstraint`
        SQLAlchemy representation of the constraint.
    """
    targetTable = self._mangleTableName(spec.table)
    # The constraint name combines both table names with the target and
    # source column names, then is shortened to fit engine limits.
    name = self.shrinkDatabaseEntityName(
        "_".join(["fkey", table, targetTable] + list(spec.target) + list(spec.source))
    )
    return sqlalchemy.schema.ForeignKeyConstraint(
        spec.source,
        [f"{targetTable}.{col}" for col in spec.target],
        name=name,
        ondelete=spec.onDelete,
        # Forward the caller's extra arguments, as documented above.
        **kwds,
    )
def _convertExclusionConstraintSpec(self, table: str,
                                    spec: Tuple[Union[str, Type[DatabaseTimespanRepresentation]], ...],
                                    metadata: sqlalchemy.MetaData) -> sqlalchemy.schema.Constraint:
    """Build the SQLAlchemy constraint for one `tuple` taken from
    `ddl.TableSpec.exclusion`.

    This base implementation always raises `NotImplementedError`.

    Parameters
    ----------
    table : `str`
        Name of the table this constraint is being added to.
    spec : `tuple` [ `str` or `type` ]
        A tuple of `str` column names and the `type` object returned by
        `getTimespanRepresentation` (which must appear exactly once),
        indicating the order of the columns in the index used to back the
        constraint.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema this constraint is
        being added to.

    Returns
    -------
    constraint : `sqlalchemy.schema.Constraint`
        SQLAlchemy representation of the constraint.

    Raises
    ------
    NotImplementedError
        Raised if this database does not support exclusion constraints.
    """
    message = f"Database {self} does not support exclusion constraints."
    raise NotImplementedError(message)
def _convertTableSpec(self, name: str, spec: ddl.TableSpec, metadata: sqlalchemy.MetaData,
                      **kwds: Any) -> sqlalchemy.schema.Table:
    """Build the `sqlalchemy.schema.Table` described by a `TableSpec`.

    Parameters
    ----------
    name : `str`
        Name of the table; it is passed through `_mangleTableName` first.
    spec : `TableSpec`
        Specification for the table to be added.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema this table is being
        added to.
    **kwds
        Additional keyword arguments to forward to the
        `sqlalchemy.schema.Table` constructor.  This is provided to make it
        easier for derived classes to delegate to ``super()`` while making
        only minor changes.

    Returns
    -------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy representation of the table.

    Notes
    -----
    This method does not handle ``spec.foreignKeys`` at all (apart from
    the indexes that back them), in order to avoid circular dependencies.
    The constraints themselves are added by higher-level logic in
    `ensureTableExists`, `getExistingTable`, and `declareStaticTables`.
    """
    name = self._mangleTableName(name)
    shrink = self.shrinkDatabaseEntityName

    # Columns first, then any per-column constraints.
    args = [self._convertFieldSpec(name, fieldSpec, metadata) for fieldSpec in spec.fields]
    for fieldSpec in spec.fields:
        args.extend(self._makeColumnConstraints(name, fieldSpec))

    # Column tuples already covered by the primary key or a unique
    # constraint (and, further down, by an explicit index), so that no
    # duplicate explicit or foreign key index is added for them.
    primaryKey = tuple(fieldSpec.name for fieldSpec in spec.fields if fieldSpec.primaryKey)
    covered = {primaryKey}

    for columns in spec.unique:
        constraintName = shrink("_".join([name, "unq"] + list(columns)))
        args.append(sqlalchemy.schema.UniqueConstraint(*columns, name=constraintName))
    covered.update(spec.unique)

    for columns in spec.indexes:
        if columns in covered:
            continue
        indexName = shrink("_".join([name, "idx"] + list(columns)))
        args.append(sqlalchemy.schema.Index(indexName, *columns, unique=(columns in spec.unique)))
    covered.update(spec.indexes)

    for fk in spec.foreignKeys:
        if fk.addIndex and fk.source not in covered:
            indexName = shrink("_".join((name, "fkidx") + fk.source))
            args.append(sqlalchemy.schema.Index(indexName, *fk.source))

    for excl in spec.exclusion:
        args.append(self._convertExclusionConstraintSpec(name, excl, metadata))

    assert spec.doc is None or isinstance(spec.doc, str), f"Bad doc for {name}."
    return sqlalchemy.schema.Table(name, metadata, *args, comment=spec.doc, info=spec, **kwds)
def ensureTableExists(self, name: str, spec: ddl.TableSpec) -> sqlalchemy.schema.Table:
    """Return the table with the given name and specification, creating
    it first if it does not exist yet.

    Parameters
    ----------
    name : `str`
        Name of the table (not including namespace qualifiers).
    spec : `TableSpec`
        Specification for the table.  This will be used when creating the
        table, and *may* be used when obtaining an existing table to check
        for consistency, but no such check is guaranteed.

    Returns
    -------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy representation of the table.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False`, and the table does not
        already exist.
    DatabaseConflictError
        Raised if the table exists but ``spec`` is inconsistent with its
        definition.

    Notes
    -----
    This method may not be called within transactions.  It may be called on
    read-only databases if and only if the table does in fact already
    exist.

    Subclasses may override this method, but usually should not need to.
    """
    assert not self._connection.in_transaction(), "Table creation interrupts transactions."
    assert self._metadata is not None, "Static tables must be declared before dynamic tables."
    existing = self.getExistingTable(name, spec)
    if existing is not None:
        return existing
    if not self.isWriteable():
        raise ReadOnlyDatabaseError(
            f"Table {name} does not exist, and cannot be created "
            f"because database {self} is read-only."
        )
    created = self._convertTableSpec(name, spec, self._metadata)
    # Foreign keys are not handled by `_convertTableSpec`; attach them
    # before issuing the CREATE.
    for foreignKeySpec in spec.foreignKeys:
        constraint = self._convertForeignKeySpec(name, foreignKeySpec, self._metadata)
        created.append_constraint(constraint)
    created.create(self._connection)
    return created
def getExistingTable(self, name: str, spec: ddl.TableSpec) -> Optional[sqlalchemy.schema.Table]:
    """Look up an existing table with the given name and specification.

    Parameters
    ----------
    name : `str`
        Name of the table (not including namespace qualifiers).
    spec : `TableSpec`
        Specification for the table.  This will be used when creating the
        SQLAlchemy representation of the table, and it is used to
        check that the actual table in the database is consistent.

    Returns
    -------
    table : `sqlalchemy.schema.Table` or `None`
        SQLAlchemy representation of the table, or `None` if it does not
        exist.

    Raises
    ------
    DatabaseConflictError
        Raised if the table exists but ``spec`` is inconsistent with its
        definition.

    Notes
    -----
    This method can be called within transactions and never modifies the
    database.

    Subclasses may override this method, but usually should not need to.
    """
    assert self._metadata is not None, "Static tables must be declared before dynamic tables."
    name = self._mangleTableName(name)
    key = name if self.namespace is None else f"{self.namespace}.{name}"
    table = self._metadata.tables.get(key)
    if table is not None:
        # Already known to the metadata: only the column names need to be
        # compared against the new specification.
        if spec.fields.names != set(table.columns.keys()):
            raise DatabaseConflictError(f"Table '{name}' has already been defined differently; the new "
                                        f"specification has columns {list(spec.fields.names)}, while "
                                        f"the previous definition has {list(table.columns.keys())}.")
        return table
    # Not in the metadata yet: ask the database itself.
    inspector = sqlalchemy.engine.reflection.Inspector(self._connection)
    if name not in inspector.get_table_names(schema=self.namespace):
        return None
    _checkExistingTableDefinition(name, spec, inspector.get_columns(name, schema=self.namespace))
    table = self._convertTableSpec(name, spec, self._metadata)
    for foreignKeySpec in spec.foreignKeys:
        table.append_constraint(self._convertForeignKeySpec(name, foreignKeySpec, self._metadata))
    return table
def makeTemporaryTable(self, spec: ddl.TableSpec, name: Optional[str] = None) -> sqlalchemy.schema.Table:
    """Create a temporary table.

    Parameters
    ----------
    spec : `TableSpec`
        Specification for the table.
    name : `str`, optional
        A unique (within this session/connection) name for the table.
        Subclasses may override to modify the actual name used.  If not
        provided, a unique name will be generated.

    Returns
    -------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy representation of the table.

    Raises
    ------
    ValueError
        Raised if a temporary table with the same (possibly transformed)
        name has already been created through this instance.

    Notes
    -----
    Temporary tables may be created, dropped, and written to even in
    read-only databases - at least according to the Python-level
    protections in the `Database` classes.  Server permissions may say
    otherwise, but in that case they probably need to be modified to
    support the full range of expected read-only butler behavior.

    Temporary table rows are guaranteed to be dropped when a connection is
    closed.  `Database` implementations are permitted to allow the table to
    remain as long as this is transparent to the user (i.e. "creating" the
    temporary table in a new session should not be an error, even if it
    does nothing).

    It may not be possible to use temporary tables within transactions with
    some database engines (or configurations thereof).
    """
    # Same precondition as `ensureTableExists` and `getExistingTable`: the
    # table is registered with the metadata set up by
    # `declareStaticTables`.
    assert self._metadata is not None, "Static tables must be declared before dynamic tables."
    if name is None:
        name = f"tmp_{uuid.uuid4().hex}"
    table = self._convertTableSpec(name, spec, self._metadata, prefixes=['TEMPORARY'],
                                   schema=sqlalchemy.schema.BLANK_SCHEMA)
    if table.key in self._tempTables:
        # Reject the duplicate whether or not the name was transformed;
        # the message mentions the transformation only when there was one.
        if table.key != name:
            raise ValueError(f"A temporary table with name {name} (transformed to {table.key} by "
                             f"Database) already exists.")
        raise ValueError(f"A temporary table with name {name} already exists.")
    for foreignKeySpec in spec.foreignKeys:
        table.append_constraint(self._convertForeignKeySpec(name, foreignKeySpec, self._metadata))
    table.create(self._connection)
    self._tempTables.add(table.key)
    return table
def dropTemporaryTable(self, table: sqlalchemy.schema.Table) -> None:
    """Drop a temporary table and forget that it was registered.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        A SQLAlchemy object returned by a previous call to
        `makeTemporaryTable`.

    Raises
    ------
    TypeError
        Raised if ``table.key`` is not among the temporary tables this
        `Database` has recorded.
    """
    # Guard first: refuse to drop anything that is not a known temporary
    # table.
    if table.key not in self._tempTables:
        raise TypeError(f"Table {table.key} was not created by makeTemporaryTable.")
    table.drop(self._connection)
    self._tempTables.remove(table.key)
@classmethod
def getTimespanRepresentation(cls) -> Type[DatabaseTimespanRepresentation]:
    """Return the `type` describing how `Timespan` objects are recommended
    to be stored in this database.

    `Database` does not automatically use the return type of this method
    anywhere else; calling code is responsible for making sure that DDL
    and queries are consistent with it.

    Returns
    -------
    tsRepr : `type` (`DatabaseTimespanRepresentation` subclass)
        A type that encapsulates the way `Timespan` objects should be
        stored in this database.  This implementation returns
        `DatabaseTimespanRepresentation.Compound`.

    Notes
    -----
    Timespan-mangling logic is deliberately kept outside the `Database`
    implementations, even though the choice of representation is
    ultimately up to a `Database` implementation, for two reasons:

    - Timespans appear in relatively few tables and queries in typical
      usage, and the code that operates on them already knows it is
      working with timespans.  A timespan-representation-aware
      implementation of, say, `insert` would need extra logic to identify
      when timespan-mangling had to occur, which would usually be useless
      overhead.

    - SQLAlchemy's SELECT query expression system has no way to wrap
      multiple columns in a single expression object (the ORM does, but
      the ORM is not used here), so _much_ more of that code would have
      to be wrapped in custom interfaces to encapsulate timespan
      representations there.
    """
    representation = DatabaseTimespanRepresentation.Compound
    return representation
def sync(self, table: sqlalchemy.schema.Table, *,
         keys: Dict[str, Any],
         compared: Optional[Dict[str, Any]] = None,
         extra: Optional[Dict[str, Any]] = None,
         returning: Optional[Sequence[str]] = None,
         ) -> Tuple[Optional[Dict[str, Any]], bool]:
    """Insert into a table as necessary to ensure database contains
    values equivalent to the given ones.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table to be queried and possibly inserted into.
    keys : `dict`
        Column name-value pairs used to search for an existing row; must
        be a combination that can be used to select a single row if one
        exists.  If such a row does not exist, these values are used in
        the insert.
    compared : `dict`, optional
        Column name-value pairs that are compared to those in any existing
        row.  If such a row does not exist, these values are used in the
        insert.
    extra : `dict`, optional
        Column name-value pairs that are ignored if a matching row exists,
        but used in an insert if one is necessary.
    returning : `~collections.abc.Sequence` of `str`, optional
        The names of columns whose values should be returned.

    Returns
    -------
    row : `dict`, optional
        The value of the fields indicated by ``returning``, or `None` if
        ``returning`` is `None`.
    inserted : `bool`
        If `True`, a new row was inserted.

    Raises
    ------
    DatabaseConflictError
        Raised if the values in ``compared`` do not match the values in the
        database.
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False`, and no matching record
        already exists.
    RuntimeError
        Raised if ``keys`` matches more than one row, if a successful
        insert cannot be found afterwards, or if the row found after a
        successful insert disagrees with ``compared``.
    sqlalchemy.exc.IntegrityError
        Re-raised if the insert fails and no row matching ``keys`` exists,
        i.e. the failure was caused by some other constraint.

    Notes
    -----
    This method may not be called within transactions.  It may be called on
    read-only databases if and only if the matching row does in fact
    already exist.
    """

    def check() -> Tuple[int, Optional[List[str]], Optional[List]]:
        """Query for a row that matches the ``keys`` argument, and compare
        to what was given by the caller.

        Returns
        -------
        n : `int`
            Number of matching rows.  ``n != 1`` is always an error, but
            it's a different kind of error depending on where `check` is
            being called.
        bad : `list` of `str`, or `None`
            One message of the form ``"column: existing != given"`` for
            each entry of ``compared`` whose existing value did not match
            the given one.  A non-empty list is always an error, but a
            different kind depending on context.  `None` if ``n != 1``.
        result : `list` or `None`
            Results in the database that correspond to the columns given
            in ``returning``, or `None` if ``returning is None`` or
            ``n != 1``.
        """
        toSelect: Set[str] = set()
        if compared is not None:
            toSelect.update(compared.keys())
        if returning is not None:
            toSelect.update(returning)
        if not toSelect:
            # Need to select some column, even if we just want to see
            # how many rows we get back.
            toSelect.add(next(iter(keys.keys())))
        selectSql = sqlalchemy.sql.select(
            [table.columns[k].label(k) for k in toSelect]
        ).select_from(table).where(
            sqlalchemy.sql.and_(*[table.columns[k] == v for k, v in keys.items()])
        )
        fetched = list(self._connection.execute(selectSql).fetchall())
        if len(fetched) != 1:
            return len(fetched), None, None
        existing = fetched[0]
        if compared is not None:

            def safeNotEqual(a: Any, b: Any) -> bool:
                # astropy times are compared through time_utils rather
                # than with plain ``!=``.
                if isinstance(a, astropy.time.Time):
                    return not time_utils.times_equal(a, b)
                return a != b

            inconsistencies = [f"{k}: {existing[k]!r} != {v!r}"
                               for k, v in compared.items()
                               if safeNotEqual(existing[k], v)]
        else:
            inconsistencies = []
        if returning is not None:
            toReturn: Optional[list] = [existing[k] for k in returning]
        else:
            toReturn = None
        return 1, inconsistencies, toReturn

    if self.isWriteable() or table.key in self._tempTables:
        # Database is writeable.  Try an insert first, but allow it to fail
        # (in only specific ways).
        row = keys.copy()
        if compared is not None:
            row.update(compared)
        if extra is not None:
            row.update(extra)
        insertSql = table.insert().values(row)
        try:
            with self.transaction(interrupting=True):
                self._connection.execute(insertSql)
                # Need to perform check() for this branch inside the
                # transaction, so we roll back an insert that didn't do
                # what we expected.  That limits the extent to which we
                # can reduce duplication between this block and the other
                # ones that perform similar logic.
                n, bad, result = check()
                if n < 1:
                    raise RuntimeError("Insertion in sync did not seem to affect table.  This is a bug.")
                elif n > 1:
                    raise RuntimeError(f"Keys passed to sync {keys.keys()} do not comprise a "
                                       f"unique constraint for table {table.name}.")
                elif bad:
                    raise RuntimeError(
                        f"Conflict ({bad}) in sync after successful insert; this is "
                        f"possible if the same table is being updated by a concurrent "
                        f"process that isn't using sync, but it may also be a bug in "
                        f"daf_butler."
                    )
            # No exceptions, so it looks like we inserted the requested row
            # successfully.
            inserted = True
        except sqlalchemy.exc.IntegrityError as err:
            # Most likely cause is that an equivalent row already exists,
            # but it could also be some other constraint.  Query for the
            # row we think we matched to resolve that question.
            n, bad, result = check()
            if n < 1:
                # There was no matched row; insertion failed for some
                # completely different reason.  Just re-raise the original
                # IntegrityError.
                raise
            elif n > 1:
                # There were multiple matched rows, which means we
                # conflicted *and* the arguments were bad to begin with.
                # The threshold must be ``> 1`` (as in the other branches):
                # with exactly two matches check() returns no comparison
                # or result data, so falling through would report success.
                raise RuntimeError(f"Keys passed to sync {keys.keys()} do not comprise a "
                                   f"unique constraint for table {table.name}.") from err
            elif bad:
                # No logic bug, but data conflicted on the keys given.
                raise DatabaseConflictError(f"Conflict in sync for table "
                                            f"{table.name} on column(s) {bad}.") from err
            # The desired row is already present and consistent with what
            # we tried to insert.
            inserted = False
    else:
        assert not self._connection.in_transaction(), (
            "Calling sync within a transaction block is an error even "
            "on a read-only database."
        )
        # Database is not writeable; just see if the row exists.
        n, bad, result = check()
        if n < 1:
            raise ReadOnlyDatabaseError("sync needs to insert, but database is read-only.")
        elif n > 1:
            raise RuntimeError("Keys passed to sync do not comprise a unique constraint.")
        elif bad:
            raise DatabaseConflictError(f"Conflict in sync on column(s) {bad}.")
        inserted = False
    if returning is None:
        return None, inserted
    else:
        # check() returned n == 1 on every path that reaches this point,
        # so it also produced the values for ``returning``.
        assert result is not None
        return dict(zip(returning, result)), inserted
def insert(self, table: sqlalchemy.schema.Table, *rows: dict, returnIds: bool = False,
           select: Optional[sqlalchemy.sql.Select] = None,
           names: Optional[Iterable[str]] = None,
           ) -> Optional[List[int]]:
    """Insert one or more rows into a table, optionally returning
    autoincrement primary key values.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    returnIds : `bool`
        If `True` (`False` is default), return the values of the table's
        autoincrement primary key field (which must exist).
    select : `sqlalchemy.sql.Select`, optional
        A SELECT query expression to insert rows from.  Cannot be provided
        with either ``rows`` or ``returnIds=True``.
    names : `Iterable` [ `str` ], optional
        Names of columns in ``table`` to be populated, ordered to match the
        columns returned by ``select``.  Ignored if ``select`` is `None`.
        If not provided, the columns returned by ``select`` must be named
        to match the desired columns of ``table``.
    *rows
        Positional arguments are the rows to be inserted, as dictionaries
        mapping column name to value.  The keys in all dictionaries must
        be the same.

    Returns
    -------
    ids : `None`, or `list` of `int`
        If ``returnIds`` is `True`, a `list` containing the inserted
        values for the table's autoincrement primary key; otherwise
        `None`.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called
        and ``table`` is not a registered temporary table.
    TypeError
        Raised if ``select`` is combined with ``rows`` or
        ``returnIds=True``.

    Notes
    -----
    The default implementation uses bulk insert syntax when ``returnIds``
    is `False`, and a loop over single-row insert operations when it is
    `True`.

    Derived classes should reimplement when they can provide a more
    efficient implementation (especially for the latter case).

    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.
    """
    if not self.isWriteable() and table.key not in self._tempTables:
        raise ReadOnlyDatabaseError(f"Attempt to insert into read-only database '{self}'.")
    if select is not None and (rows or returnIds):
        raise TypeError("'select' is incompatible with passing value rows or returnIds=True.")
    if select is None and not rows:
        # Nothing to insert; still honour the shape of the return value.
        return [] if returnIds else None
    if returnIds:
        # One statement per row so each generated primary key can be read.
        statement = table.insert()
        return [self._connection.execute(statement, row).inserted_primary_key[0] for row in rows]
    if select is None:
        # Bulk insert of all literal rows in a single execute call.
        self._connection.execute(table.insert(), *rows)
    else:
        if names is None:
            names = select.columns.keys()
        self._connection.execute(table.insert().from_select(names, select))
    return None
@abstractmethod
def replace(self, table: sqlalchemy.schema.Table, *rows: dict) -> None:
    """Insert one or more rows into a table, replacing any existing rows
    for which insertion of a new row would violate the primary key
    constraint.

    This is an abstract method; this base implementation only raises
    `NotImplementedError`.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    *rows
        Positional arguments are the rows to be inserted, as dictionaries
        mapping column name to value.  The keys in all dictionaries must
        be the same.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    Implementations should raise a `sqlalchemy.exc.IntegrityError`
    exception when a constraint other than the primary key would be
    violated.

    Implementations are not required to support `replace` on tables
    with autoincrement keys.
    """
    raise NotImplementedError()
def delete(self, table: sqlalchemy.schema.Table, columns: Iterable[str], *rows: dict) -> int:
    """Delete one or more rows from a table.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table that rows should be deleted from.
    columns : `~collections.abc.Iterable` of `str`
        The names of columns that will be used to constrain the rows to
        be deleted; these will be combined via ``AND`` to form the
        ``WHERE`` clause of the delete query.
    *rows
        Positional arguments are the keys of rows to be deleted, as
        dictionaries mapping column name to value.  The keys in all
        dictionaries must be exactly the names in ``columns``.

    Returns
    -------
    count : `int`
        Number of rows deleted.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called
        and ``table`` is not a registered temporary table.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    The default implementation should be sufficient for most derived
    classes.
    """
    if not self.isWriteable() and table.key not in self._tempTables:
        raise ReadOnlyDatabaseError(f"Attempt to delete from read-only database '{self}'.")
    if columns and not rows:
        # With no columns the call means "delete everything", so that case
        # proceeds below.  With columns but no rows it is a constrained
        # bulk delete whose constraint matches nothing: short-circuit and
        # report zero affected rows.
        return 0
    statement = table.delete()
    # One bound parameter per column; values come from each dict in rows.
    constraints = [table.columns[name] == sqlalchemy.sql.bindparam(name) for name in columns]
    if constraints:
        statement = statement.where(sqlalchemy.sql.and_(*constraints))
    outcome = self._connection.execute(statement, *rows)
    return outcome.rowcount
def update(self, table: sqlalchemy.schema.Table, where: Dict[str, str], *rows: dict) -> int:
    """Update one or more rows in a table.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table containing the rows to be updated.
    where : `dict` [`str`, `str`]
        A mapping from the names of columns that will be used to search for
        existing rows to the keys that will hold these values in the
        ``rows`` dictionaries.  Note that these may not be the same due to
        SQLAlchemy limitations.
    *rows
        Positional arguments are the rows to be updated.  The keys in all
        dictionaries must be the same, and may correspond to either a
        value in the ``where`` dictionary or the name of a column to be
        updated.

    Returns
    -------
    count : `int`
        Number of rows matched (regardless of whether the update actually
        modified them); ``0`` if no ``rows`` were given.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called
        and ``table`` is not a registered temporary table.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    The default implementation should be sufficient for most derived
    classes.
    """
    if not self.isWriteable() and table.key not in self._tempTables:
        raise ReadOnlyDatabaseError(f"Attempt to update read-only database '{self}'.")
    if not rows:
        return 0
    statement = table.update()
    # Each searched column is matched against a bound parameter named by
    # the corresponding value in ``where``.
    matchTerms = [table.columns[column] == sqlalchemy.sql.bindparam(paramName)
                  for column, paramName in where.items()]
    statement = statement.where(sqlalchemy.sql.and_(*matchTerms))
    outcome = self._connection.execute(statement, *rows)
    return outcome.rowcount
def query(self, sql: sqlalchemy.sql.FromClause,
          *args: Any, **kwds: Any) -> sqlalchemy.engine.ResultProxy:
    """Run a SELECT query against the database.

    Parameters
    ----------
    sql : `sqlalchemy.sql.FromClause`
        A SQLAlchemy representation of a ``SELECT`` query.
    *args
        Additional positional arguments are forwarded to
        `sqlalchemy.engine.Connection.execute`.
    **kwds
        Additional keyword arguments are forwarded to
        `sqlalchemy.engine.Connection.execute`.

    Returns
    -------
    result : `sqlalchemy.engine.ResultProxy`
        Query results.

    Notes
    -----
    The statement is passed to the connection unchanged; nothing here
    verifies that it is actually a ``SELECT``.

    The default implementation should be sufficient for most derived
    classes.
    """
    # TODO: should we guard against non-SELECT queries here?
    result = self._connection.execute(sql, *args, **kwds)
    return result
# Annotation-only declarations: no value is assigned to either attribute
# here.
origin: int
"""An integer ID that should be used as the default for any datasets,
quanta, or other entities that use a (autoincrement, origin) compound
primary key (`int`).
"""

namespace: Optional[str]
"""The schema or namespace this database instance is associated with
(`str` or `None`).
"""