Coverage for python/lsst/daf/butler/registry/interfaces/_database.py : 14%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = [
24 "Database",
25 "ReadOnlyDatabaseError",
26 "DatabaseConflictError",
27 "SchemaAlreadyDefinedError",
28 "StaticTablesContext",
29]
31from abc import ABC, abstractmethod
32from contextlib import contextmanager
33from typing import (
34 Any,
35 Callable,
36 Dict,
37 Iterable,
38 Iterator,
39 List,
40 Optional,
41 Sequence,
42 Set,
43 Tuple,
44)
45import uuid
46import warnings
48import astropy.time
49import sqlalchemy
51from ...core import ddl, time_utils
52from .._exceptions import ConflictingDefinitionError
54_IN_SAVEPOINT_TRANSACTION = "IN_SAVEPOINT_TRANSACTION"
def _checkExistingTableDefinition(name: str, spec: ddl.TableSpec, inspection: List[Dict[str, Any]]) -> None:
    """Verify that a table specification agrees with reflected column info.

    Only the set of column names is compared; the order of the columns and
    every other entry in the reflected descriptions are ignored.

    Parameters
    ----------
    name : `str`
        Name of the table (only used in error messages).
    spec : `ddl.TableSpec`
        Specification of the table.
    inspection : `list` [ `dict` ]
        Column descriptions as returned by
        `sqlalchemy.engine.reflection.Inspector.get_columns`.  Only the
        ``"name"`` entry of each description is read.

    Raises
    ------
    DatabaseConflictError
        Raised if the definitions are inconsistent.
    """
    reflectedNames = [column["name"] for column in inspection]
    if spec.fields.names != set(reflectedNames):
        message = (
            f"Table '{name}' exists but is defined differently in the database; "
            f"specification has columns {list(spec.fields.names)}, while the "
            f"table in the database has {reflectedNames}."
        )
        raise DatabaseConflictError(message)
class ReadOnlyDatabaseError(RuntimeError):
    """Error signalling that a write operation was attempted on a `Database`
    that is read-only.
    """
class DatabaseConflictError(ConflictingDefinitionError):
    """Error signalling that database content (row values or schema
    entities) is inconsistent with what this client expects.
    """
class SchemaAlreadyDefinedError(RuntimeError):
    """Error signalling an attempt to initialize a database schema in a
    database where some tables already exist.
    """
class StaticTablesContext:
    """Collector for the static schema declarations of one registry layer.

    Instances are handed out by `Database.declareStaticTables`; no other code
    should construct them.

    Parameters
    ----------
    db : `Database`
        Database whose static schema is being declared.
    """

    def __init__(self, db: Database):
        self._db = db
        # Reflect the names of the tables that already exist in the namespace
        # once, up front; `addTable` compares new declarations against them.
        self._inspector = sqlalchemy.engine.reflection.Inspector(self._db._connection)
        self._tableNames = frozenset(self._inspector.get_table_names(schema=self._db.namespace))
        # Foreign keys are only recorded here, paired with the table that owns
        # them; they are attached by `Database.declareStaticTables`.
        self._foreignKeys: List[Tuple[sqlalchemy.schema.Table, sqlalchemy.schema.ForeignKeyConstraint]] = []
        # Callables to run after the schema has been created.
        self._initializers: List[Callable[[Database], None]] = []

    def addTable(self, name: str, spec: ddl.TableSpec) -> sqlalchemy.schema.Table:
        """Declare a table, returning its SQLAlchemy representation.

        The table may not actually be created until the end of the context
        created by `Database.declareStaticTables`, so tables can be declared
        in any order even when they are related by foreign keys.

        Parameters
        ----------
        name : `str`
            Logical name of the table; it is passed through the database's
            name mangling before use.
        spec : `ddl.TableSpec`
            Specification of the table.

        Returns
        -------
        table : `sqlalchemy.schema.Table`
            SQLAlchemy representation of the table, without its foreign key
            constraints attached yet.

        Raises
        ------
        DatabaseConflictError
            Raised if a table with this name already exists in the database
            with a different set of columns.
        """
        mangled = self._db._mangleTableName(name)
        if mangled in self._tableNames:
            # The table is already in the database: make sure its columns
            # agree with the specification before declaring it.
            reflected = self._inspector.get_columns(mangled, schema=self._db.namespace)
            _checkExistingTableDefinition(mangled, spec, reflected)
        table = self._db._convertTableSpec(mangled, spec, self._db._metadata)
        for fkSpec in spec.foreignKeys:
            constraint = self._db._convertForeignKeySpec(mangled, fkSpec, self._db._metadata)
            self._foreignKeys.append((table, constraint))
        return table

    def addTableTuple(self, specs: Tuple[ddl.TableSpec, ...]) -> Tuple[sqlalchemy.schema.Table, ...]:
        """Declare every table in a named tuple of specifications.

        Each field name of the named tuple is used as the table name for the
        corresponding specification, and the resulting tables are returned in
        a named tuple of the same type.

        The new tables may not actually be created until the end of the
        context created by `Database.declareStaticTables`, so tables can be
        declared in any order even when they are related by foreign keys.

        Notes
        -----
        ``specs`` *must* be an instance of a type created by
        `collections.namedtuple`, not just a regular tuple, and the returned
        object is guaranteed to be of the same type.  Because
        `~collections.namedtuple` is just a factory for `type` objects, not
        an actual type itself, this cannot be expressed with type
        annotations.
        """
        makeResult = specs._make  # type: ignore
        tables = [self.addTable(tableName, tableSpec)
                  for tableName, tableSpec in zip(specs._fields, specs)]  # type: ignore
        return makeResult(tables)

    def addInitializer(self, initializer: Callable[[Database], None]) -> None:
        """Register a callable that performs one-time database initialization.

        Initialization can mean anything that changes the state of a database
        and needs to be done exactly once after the database schema has been
        created, for example populating schema attributes.

        Parameters
        ----------
        initializer : callable
            Callable taking a single argument, a `Database` instance.
        """
        self._initializers.append(initializer)
171class Database(ABC):
172 """An abstract interface that represents a particular database engine's
173 representation of a single schema/namespace/database.
175 Parameters
176 ----------
177 origin : `int`
178 An integer ID that should be used as the default for any datasets,
179 quanta, or other entities that use a (autoincrement, origin) compound
180 primary key.
181 connection : `sqlalchemy.engine.Connection`
182 The SQLAlchemy connection this `Database` wraps.
183 namespace : `str`, optional
184 Name of the schema or namespace this instance is associated with.
185 This is passed as the ``schema`` argument when constructing a
186 `sqlalchemy.schema.MetaData` instance. We use ``namespace`` instead to
187 avoid confusion between "schema means namespace" and "schema means
188 table definitions".
190 Notes
191 -----
192 `Database` requires all write operations to go through its special named
193 methods. Our write patterns are sufficiently simple that we don't really
194 need the full flexibility of SQL insert/update/delete syntax, and we need
195 non-standard (but common) functionality in these operations sufficiently
196 often that it seems worthwhile to provide our own generic API.
198 In contrast, `Database.query` allows arbitrary ``SELECT`` queries (via
199 their SQLAlchemy representation) to be run, as we expect these to require
200 significantly more sophistication while still being limited to standard
201 SQL.
203 `Database` itself has several underscore-prefixed attributes:
205 - ``_connection``: SQLAlchemy object representing the connection.
206 - ``_metadata``: the `sqlalchemy.schema.MetaData` object representing
207 the tables and other schema entities.
209 These are considered protected (derived classes may access them, but other
210 code should not), and read-only, aside from executing SQL via
211 ``_connection``.
212 """
def __init__(self, *, origin: int, connection: sqlalchemy.engine.Connection,
             namespace: Optional[str] = None):
    # Public attributes, taken directly from the constructor arguments.
    self.origin, self.namespace = origin, namespace
    # Protected state: the wrapped connection, the schema metadata (left as
    # `None` here and assigned by `declareStaticTables`), and the keys of the
    # temporary tables created through `makeTemporaryTable`.
    self._connection = connection
    self._metadata: Optional[sqlalchemy.schema.MetaData] = None
    self._tempTables: Set[str] = set()
def __repr__(self) -> str:
    # The connection URL (plus the namespace, when there is one) is more
    # useful to a reader than a reproduction of every constructor argument.
    location = str(self._connection.engine.url)
    suffix = f"#{self.namespace}" if self.namespace else ""
    return f'{type(self).__name__}("{location}{suffix}")'
@classmethod
def makeDefaultUri(cls, root: str) -> Optional[str]:
    """Return the default connection URI for a root directory, if any.

    Parameters
    ----------
    root : `str`
        Root directory the default should be appropriate for.

    Returns
    -------
    uri : `str` or `None`
        A connection URI, or `None` if there can be no such default.  This
        base-class implementation always returns `None`.
    """
    return None
@classmethod
def fromUri(cls, uri: str, *, origin: int, namespace: Optional[str] = None,
            writeable: bool = True) -> Database:
    """Build a `Database` from a SQLAlchemy URI.

    The URI is first turned into a connection by `connect`, and that
    connection is then wrapped by `fromConnection`.

    Parameters
    ----------
    uri : `str`
        A SQLAlchemy URI connection string.
    origin : `int`
        An integer ID that should be used as the default for any datasets,
        quanta, or other entities that use a (autoincrement, origin)
        compound primary key.
    namespace : `str`, optional
        A database namespace (i.e. schema) the new instance should be
        associated with.  If `None` (default), the namespace (if any) is
        inferred from the URI.
    writeable : `bool`, optional
        If `True`, allow write operations on the database, including
        ``CREATE TABLE``.

    Returns
    -------
    db : `Database`
        A new `Database` instance.
    """
    connection = cls.connect(uri, writeable=writeable)
    return cls.fromConnection(connection, origin=origin, namespace=namespace, writeable=writeable)
@classmethod
@abstractmethod
def connect(cls, uri: str, *, writeable: bool = True) -> sqlalchemy.engine.Connection:
    """Create a `sqlalchemy.engine.Connection` from a SQLAlchemy URI.

    Parameters
    ----------
    uri : `str`
        A SQLAlchemy URI connection string.
    writeable : `bool`, optional
        If `True`, allow write operations on the database, including
        ``CREATE TABLE``.

    Returns
    -------
    connection : `sqlalchemy.engine.Connection`
        A database connection.

    Notes
    -----
    This is an abstract hook; the base-class body only raises
    `NotImplementedError`.

    Subclasses that support other ways to connect to a database are
    encouraged to add optional arguments to their implementation of this
    method, as long as they maintain compatibility with the base class
    call signature.
    """
    raise NotImplementedError()
@classmethod
@abstractmethod
def fromConnection(cls, connection: sqlalchemy.engine.Connection, *, origin: int,
                   namespace: Optional[str] = None, writeable: bool = True) -> Database:
    """Wrap an existing `sqlalchemy.engine.Connection` in a new `Database`.

    Parameters
    ----------
    connection : `sqlalchemy.engine.Connection`
        The connection for the database.  May be shared between `Database`
        instances.
    origin : `int`
        An integer ID that should be used as the default for any datasets,
        quanta, or other entities that use a (autoincrement, origin)
        compound primary key.
    namespace : `str`, optional
        A different database namespace (i.e. schema) the new instance
        should be associated with.  If `None` (default), the namespace
        (if any) is inferred from the connection.
    writeable : `bool`, optional
        If `True`, allow write operations on the database, including
        ``CREATE TABLE``.

    Returns
    -------
    db : `Database`
        A new `Database` instance.

    Notes
    -----
    This method lets several `Database` instances share one connection,
    which is desirable when they represent different namespaces that can
    be queried together.  Sharing a connection also ties their transaction
    state together, however: starting a transaction in any one of them
    automatically starts one in all of the others.

    This is an abstract hook; the base-class body only raises
    `NotImplementedError`.
    """
    raise NotImplementedError()
@contextmanager
def transaction(self, *, interrupting: bool = False, savepoint: bool = False,
                lock: Iterable[sqlalchemy.schema.Table] = ()) -> Iterator:
    """Return a context manager that represents a transaction.

    Parameters
    ----------
    interrupting : `bool`, optional
        If `True` (`False` is default), this transaction block may not be
        nested inside an outer one, and attempting to do so is a logic
        (i.e. assertion) error.
    savepoint : `bool`, optional
        If `True` (`False` is default), create a `SAVEPOINT`, allowing
        exceptions raised by the database (e.g. due to constraint
        violations) during this transaction's context to be caught outside
        it without also rolling back all operations in an outer transaction
        block.  If `False`, transactions may still be nested, but a
        rollback may be generated at any level and affects all levels, and
        commits are deferred until the outermost block completes.  If any
        outer transaction block was created with ``savepoint=True``, all
        inner blocks will be as well (regardless of the actual value
        passed).  This has no effect if this is the outermost transaction.
    lock : `Iterable` [ `sqlalchemy.schema.Table` ], optional
        A list of tables to lock for the duration of this transaction.
        These locks are guaranteed to prevent concurrent writes, but only
        prevent concurrent reads if the database engine requires that in
        order to block concurrent writes.

    Notes
    -----
    All transactions on a connection managed by one or more `Database`
    instances _must_ go through this method, or transaction state will not
    be correctly managed.

    If acquiring the requested locks fails, the transaction that was just
    begun is rolled back before the exception propagates.
    """
    assert not (interrupting and self._connection.in_transaction()), (
        "Logic error in transaction nesting: an operation that would "
        "interrupt the active transaction context has been requested."
    )
    # We remember whether we are already in a SAVEPOINT transaction via the
    # connection object's 'info' dict, which is explicitly for user
    # information like this.  This is safer than a regular `Database`
    # instance attribute, because it guards against multiple `Database`
    # instances sharing the same connection.  The need to use our own flag
    # here to track whether we're in a nested transaction should go away in
    # SQLAlchemy 1.4, which seems to have a
    # `Connection.in_nested_transaction()` method.
    savepoint = savepoint or self._connection.info.get(_IN_SAVEPOINT_TRANSACTION, False)
    self._connection.info[_IN_SAVEPOINT_TRANSACTION] = savepoint
    if self._connection.in_transaction() and savepoint:
        trans = self._connection.begin_nested()
    else:
        # Use a regular (non-savepoint) transaction always for the
        # outermost context, as well as when a savepoint was not requested.
        trans = self._connection.begin()
    try:
        # Locks are taken inside the ``try`` so that a failure to acquire
        # them rolls back the transaction begun above and lets the
        # ``finally`` clause clear the savepoint flag, instead of leaving
        # the connection stuck in an open transaction.
        self._lockTables(lock)
        yield
        trans.commit()
    except BaseException:
        trans.rollback()
        raise
    finally:
        # Only forget the savepoint flag once no transaction (at any
        # nesting level) remains active on the connection.
        if not self._connection.in_transaction():
            self._connection.info.pop(_IN_SAVEPOINT_TRANSACTION, None)
@abstractmethod
def _lockTables(self, tables: Iterable[sqlalchemy.schema.Table] = ()) -> None:
    """Lock the given tables for the current transaction.

    Subclasses implement this hook; `transaction` is its only intended
    caller, and other code should not call it directly.  The base-class
    body only raises `NotImplementedError`.

    Parameters
    ----------
    tables : `Iterable` [ `sqlalchemy.schema.Table` ], optional
        A list of tables to lock for the duration of this transaction.
        These locks are guaranteed to prevent concurrent writes, but only
        prevent concurrent reads if the database engine requires that in
        order to block concurrent writes.
    """
    raise NotImplementedError()
@contextmanager
def declareStaticTables(self, *, create: bool) -> Iterator[StaticTablesContext]:
    """Return a context manager in which the database's static DDL schema
    can be declared.

    Parameters
    ----------
    create : `bool`
        If `True`, attempt to create all tables at the end of the context.
        If `False`, they will be assumed to already exist.

    Returns
    -------
    schema : `StaticTablesContext`
        A helper object that is used to add new tables.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if ``create`` is `True` and `Database.isWriteable` returns
        `False`.
    SchemaAlreadyDefinedError
        Raised if ``create`` is `True` and the database namespace already
        contains tables.

    Examples
    --------
    Given a `Database` instance ``db``::

        with db.declareStaticTables(create=True) as schema:
            schema.addTable("table1", TableSpec(...))
            schema.addTable("table2", TableSpec(...))

    Notes
    -----
    A database's static DDL schema must be declared before any dynamic
    tables are managed via calls to `ensureTableExists` or
    `getExistingTable`.  The order in which static schema tables are added
    inside the context block is unimportant; they will automatically be
    sorted and added in an order consistent with their foreign key
    relationships.

    If any exception escapes (from this method or from the body of the
    ``with`` block), the schema metadata held by this object is reset to
    `None` before the exception propagates.
    """
    if create and not self.isWriteable():
        raise ReadOnlyDatabaseError(f"Cannot create tables in read-only database {self}.")
    self._metadata = sqlalchemy.MetaData(schema=self.namespace)
    try:
        context = StaticTablesContext(self)
        if create and context._tableNames:
            # Looks like database is already initialized, to avoid danger
            # of modifying/destroying valid schema we refuse to do
            # anything in this case
            raise SchemaAlreadyDefinedError(f"Cannot create tables in non-empty database {self}.")
        yield context
        # All tables have been declared by now, so the foreign keys that
        # were deferred by `StaticTablesContext.addTable` can be attached.
        for table, foreignKey in context._foreignKeys:
            table.append_constraint(foreignKey)
        if create:
            if self.namespace is not None:
                if self.namespace not in context._inspector.get_schema_names():
                    self._connection.execute(sqlalchemy.schema.CreateSchema(self.namespace))
            # In our tables we have columns that make use of sqlalchemy
            # Sequence objects. There is currently a bug in sqlalchemy that
            # causes a deprecation warning to be thrown on a property of
            # the Sequence object when the repr for the sequence is
            # created. Here a filter is used to catch these deprecation
            # warnings when tables are created.
            with warnings.catch_warnings():
                warnings.simplefilter("ignore", category=sqlalchemy.exc.SADeprecationWarning)
                self._metadata.create_all(self._connection)
            # call all initializer methods sequentially
            for init in context._initializers:
                init(self)
    except BaseException:
        # Do not leave a partially-declared schema behind.
        self._metadata = None
        raise
@abstractmethod
def isWriteable(self) -> bool:
    """Report whether this client is able to modify the database.

    Returns
    -------
    writeable : `bool`
        `True` if this database can be modified by this client.  Subclasses
        must implement this; the base-class body only raises
        `NotImplementedError`.
    """
    raise NotImplementedError()
@abstractmethod
def __str__(self) -> str:
    """Return a human-readable identifier for this `Database`.

    The identifier should include any namespace or schema that identifies
    the database's names within a `Registry`.  Subclasses must implement
    this; the base-class body only raises `NotImplementedError`.
    """
    raise NotImplementedError()
@property
def dialect(self) -> sqlalchemy.engine.Dialect:
    """The SQLAlchemy dialect of the wrapped connection
    (`sqlalchemy.engine.Dialect`).
    """
    connection = self._connection
    return connection.dialect
def shrinkDatabaseEntityName(self, original: str) -> str:
    """Shorten a name so that it fits the database engine's length limits
    for table, constraint, index, and sequence names.

    Implementations should not assume that simple truncation is safe,
    because multiple long names often begin with the same prefix.

    This default implementation returns the given name unchanged.

    Parameters
    ----------
    original : `str`
        The original name.

    Returns
    -------
    shrunk : `str`
        The new, possibly shortened name.
    """
    return original
def expandDatabaseEntityName(self, shrunk: str) -> str:
    """Retrieve the original name for a database entity that was too long
    to fit within the database engine's limits.

    This default implementation returns the given name unchanged, which
    matches the default `shrinkDatabaseEntityName`.

    Parameters
    ----------
    shrunk : `str`
        The possibly shortened name, as produced by
        `shrinkDatabaseEntityName`.

    Returns
    -------
    original : `str`
        The original name.
    """
    return shrunk
def _mangleTableName(self, name: str) -> str:
    """Translate a logical, user-visible table name into the name actually
    used for the table in the database.

    This default implementation returns the given name unchanged.

    Parameters
    ----------
    name : `str`
        Input table name.  Should not include a namespace (i.e. schema)
        prefix.

    Returns
    -------
    mangled : `str`
        Mangled version of the table name (still with no namespace prefix).

    Notes
    -----
    Reimplementations of this method must be idempotent: mangling an
    already-mangled name must have no effect.
    """
    return name
def _makeColumnConstraints(self, table: str, spec: ddl.FieldSpec) -> List[sqlalchemy.CheckConstraint]:
    """Build the extra constraints that a field specification calls for.

    Parameters
    ----------
    table : `str`
        Name of the table this column is being added to.
    spec : `FieldSpec`
        Specification for the field to be added.

    Returns
    -------
    constraint : `list` of `sqlalchemy.CheckConstraint`
        Constraints to add for this column.  This default implementation
        adds none, so the list is always empty.
    """
    # No additional constraints in the base class.
    return []
def _convertFieldSpec(self, table: str, spec: ddl.FieldSpec, metadata: sqlalchemy.MetaData,
                      **kwds: Any) -> sqlalchemy.schema.Column:
    """Build the `sqlalchemy.schema.Column` described by a `FieldSpec`.

    Parameters
    ----------
    table : `str`
        Name of the table this column is being added to.
    spec : `FieldSpec`
        Specification for the field to be added.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema this field's table is
        being added to.
    **kwds
        Additional keyword arguments to forward to the
        `sqlalchemy.schema.Column` constructor.  This is provided to make
        it easier for derived classes to delegate to ``super()`` while
        making only minor changes.

    Returns
    -------
    column : `sqlalchemy.schema.Column`
        SQLAlchemy representation of the field.
    """
    columnArgs = [spec.name, spec.getSizedColumnType()]
    if spec.autoincrement:
        # Generate a sequence to use for auto incrementing for databases
        # that do not support it natively.  This will be ignored by
        # sqlalchemy for databases that do support it.
        sequenceName = self.shrinkDatabaseEntityName(f"{table}_seq_{spec.name}")
        columnArgs.append(sqlalchemy.Sequence(sequenceName, metadata=metadata))
    assert spec.doc is None or isinstance(spec.doc, str), f"Bad doc for {table}.{spec.name}."
    return sqlalchemy.schema.Column(
        *columnArgs,
        nullable=spec.nullable,
        primary_key=spec.primaryKey,
        comment=spec.doc,
        **kwds
    )
def _convertForeignKeySpec(self, table: str, spec: ddl.ForeignKeySpec, metadata: sqlalchemy.MetaData,
                           **kwds: Any) -> sqlalchemy.schema.ForeignKeyConstraint:
    """Convert a `ForeignKeySpec` to a
    `sqlalchemy.schema.ForeignKeyConstraint`.

    Parameters
    ----------
    table : `str`
        Name of the table this foreign key is being added to.
    spec : `ForeignKeySpec`
        Specification for the foreign key to be added.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema this constraint is
        being added to.  Not used by this implementation.
    **kwds
        Additional keyword arguments to forward to the
        `sqlalchemy.schema.ForeignKeyConstraint` constructor.  This is
        provided to make it easier for derived classes to delegate to
        ``super()`` while making only minor changes.

    Returns
    -------
    constraint : `sqlalchemy.schema.ForeignKeyConstraint`
        SQLAlchemy representation of the constraint.
    """
    targetTable = self._mangleTableName(spec.table)
    # The constraint name combines both table names and all of the columns
    # involved, then goes through the engine-specific shortening hook.
    name = self.shrinkDatabaseEntityName(
        "_".join(["fkey", table, targetTable] + list(spec.target) + list(spec.source))
    )
    return sqlalchemy.schema.ForeignKeyConstraint(
        spec.source,
        [f"{targetTable}.{col}" for col in spec.target],
        name=name,
        ondelete=spec.onDelete,
        # Forward extra keyword arguments as documented; they were
        # previously accepted but silently dropped.
        **kwds
    )
def _convertTableSpec(self, name: str, spec: ddl.TableSpec, metadata: sqlalchemy.MetaData,
                      **kwds: Any) -> sqlalchemy.schema.Table:
    """Convert a `TableSpec` to a `sqlalchemy.schema.Table`.

    Parameters
    ----------
    name : `str`
        Name of the table; it is passed through `_mangleTableName` before
        use.
    spec : `TableSpec`
        Specification for the table to be added.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema this table is being
        added to.
    **kwds
        Additional keyword arguments to forward to the
        `sqlalchemy.schema.Table` constructor.  This is provided to make it
        easier for derived classes to delegate to ``super()`` while making
        only minor changes.

    Returns
    -------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy representation of the table.

    Notes
    -----
    This method does not handle ``spec.foreignKeys`` at all, in order to
    avoid circular dependencies.  These are added by higher-level logic in
    `ensureTableExists`, `getExistingTable`, and `declareStaticTables`.
    Indexes on foreign key source columns *are* added here, for foreign
    keys that request one.
    """
    name = self._mangleTableName(name)
    args = [self._convertFieldSpec(name, fieldSpec, metadata) for fieldSpec in spec.fields]

    # Add any column constraints
    for fieldSpec in spec.fields:
        args.extend(self._makeColumnConstraints(name, fieldSpec))

    # Track indexes added for primary key and unique constraints, to make
    # sure we don't add duplicate explicit or foreign key indexes for
    # those.
    allIndexes = {tuple(fieldSpec.name for fieldSpec in spec.fields if fieldSpec.primaryKey)}
    args.extend(
        sqlalchemy.schema.UniqueConstraint(
            *columns,
            name=self.shrinkDatabaseEntityName("_".join([name, "unq"] + list(columns)))
        )
        for columns in spec.unique
    )
    allIndexes.update(spec.unique)
    # The order of these steps matters: each ``extend`` consumes its
    # generator immediately, filtering against ``allIndexes`` as it stands
    # at that point, and only then are the new column tuples recorded.
    args.extend(
        sqlalchemy.schema.Index(
            self.shrinkDatabaseEntityName("_".join([name, "idx"] + list(columns))),
            *columns,
            unique=(columns in spec.unique)
        )
        for columns in spec.indexes if columns not in allIndexes
    )
    allIndexes.update(spec.indexes)
    args.extend(
        sqlalchemy.schema.Index(
            self.shrinkDatabaseEntityName("_".join((name, "fkidx") + fk.source)),
            *fk.source,
        )
        for fk in spec.foreignKeys if fk.addIndex and fk.source not in allIndexes
    )
    assert spec.doc is None or isinstance(spec.doc, str), f"Bad doc for {name}."
    return sqlalchemy.schema.Table(name, metadata, *args, comment=spec.doc, info=spec, **kwds)
def ensureTableExists(self, name: str, spec: ddl.TableSpec) -> sqlalchemy.schema.Table:
    """Return the table with the given name and specification, creating it
    first if it does not exist yet.

    Parameters
    ----------
    name : `str`
        Name of the table (not including namespace qualifiers).
    spec : `TableSpec`
        Specification for the table.  This will be used when creating the
        table, and *may* be used when obtaining an existing table to check
        for consistency, but no such check is guaranteed.

    Returns
    -------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy representation of the table.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False`, and the table does not
        already exist.
    DatabaseConflictError
        Raised if the table exists but ``spec`` is inconsistent with its
        definition.

    Notes
    -----
    This method may not be called within transactions.  It may be called on
    read-only databases if and only if the table does in fact already
    exist.

    Subclasses may override this method, but usually should not need to.
    """
    assert not self._connection.in_transaction(), "Table creation interrupts transactions."
    assert self._metadata is not None, "Static tables must be declared before dynamic tables."
    existing = self.getExistingTable(name, spec)
    if existing is not None:
        return existing
    # The table is missing, so from here on we need write access.
    if not self.isWriteable():
        raise ReadOnlyDatabaseError(
            f"Table {name} does not exist, and cannot be created "
            f"because database {self} is read-only."
        )
    created = self._convertTableSpec(name, spec, self._metadata)
    for fkSpec in spec.foreignKeys:
        constraint = self._convertForeignKeySpec(name, fkSpec, self._metadata)
        created.append_constraint(constraint)
    created.create(self._connection)
    return created
def getExistingTable(self, name: str, spec: ddl.TableSpec) -> Optional[sqlalchemy.schema.Table]:
    """Look up an existing table with the given name and specification.

    Parameters
    ----------
    name : `str`
        Name of the table (not including namespace qualifiers).
    spec : `TableSpec`
        Specification for the table.  This will be used when creating the
        SQLAlchemy representation of the table, and it is used to
        check that the actual table in the database is consistent.

    Returns
    -------
    table : `sqlalchemy.schema.Table` or `None`
        SQLAlchemy representation of the table, or `None` if it does not
        exist.

    Raises
    ------
    DatabaseConflictError
        Raised if the table exists but ``spec`` is inconsistent with its
        definition.

    Notes
    -----
    This method can be called within transactions and never modifies the
    database.

    Subclasses may override this method, but usually should not need to.
    """
    assert self._metadata is not None, "Static tables must be declared before dynamic tables."
    name = self._mangleTableName(name)
    # Tables in the metadata are keyed by their namespace-qualified name
    # whenever a namespace is in use.
    metadataKey = name if self.namespace is None else f"{self.namespace}.{name}"
    known = self._metadata.tables.get(metadataKey)
    if known is not None:
        # Already declared through this object: only the column names of the
        # earlier definition are compared with the new specification.
        if spec.fields.names != set(known.columns.keys()):
            raise DatabaseConflictError(f"Table '{name}' has already been defined differently; the new "
                                        f"specification has columns {list(spec.fields.names)}, while "
                                        f"the previous definition has {list(known.columns.keys())}.")
        return known
    # Not declared yet: ask the database itself whether the table exists.
    inspector = sqlalchemy.engine.reflection.Inspector(self._connection)
    if name not in inspector.get_table_names(schema=self.namespace):
        return None
    _checkExistingTableDefinition(name, spec, inspector.get_columns(name, schema=self.namespace))
    reflected = self._convertTableSpec(name, spec, self._metadata)
    for fkSpec in spec.foreignKeys:
        reflected.append_constraint(self._convertForeignKeySpec(name, fkSpec, self._metadata))
    return reflected
def makeTemporaryTable(self, spec: ddl.TableSpec, name: Optional[str] = None) -> sqlalchemy.schema.Table:
    """Create a temporary table.

    Parameters
    ----------
    spec : `TableSpec`
        Specification for the table.
    name : `str`, optional
        A unique (within this session/connection) name for the table.
        Subclasses may override to modify the actual name used.  If not
        provided, a unique name will be generated.

    Returns
    -------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy representation of the table.

    Raises
    ------
    ValueError
        Raised if a temporary table with the same key was already created
        through this object and that key differs from ``name``.

    Notes
    -----
    Temporary tables may be created, dropped, and written to even in
    read-only databases - at least according to the Python-level
    protections in the `Database` classes.  Server permissions may say
    otherwise, but in that case they probably need to be modified to
    support the full range of expected read-only butler behavior.

    Temporary table rows are guaranteed to be dropped when a connection is
    closed.  `Database` implementations are permitted to allow the table to
    remain as long as this is transparent to the user (i.e. "creating" the
    temporary table in a new session should not be an error, even if it
    does nothing).

    It may not be possible to use temporary tables within transactions with
    some database engines (or configurations thereof).
    """
    if name is None:
        # No name requested: generate one from a random UUID.
        name = f"tmp_{uuid.uuid4().hex}"
    table = self._convertTableSpec(
        name,
        spec,
        self._metadata,
        prefixes=['TEMPORARY'],
        schema=sqlalchemy.schema.BLANK_SCHEMA,
    )
    if table.key in self._tempTables and table.key != name:
        raise ValueError(f"A temporary table with name {name} (transformed to {table.key} by "
                         f"Database) already exists.")
    for fkSpec in spec.foreignKeys:
        constraint = self._convertForeignKeySpec(name, fkSpec, self._metadata)
        table.append_constraint(constraint)
    table.create(self._connection)
    # Remember the key so that `dropTemporaryTable` will accept this table.
    self._tempTables.add(table.key)
    return table
def dropTemporaryTable(self, table: sqlalchemy.schema.Table) -> None:
    """Drop a temporary table.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        A SQLAlchemy object returned by an earlier call to
        `makeTemporaryTable`.

    Raises
    ------
    TypeError
        Raised if ``table`` is not registered as a temporary table of this
        `Database`.
    """
    # Guard clause: refuse anything that is not one of our temporary tables.
    if table.key not in self._tempTables:
        raise TypeError(f"Table {table.key} was not created by makeTemporaryTable.")
    table.drop(self._connection)
    self._tempTables.remove(table.key)
def sync(self, table: sqlalchemy.schema.Table, *,
         keys: Dict[str, Any],
         compared: Optional[Dict[str, Any]] = None,
         extra: Optional[Dict[str, Any]] = None,
         returning: Optional[Sequence[str]] = None,
         ) -> Tuple[Optional[Dict[str, Any]], bool]:
    """Insert into a table as necessary to ensure database contains
    values equivalent to the given ones.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table to be queried and possibly inserted into.
    keys : `dict`
        Column name-value pairs used to search for an existing row; must
        be a combination that can be used to select a single row if one
        exists.  If such a row does not exist, these values are used in
        the insert.
    compared : `dict`, optional
        Column name-value pairs that are compared to those in any existing
        row.  If such a row does not exist, these values are used in the
        insert.
    extra : `dict`, optional
        Column name-value pairs that are ignored if a matching row exists,
        but used in an insert if one is necessary.
    returning : `~collections.abc.Sequence` of `str`, optional
        The names of columns whose values should be returned.

    Returns
    -------
    row : `dict`, optional
        The value of the fields indicated by ``returning``, or `None` if
        ``returning`` is `None`.
    inserted : `bool`
        If `True`, a new row was inserted.

    Raises
    ------
    DatabaseConflictError
        Raised if the values in ``compared`` do not match the values in the
        database.
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False`, and no matching record
        already exists.
    RuntimeError
        Raised if ``keys`` matches more than one row (i.e. does not
        comprise a unique constraint), or if the state of the table right
        after a successful insert is not what was inserted.

    Notes
    -----
    This method may not be called within transactions.  It may be called on
    read-only databases if and only if the matching row does in fact
    already exist.
    """

    def check() -> Tuple[int, Optional[List[str]], Optional[List]]:
        """Query for a row that matches the ``keys`` argument, and compare
        to what was given by the caller.

        Returns
        -------
        n : `int`
            Number of matching rows.  ``n != 1`` is always an error, but
            it's a different kind of error depending on where `check` is
            being called.
        bad : `list` of `str`, or `None`
            Descriptions of the entries of ``compared`` for which the
            existing value did not match the given one.  A non-empty list
            is always an error, but a different kind depending on context.
            `None` if ``n != 1``.
        result : `list` or `None`
            Results in the database that correspond to the columns given
            in ``returning``, or `None` if ``returning is None`` or
            ``n != 1``.
        """
        toSelect: Set[str] = set()
        if compared is not None:
            toSelect.update(compared.keys())
        if returning is not None:
            toSelect.update(returning)
        if not toSelect:
            # Need to select some column, even if we just want to see
            # how many rows we get back.
            toSelect.add(next(iter(keys.keys())))
        selectSql = sqlalchemy.sql.select(
            [table.columns[k].label(k) for k in toSelect]
        ).select_from(table).where(
            sqlalchemy.sql.and_(*[table.columns[k] == v for k, v in keys.items()])
        )
        fetched = list(self._connection.execute(selectSql).fetchall())
        if len(fetched) != 1:
            return len(fetched), None, None
        existing = fetched[0]
        if compared is not None:

            def safeNotEqual(a: Any, b: Any) -> bool:
                # Times are compared through the dedicated helper rather
                # than with ``!=``.
                if isinstance(a, astropy.time.Time):
                    return not time_utils.times_equal(a, b)
                return a != b

            inconsistencies = [f"{k}: {existing[k]!r} != {v!r}"
                               for k, v in compared.items()
                               if safeNotEqual(existing[k], v)]
        else:
            inconsistencies = []
        if returning is not None:
            toReturn: Optional[list] = [existing[k] for k in returning]
        else:
            toReturn = None
        return 1, inconsistencies, toReturn

    if self.isWriteable() or table.key in self._tempTables:
        # Database is writeable.  Try an insert first, but allow it to fail
        # (in only specific ways).
        row = keys.copy()
        if compared is not None:
            row.update(compared)
        if extra is not None:
            row.update(extra)
        insertSql = table.insert().values(row)
        try:
            with self.transaction(interrupting=True):
                self._connection.execute(insertSql)
                # Need to perform check() for this branch inside the
                # transaction, so we roll back an insert that didn't do
                # what we expected.  That limits the extent to which we
                # can reduce duplication between this block and the other
                # ones that perform similar logic.
                n, bad, result = check()
                if n < 1:
                    raise RuntimeError("Insertion in sync did not seem to affect table.  This is a bug.")
                elif n > 1:
                    raise RuntimeError(f"Keys passed to sync {keys.keys()} do not comprise a "
                                       f"unique constraint for table {table.name}.")
                elif bad:
                    raise RuntimeError(
                        f"Conflict ({bad}) in sync after successful insert; this is "
                        f"possible if the same table is being updated by a concurrent "
                        f"process that isn't using sync, but it may also be a bug in "
                        f"daf_butler."
                    )
            # No exceptions, so it looks like we inserted the requested row
            # successfully.
            inserted = True
        except sqlalchemy.exc.IntegrityError as err:
            # Most likely cause is that an equivalent row already exists,
            # but it could also be some other constraint.  Query for the
            # row we think we matched to resolve that question.
            n, bad, result = check()
            if n < 1:
                # There was no matched row; insertion failed for some
                # completely different reason.  Just re-raise the original
                # IntegrityError.
                raise
            elif n > 1:
                # There were multiple matched rows, which means we
                # conflicted *and* the arguments were bad to begin with.
                # Any count above one must be caught here: check() returns
                # no comparison or result data unless exactly one row
                # matched.
                raise RuntimeError(f"Keys passed to sync {keys.keys()} do not comprise a "
                                   f"unique constraint for table {table.name}.") from err
            elif bad:
                # No logic bug, but data conflicted on the keys given.
                raise DatabaseConflictError(f"Conflict in sync for table "
                                            f"{table.name} on column(s) {bad}.") from err
            # The desired row is already present and consistent with what
            # we tried to insert.
            inserted = False
    else:
        assert not self._connection.in_transaction(), (
            "Calling sync within a transaction block is an error even "
            "on a read-only database."
        )
        # Database is not writeable; just see if the row exists.
        n, bad, result = check()
        if n < 1:
            raise ReadOnlyDatabaseError("sync needs to insert, but database is read-only.")
        elif n > 1:
            raise RuntimeError("Keys passed to sync do not comprise a unique constraint.")
        elif bad:
            raise DatabaseConflictError(f"Conflict in sync on column(s) {bad}.")
        inserted = False
    if returning is None:
        return None, inserted
    else:
        assert result is not None
        return dict(zip(returning, result)), inserted
def insert(self, table: sqlalchemy.schema.Table, *rows: dict, returnIds: bool = False,
           select: Optional[sqlalchemy.sql.Select] = None,
           names: Optional[Iterable[str]] = None,
           ) -> Optional[List[int]]:
    """Insert one or more rows into a table, optionally returning
    autoincrement primary key values.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    returnIds: `bool`
        If `True` (`False` is default), return the values of the table's
        autoincrement primary key field (which must exist).
    select : `sqlalchemy.sql.Select`, optional
        A SELECT query expression to insert rows from.  Cannot be combined
        with either ``rows`` or ``returnIds=True``.
    names : `Iterable` [ `str` ], optional
        Names of the columns in ``table`` to populate, in the order of the
        columns returned by ``select``.  Ignored if ``select`` is `None`.
        When omitted, the column names of ``select`` are used, so they
        must match the desired columns of ``table``.
    *rows
        Positional arguments are the rows to be inserted, as dictionaries
        mapping column name to value.  The keys in all dictionaries must
        be the same.

    Returns
    -------
    ids : `None`, or `list` of `int`
        If ``returnIds`` is `True`, a `list` containing the inserted
        values for the table's autoincrement primary key; `None`
        otherwise.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called
        and ``table`` is not a registered temporary table.
    TypeError
        Raised if ``select`` is given together with ``rows`` or
        ``returnIds=True``.

    Notes
    -----
    This default implementation issues one bulk insert when ``returnIds``
    is `False`, and one single-row insert per row when it is `True`.

    Derived classes should reimplement when they can provide a more
    efficient implementation (especially for the latter case).

    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.
    """
    if not self.isWriteable() and table.key not in self._tempTables:
        raise ReadOnlyDatabaseError(f"Attempt to insert into read-only database '{self}'.")
    if select is not None and (rows or returnIds):
        raise TypeError("'select' is incompatible with passing value rows or returnIds=True.")
    if select is None and not rows:
        # Nothing to insert; still honor the requested return type.
        return [] if returnIds else None
    if returnIds:
        # One statement per row so each generated key can be read back.
        insertSql = table.insert()
        return [self._connection.execute(insertSql, row).inserted_primary_key[0] for row in rows]
    if select is None:
        self._connection.execute(table.insert(), *rows)
    else:
        if names is None:
            names = select.columns.keys()
        self._connection.execute(table.insert().from_select(names, select))
    return None
@abstractmethod
def replace(self, table: sqlalchemy.schema.Table, *rows: dict) -> None:
    """Insert rows into a table, overwriting any existing rows whose
    primary key would otherwise conflict with a new row.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    *rows
        Positional arguments are the rows to be inserted, as dictionaries
        mapping column name to value.  The keys in all dictionaries must
        be the same.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    When a constraint other than the primary key would be violated,
    implementations should raise `sqlalchemy.exc.IntegrityError`.

    Supporting `replace` on tables with autoincrement keys is optional
    for implementations.
    """
    # Abstract: concrete subclasses must supply the implementation.
    raise NotImplementedError()
def delete(self, table: sqlalchemy.schema.Table, columns: Iterable[str], *rows: dict) -> int:
    """Delete one or more rows from a table.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table that rows should be deleted from.
    columns: `~collections.abc.Iterable` of `str`
        Names of the columns that constrain which rows are deleted; one
        equality term per column is combined via ``AND`` to form the
        ``WHERE`` clause of the delete query.
    *rows
        Positional arguments are the keys of rows to be deleted, as
        dictionaries mapping column name to value.  The keys in all
        dictionaries must be exactly the names in ``columns``.

    Returns
    -------
    count : `int`
        Number of rows deleted.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called
        and ``table`` is not a registered temporary table.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    The default implementation should be sufficient for most derived
    classes.
    """
    if not self.isWriteable() and table.key not in self._tempTables:
        raise ReadOnlyDatabaseError(f"Attempt to delete from read-only database '{self}'.")
    if columns and not rows:
        # With no columns at all the call means "delete everything", and
        # that case continues below.  Columns without any rows, however,
        # is a constrained bulk delete that matches nothing: stop early
        # and report zero affected rows.
        return 0
    deleteSql = table.delete()
    conditions = [table.columns[column] == sqlalchemy.sql.bindparam(column) for column in columns]
    if conditions:
        deleteSql = deleteSql.where(sqlalchemy.sql.and_(*conditions))
    outcome = self._connection.execute(deleteSql, *rows)
    return outcome.rowcount
def update(self, table: sqlalchemy.schema.Table, where: Dict[str, str], *rows: dict) -> int:
    """Update one or more rows in a table.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table containing the rows to be updated.
    where : `dict` [`str`, `str`]
        Maps the name of each column used to locate existing rows to the
        key under which the corresponding value is stored in the ``rows``
        dictionaries.  Note that these may not be the same due to
        SQLAlchemy limitations.
    *rows
        Positional arguments are the rows to be updated.  The keys in all
        dictionaries must be the same, and may correspond to either a
        value in the ``where`` dictionary or the name of a column to be
        updated.

    Returns
    -------
    count : `int`
        Number of rows matched (regardless of whether the update actually
        modified them).

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called
        and ``table`` is not a registered temporary table.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    The default implementation should be sufficient for most derived
    classes.
    """
    if not self.isWriteable() and table.key not in self._tempTables:
        raise ReadOnlyDatabaseError(f"Attempt to update read-only database '{self}'.")
    if not rows:
        # Nothing to update, so nothing can have been matched.
        return 0
    updateSql = table.update()
    # One equality term per search column, bound to the row key named in
    # ``where``.
    conditions = [
        table.columns[column] == sqlalchemy.sql.bindparam(rowKey)
        for column, rowKey in where.items()
    ]
    updateSql = updateSql.where(sqlalchemy.sql.and_(*conditions))
    outcome = self._connection.execute(updateSql, *rows)
    return outcome.rowcount
def query(self, sql: sqlalchemy.sql.FromClause,
          *args: Any, **kwds: Any) -> sqlalchemy.engine.ResultProxy:
    """Run a SELECT query against the database.

    Parameters
    ----------
    sql : `sqlalchemy.sql.FromClause`
        A SQLAlchemy representation of a ``SELECT`` query.
    *args
        Extra positional arguments, passed through unchanged to
        `sqlalchemy.engine.Connection.execute`.
    **kwds
        Extra keyword arguments, passed through unchanged to
        `sqlalchemy.engine.Connection.execute`.

    Returns
    -------
    result : `sqlalchemy.engine.ResultProxy`
        Query results.

    Notes
    -----
    The default implementation should be sufficient for most derived
    classes.
    """
    # TODO: should we guard against non-SELECT queries here?
    connection = self._connection
    return connection.execute(sql, *args, **kwds)
# Annotation-only declaration: no value is assigned at this point.
origin: int
"""An integer ID that should be used as the default for any datasets,
quanta, or other entities that use a (autoincrement, origin) compound
primary key (`int`).
"""

# Annotation-only declaration: no value is assigned at this point.  Methods
# of this class pass this value as the ``schema`` argument when inspecting
# tables.
namespace: Optional[str]
"""The schema or namespace this database instance is associated with
(`str` or `None`).
"""