Coverage for python/lsst/daf/butler/registry/interfaces/_database.py : 13%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = [
24 "Database",
25 "ReadOnlyDatabaseError",
26 "DatabaseConflictError",
27 "SchemaAlreadyDefinedError",
28 "StaticTablesContext",
29]
31from abc import ABC, abstractmethod
32from contextlib import contextmanager
33from typing import (
34 Any,
35 Callable,
36 Dict,
37 Iterable,
38 Iterator,
39 List,
40 Optional,
41 Sequence,
42 Set,
43 Tuple,
44)
45import uuid
46import warnings
48import astropy.time
49import sqlalchemy
51from ...core import ddl, time_utils
52from .._exceptions import ConflictingDefinitionError
55def _checkExistingTableDefinition(name: str, spec: ddl.TableSpec, inspection: List[Dict[str, Any]]) -> None:
56 """Test that the definition of a table in a `ddl.TableSpec` and from
57 database introspection are consistent.
59 Parameters
60 ----------
61 name : `str`
62 Name of the table (only used in error messages).
63 spec : `ddl.TableSpec`
64 Specification of the table.
65 inspection : `dict`
66 Dictionary returned by
67 `sqlalchemy.engine.reflection.Inspector.get_columns`.
69 Raises
70 ------
71 DatabaseConflictError
72 Raised if the definitions are inconsistent.
73 """
74 columnNames = [c["name"] for c in inspection]
75 if spec.fields.names != set(columnNames):
76 raise DatabaseConflictError(f"Table '{name}' exists but is defined differently in the database; "
77 f"specification has columns {list(spec.fields.names)}, while the "
78 f"table in the database has {columnNames}.")
class ReadOnlyDatabaseError(RuntimeError):
    """Error signalling that a write operation was attempted on a
    `Database` that is read-only.
    """
class DatabaseConflictError(ConflictingDefinitionError):
    """Error signalling that what is stored in the database (row values or
    schema entities) does not agree with what this client expects.
    """
class SchemaAlreadyDefinedError(RuntimeError):
    """Error signalling an attempt to initialize a database schema in a
    database where some tables already exist.
    """
class StaticTablesContext:
    """Collector used to declare the static schema for a registry layer in
    a database.

    `Database.declareStaticTables` constructs an instance of this class and
    yields it from its context; that should be the only way instances are
    created.
    """

    def __init__(self, db: Database):
        self._db = db
        # Foreign keys are only recorded here, each paired with the table
        # it belongs to; `Database.declareStaticTables` attaches them later.
        self._foreignKeys: List[Tuple[sqlalchemy.schema.Table, sqlalchemy.schema.ForeignKeyConstraint]] = []
        self._inspector = sqlalchemy.engine.reflection.Inspector(self._db._connection)
        # Names of the tables already present in the namespace when this
        # context was created.
        existing = self._inspector.get_table_names(schema=self._db.namespace)
        self._tableNames = frozenset(existing)
        self._initializers: List[Callable[[Database], None]] = []

    def addTable(self, name: str, spec: ddl.TableSpec) -> sqlalchemy.schema.Table:
        """Add a new table to the schema, returning its sqlalchemy
        representation.

        The new table may not actually be created until the end of the
        context created by `Database.declareStaticTables`, allowing tables
        to be declared in any order even in the presence of foreign key
        relationships.

        Parameters
        ----------
        name : `str`
            Logical name of the table; it is passed through
            `Database._mangleTableName` before use.
        spec : `ddl.TableSpec`
            Specification of the table.

        Returns
        -------
        table : `sqlalchemy.schema.Table`
            SQLAlchemy representation of the table, without its foreign key
            constraints.
        """
        db = self._db
        name = db._mangleTableName(name)
        if name in self._tableNames:
            # The table is already in the database: make sure its columns
            # agree with the specification.
            reflected = self._inspector.get_columns(name, schema=db.namespace)
            _checkExistingTableDefinition(name, spec, reflected)
        table = db._convertTableSpec(name, spec, db._metadata)
        self._foreignKeys.extend(
            (table, db._convertForeignKeySpec(name, foreignKeySpec, db._metadata))
            for foreignKeySpec in spec.foreignKeys
        )
        return table

    def addTableTuple(self, specs: Tuple[ddl.TableSpec, ...]) -> Tuple[sqlalchemy.schema.Table, ...]:
        """Add a named tuple of tables to the schema, returning their
        SQLAlchemy representations in a named tuple of the same type.

        The new tables may not actually be created until the end of the
        context created by `Database.declareStaticTables`, allowing tables
        to be declared in any order even in the presence of foreign key
        relationships.

        Notes
        -----
        ``specs`` *must* be an instance of a type created by
        `collections.namedtuple`, not just a regular tuple, and the returned
        object is guaranteed to be of the same type. Because
        `~collections.namedtuple` is just a factory for `type` objects, not
        an actual type itself, we cannot represent this with type
        annotations. The field names of ``specs`` are used as the table
        names.
        """
        tables = [self.addTable(name, spec)
                  for name, spec in zip(specs._fields, specs)]  # type: ignore
        return specs._make(tables)  # type: ignore

    def addInitializer(self, initializer: Callable[[Database], None]) -> None:
        """Register a callable that performs one-time initialization of a
        database.

        Initialization can mean anything that changes the state of a
        database and needs to be done exactly once after the database schema
        was created. An example could be population of schema attributes.

        Parameters
        ----------
        initializer : callable
            Callable taking a single argument, which is a `Database`
            instance.
        """
        self._initializers.append(initializer)
169class Database(ABC):
170 """An abstract interface that represents a particular database engine's
171 representation of a single schema/namespace/database.
173 Parameters
174 ----------
175 origin : `int`
176 An integer ID that should be used as the default for any datasets,
177 quanta, or other entities that use a (autoincrement, origin) compound
178 primary key.
179 connection : `sqlalchemy.engine.Connection`
180 The SQLAlchemy connection this `Database` wraps.
181 namespace : `str`, optional
182 Name of the schema or namespace this instance is associated with.
183 This is passed as the ``schema`` argument when constructing a
184 `sqlalchemy.schema.MetaData` instance. We use ``namespace`` instead to
185 avoid confusion between "schema means namespace" and "schema means
186 table definitions".
188 Notes
189 -----
190 `Database` requires all write operations to go through its special named
191 methods. Our write patterns are sufficiently simple that we don't really
192 need the full flexibility of SQL insert/update/delete syntax, and we need
193 non-standard (but common) functionality in these operations sufficiently
194 often that it seems worthwhile to provide our own generic API.
196 In contrast, `Database.query` allows arbitrary ``SELECT`` queries (via
197 their SQLAlchemy representation) to be run, as we expect these to require
198 significantly more sophistication while still being limited to standard
199 SQL.
201 `Database` itself has several underscore-prefixed attributes:
203 - ``_connection``: SQLAlchemy object representing the connection.
204 - ``_metadata``: the `sqlalchemy.schema.MetaData` object representing
205 the tables and other schema entities.
207 These are considered protected (derived classes may access them, but other
208 code should not), and read-only, aside from executing SQL via
209 ``_connection``.
210 """
212 def __init__(self, *, origin: int, connection: sqlalchemy.engine.Connection,
213 namespace: Optional[str] = None):
214 self.origin = origin
215 self.namespace = namespace
216 self._connection = connection
217 self._metadata: Optional[sqlalchemy.schema.MetaData] = None
218 self._tempTables: Set[str] = set()
220 def __repr__(self) -> str:
221 # Rather than try to reproduce all the parameters used to create
222 # the object, instead report the more useful information of the
223 # connection URL.
224 uri = str(self._connection.engine.url)
225 if self.namespace:
226 uri += f"#{self.namespace}"
227 return f'{type(self).__name__}("{uri}")'
229 @classmethod
230 def makeDefaultUri(cls, root: str) -> Optional[str]:
231 """Create a default connection URI appropriate for the given root
232 directory, or `None` if there can be no such default.
233 """
234 return None
236 @classmethod
237 def fromUri(cls, uri: str, *, origin: int, namespace: Optional[str] = None,
238 writeable: bool = True) -> Database:
239 """Construct a database from a SQLAlchemy URI.
241 Parameters
242 ----------
243 uri : `str`
244 A SQLAlchemy URI connection string.
245 origin : `int`
246 An integer ID that should be used as the default for any datasets,
247 quanta, or other entities that use a (autoincrement, origin)
248 compound primary key.
249 namespace : `str`, optional
250 A database namespace (i.e. schema) the new instance should be
251 associated with. If `None` (default), the namespace (if any) is
252 inferred from the URI.
253 writeable : `bool`, optional
254 If `True`, allow write operations on the database, including
255 ``CREATE TABLE``.
257 Returns
258 -------
259 db : `Database`
260 A new `Database` instance.
261 """
262 return cls.fromConnection(cls.connect(uri, writeable=writeable),
263 origin=origin,
264 namespace=namespace,
265 writeable=writeable)
267 @classmethod
268 @abstractmethod
269 def connect(cls, uri: str, *, writeable: bool = True) -> sqlalchemy.engine.Connection:
270 """Create a `sqlalchemy.engine.Connection` from a SQLAlchemy URI.
272 Parameters
273 ----------
274 uri : `str`
275 A SQLAlchemy URI connection string.
276 origin : `int`
277 An integer ID that should be used as the default for any datasets,
278 quanta, or other entities that use a (autoincrement, origin)
279 compound primary key.
280 writeable : `bool`, optional
281 If `True`, allow write operations on the database, including
282 ``CREATE TABLE``.
284 Returns
285 -------
286 connection : `sqlalchemy.engine.Connection`
287 A database connection.
289 Notes
290 -----
291 Subclasses that support other ways to connect to a database are
292 encouraged to add optional arguments to their implementation of this
293 method, as long as they maintain compatibility with the base class
294 call signature.
295 """
296 raise NotImplementedError()
298 @classmethod
299 @abstractmethod
300 def fromConnection(cls, connection: sqlalchemy.engine.Connection, *, origin: int,
301 namespace: Optional[str] = None, writeable: bool = True) -> Database:
302 """Create a new `Database` from an existing
303 `sqlalchemy.engine.Connection`.
305 Parameters
306 ----------
307 connection : `sqllachemy.engine.Connection`
308 The connection for the the database. May be shared between
309 `Database` instances.
310 origin : `int`
311 An integer ID that should be used as the default for any datasets,
312 quanta, or other entities that use a (autoincrement, origin)
313 compound primary key.
314 namespace : `str`, optional
315 A different database namespace (i.e. schema) the new instance
316 should be associated with. If `None` (default), the namespace
317 (if any) is inferred from the connection.
318 writeable : `bool`, optional
319 If `True`, allow write operations on the database, including
320 ``CREATE TABLE``.
322 Returns
323 -------
324 db : `Database`
325 A new `Database` instance.
327 Notes
328 -----
329 This method allows different `Database` instances to share the same
330 connection, which is desirable when they represent different namespaces
331 can be queried together. This also ties their transaction state,
332 however; starting a transaction in any database automatically starts
333 on in all other databases.
334 """
335 raise NotImplementedError()
337 @contextmanager
338 def transaction(self, *, interrupting: bool = False) -> Iterator:
339 """Return a context manager that represents a transaction.
341 Parameters
342 ----------
343 interrupting : `bool`
344 If `True`, this transaction block needs to be able to interrupt
345 any existing one in order to yield correct behavior.
346 """
347 assert not (interrupting and self._connection.in_transaction()), (
348 "Logic error in transaction nesting: an operation that would "
349 "interrupt the active transaction context has been requested."
350 )
351 if self._connection.in_transaction():
352 trans = self._connection.begin_nested()
353 else:
354 # Use a regular (non-savepoint) transaction only for the outermost
355 # context.
356 trans = self._connection.begin()
357 try:
358 yield
359 trans.commit()
360 except BaseException:
361 trans.rollback()
362 raise
@contextmanager
def declareStaticTables(self, *, create: bool) -> Iterator[StaticTablesContext]:
    """Return a context manager in which the database's static DDL schema
    can be declared.

    Parameters
    ----------
    create : `bool`
        If `True`, attempt to create all tables at the end of the context.
        If `False`, they will be assumed to already exist.

    Returns
    -------
    schema : `StaticTablesContext`
        A helper object that is used to add new tables.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if ``create`` is `True` and `Database.isWriteable` returns
        `False`.
    SchemaAlreadyDefinedError
        Raised if ``create`` is `True` and the namespace already contains
        at least one table.

    Examples
    --------
    Given a `Database` instance ``db``::

        with db.declareStaticTables(create=True) as schema:
            schema.addTable("table1", TableSpec(...))
            schema.addTable("table2", TableSpec(...))

    Notes
    -----
    A database's static DDL schema must be declared before any dynamic
    tables are managed via calls to `ensureTableExists` or
    `getExistingTable`. The order in which static schema tables are added
    inside the context block is unimportant; they will automatically be
    sorted and added in an order consistent with their foreign key
    relationships.

    If anything is raised, either by this method or by the body of the
    ``with`` block, the schema metadata held by this instance is reset to
    `None` before the exception propagates.
    """
    if create and not self.isWriteable():
        raise ReadOnlyDatabaseError(f"Cannot create tables in read-only database {self}.")
    # Start from a fresh MetaData; it is discarded again if anything below
    # fails.
    self._metadata = sqlalchemy.MetaData(schema=self.namespace)
    try:
        context = StaticTablesContext(self)
        if create and context._tableNames:
            # Looks like the database is already initialized; to avoid the
            # danger of modifying/destroying a valid schema we refuse to do
            # anything in this case.
            raise SchemaAlreadyDefinedError(f"Cannot create tables in non-empty database {self}.")
        yield context
        # All tables have been declared by now, so the recorded foreign
        # keys can be attached.
        for table, foreignKey in context._foreignKeys:
            table.append_constraint(foreignKey)
        if create:
            if self.namespace is not None:
                if self.namespace not in context._inspector.get_schema_names():
                    self._connection.execute(sqlalchemy.schema.CreateSchema(self.namespace))
            # In our tables we have columns that make use of sqlalchemy
            # Sequence objects. There is currently a bug in sqlalchemy that
            # causes a deprecation warning to be thrown on a property of
            # the Sequence object when the repr for the sequence is
            # created. Here a filter is used to catch these deprecation
            # warnings when tables are created.
            with warnings.catch_warnings():
                warnings.simplefilter("ignore", category=sqlalchemy.exc.SADeprecationWarning)
                self._metadata.create_all(self._connection)
            # Call all initializer methods sequentially.
            for init in context._initializers:
                init(self)
    except BaseException:
        self._metadata = None
        raise
436 @abstractmethod
437 def isWriteable(self) -> bool:
438 """Return `True` if this database can be modified by this client.
439 """
440 raise NotImplementedError()
442 @abstractmethod
443 def __str__(self) -> str:
444 """Return a human-readable identifier for this `Database`, including
445 any namespace or schema that identifies its names within a `Registry`.
446 """
447 raise NotImplementedError()
449 def shrinkDatabaseEntityName(self, original: str) -> str:
450 """Return a version of the given name that fits within this database
451 engine's length limits for table, constraint, indexes, and sequence
452 names.
454 Implementations should not assume that simple truncation is safe,
455 because multiple long names often begin with the same prefix.
457 The default implementation simply returns the given name.
459 Parameters
460 ----------
461 original : `str`
462 The original name.
464 Returns
465 -------
466 shrunk : `str`
467 The new, possibly shortened name.
468 """
469 return original
471 def expandDatabaseEntityName(self, shrunk: str) -> str:
472 """Retrieve the original name for a database entity that was too long
473 to fit within the database engine's limits.
475 Parameters
476 ----------
477 original : `str`
478 The original name.
480 Returns
481 -------
482 shrunk : `str`
483 The new, possibly shortened name.
484 """
485 return shrunk
487 def _mangleTableName(self, name: str) -> str:
488 """Map a logical, user-visible table name to the true table name used
489 in the database.
491 The default implementation returns the given name unchanged.
493 Parameters
494 ----------
495 name : `str`
496 Input table name. Should not include a namespace (i.e. schema)
497 prefix.
499 Returns
500 -------
501 mangled : `str`
502 Mangled version of the table name (still with no namespace prefix).
504 Notes
505 -----
506 Reimplementations of this method must be idempotent - mangling an
507 already-mangled name must have no effect.
508 """
509 return name
511 def _makeColumnConstraints(self, table: str, spec: ddl.FieldSpec) -> List[sqlalchemy.CheckConstraint]:
512 """Create constraints based on this spec.
514 Parameters
515 ----------
516 table : `str`
517 Name of the table this column is being added to.
518 spec : `FieldSpec`
519 Specification for the field to be added.
521 Returns
522 -------
523 constraint : `list` of `sqlalchemy.CheckConstraint`
524 Constraint added for this column.
525 """
526 # By default we return no additional constraints
527 return []
def _convertFieldSpec(self, table: str, spec: ddl.FieldSpec, metadata: sqlalchemy.MetaData,
                      **kwds: Any) -> sqlalchemy.schema.Column:
    """Translate a `FieldSpec` into a `sqlalchemy.schema.Column`.

    Parameters
    ----------
    table : `str`
        Name of the table this column is being added to.
    spec : `FieldSpec`
        Specification for the field to be added.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema this field's table is
        being added to.
    **kwds
        Additional keyword arguments to forward to the
        `sqlalchemy.schema.Column` constructor. This is provided to make
        it easier for derived classes to delegate to ``super()`` while
        making only minor changes.

    Returns
    -------
    column : `sqlalchemy.schema.Column`
        SQLAlchemy representation of the field.
    """
    columnArgs = [spec.name, spec.getSizedColumnType()]
    if spec.autoincrement:
        # Generate a sequence to use for auto incrementing for databases
        # that do not support it natively. This will be ignored by
        # sqlalchemy for databases that do support it.
        sequenceName = self.shrinkDatabaseEntityName(f"{table}_seq_{spec.name}")
        columnArgs.append(sqlalchemy.Sequence(sequenceName, metadata=metadata))
    assert spec.doc is None or isinstance(spec.doc, str), f"Bad doc for {table}.{spec.name}."
    return sqlalchemy.schema.Column(
        *columnArgs,
        nullable=spec.nullable,
        primary_key=spec.primaryKey,
        comment=spec.doc,
        **kwds
    )
def _convertForeignKeySpec(self, table: str, spec: ddl.ForeignKeySpec, metadata: sqlalchemy.MetaData,
                           **kwds: Any) -> sqlalchemy.schema.ForeignKeyConstraint:
    """Convert a `ForeignKeySpec` to a
    `sqlalchemy.schema.ForeignKeyConstraint`.

    Parameters
    ----------
    table : `str`
        Name of the table this foreign key is being added to.
    spec : `ForeignKeySpec`
        Specification for the foreign key to be added.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema this constraint is
        being added to. Not used by this implementation.
    **kwds
        Additional keyword arguments to forward to the
        `sqlalchemy.schema.ForeignKeyConstraint` constructor. This is
        provided to make it easier for derived classes to delegate to
        ``super()`` while making only minor changes. ``name`` and
        ``ondelete`` are always set by this method and must not be
        passed here.

    Returns
    -------
    constraint : `sqlalchemy.schema.ForeignKeyConstraint`
        SQLAlchemy representation of the constraint.
    """
    targetTable = self._mangleTableName(spec.table)
    # The constraint name combines both table names with the target and
    # source column names, then goes through the engine's name shortening.
    name = self.shrinkDatabaseEntityName(
        "_".join(["fkey", table, targetTable] + list(spec.target) + list(spec.source))
    )
    return sqlalchemy.schema.ForeignKeyConstraint(
        spec.source,
        [f"{targetTable}.{col}" for col in spec.target],
        name=name,
        ondelete=spec.onDelete,
        **kwds
    )
def _convertTableSpec(self, name: str, spec: ddl.TableSpec, metadata: sqlalchemy.MetaData,
                      **kwds: Any) -> sqlalchemy.schema.Table:
    """Convert a `TableSpec` to a `sqlalchemy.schema.Table`.

    Parameters
    ----------
    name : `str`
        Name of the table; it is passed through `_mangleTableName` before
        use.
    spec : `TableSpec`
        Specification for the table to be added.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema this table is being
        added to.
    **kwds
        Additional keyword arguments to forward to the
        `sqlalchemy.schema.Table` constructor. This is provided to make it
        easier for derived classes to delegate to ``super()`` while making
        only minor changes.

    Returns
    -------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy representation of the table.

    Notes
    -----
    This method does not handle ``spec.foreignKeys`` at all, in order to
    avoid circular dependencies. These are added by higher-level logic in
    `ensureTableExists`, `getExistingTable`, and `declareStaticTables`.
    Indexes on foreign key source columns *are* created here, for foreign
    keys that request one.
    """
    name = self._mangleTableName(name)
    args = [self._convertFieldSpec(name, fieldSpec, metadata) for fieldSpec in spec.fields]

    # Add any column constraints
    for fieldSpec in spec.fields:
        args.extend(self._makeColumnConstraints(name, fieldSpec))

    # Track indexes added for primary key and unique constraints, to make
    # sure we don't add duplicate explicit or foreign key indexes for
    # those.
    allIndexes = {tuple(fieldSpec.name for fieldSpec in spec.fields if fieldSpec.primaryKey)}
    args.extend(
        sqlalchemy.schema.UniqueConstraint(
            *columns,
            name=self.shrinkDatabaseEntityName("_".join([name, "unq"] + list(columns)))
        )
        for columns in spec.unique
    )
    allIndexes.update(spec.unique)
    # NOTE(review): every entry of ``spec.unique`` is already in
    # ``allIndexes`` here, so ``columns in spec.unique`` looks like it is
    # always `False` for the indexes created below - confirm whether the
    # ``unique`` argument is still needed.
    args.extend(
        sqlalchemy.schema.Index(
            self.shrinkDatabaseEntityName("_".join([name, "idx"] + list(columns))),
            *columns,
            unique=(columns in spec.unique)
        )
        for columns in spec.indexes if columns not in allIndexes
    )
    allIndexes.update(spec.indexes)
    # ``fk.source`` is normalized to a tuple so that any sequence of column
    # names can be concatenated into the index name and looked up in
    # ``allIndexes``.
    args.extend(
        sqlalchemy.schema.Index(
            self.shrinkDatabaseEntityName("_".join((name, "fkidx") + tuple(fk.source))),
            *fk.source,
        )
        for fk in spec.foreignKeys if fk.addIndex and tuple(fk.source) not in allIndexes
    )
    assert spec.doc is None or isinstance(spec.doc, str), f"Bad doc for {name}."
    return sqlalchemy.schema.Table(name, metadata, *args, comment=spec.doc, info=spec, **kwds)
666 def ensureTableExists(self, name: str, spec: ddl.TableSpec) -> sqlalchemy.schema.Table:
667 """Ensure that a table with the given name and specification exists,
668 creating it if necessary.
670 Parameters
671 ----------
672 name : `str`
673 Name of the table (not including namespace qualifiers).
674 spec : `TableSpec`
675 Specification for the table. This will be used when creating the
676 table, and *may* be used when obtaining an existing table to check
677 for consistency, but no such check is guaranteed.
679 Returns
680 -------
681 table : `sqlalchemy.schema.Table`
682 SQLAlchemy representation of the table.
684 Raises
685 ------
686 ReadOnlyDatabaseError
687 Raised if `isWriteable` returns `False`, and the table does not
688 already exist.
689 DatabaseConflictError
690 Raised if the table exists but ``spec`` is inconsistent with its
691 definition.
693 Notes
694 -----
695 This method may not be called within transactions. It may be called on
696 read-only databases if and only if the table does in fact already
697 exist.
699 Subclasses may override this method, but usually should not need to.
700 """
701 assert not self._connection.in_transaction(), "Table creation interrupts transactions."
702 assert self._metadata is not None, "Static tables must be declared before dynamic tables."
703 table = self.getExistingTable(name, spec)
704 if table is not None:
705 return table
706 if not self.isWriteable():
707 raise ReadOnlyDatabaseError(
708 f"Table {name} does not exist, and cannot be created "
709 f"because database {self} is read-only."
710 )
711 table = self._convertTableSpec(name, spec, self._metadata)
712 for foreignKeySpec in spec.foreignKeys:
713 table.append_constraint(self._convertForeignKeySpec(name, foreignKeySpec, self._metadata))
714 table.create(self._connection)
715 return table
717 def getExistingTable(self, name: str, spec: ddl.TableSpec) -> Optional[sqlalchemy.schema.Table]:
718 """Obtain an existing table with the given name and specification.
720 Parameters
721 ----------
722 name : `str`
723 Name of the table (not including namespace qualifiers).
724 spec : `TableSpec`
725 Specification for the table. This will be used when creating the
726 SQLAlchemy representation of the table, and it is used to
727 check that the actual table in the database is consistent.
729 Returns
730 -------
731 table : `sqlalchemy.schema.Table` or `None`
732 SQLAlchemy representation of the table, or `None` if it does not
733 exist.
735 Raises
736 ------
737 DatabaseConflictError
738 Raised if the table exists but ``spec`` is inconsistent with its
739 definition.
741 Notes
742 -----
743 This method can be called within transactions and never modifies the
744 database.
746 Subclasses may override this method, but usually should not need to.
747 """
748 assert self._metadata is not None, "Static tables must be declared before dynamic tables."
749 name = self._mangleTableName(name)
750 table = self._metadata.tables.get(name if self.namespace is None else f"{self.namespace}.{name}")
751 if table is not None:
752 if spec.fields.names != set(table.columns.keys()):
753 raise DatabaseConflictError(f"Table '{name}' has already been defined differently; the new "
754 f"specification has columns {list(spec.fields.names)}, while "
755 f"the previous definition has {list(table.columns.keys())}.")
756 else:
757 inspector = sqlalchemy.engine.reflection.Inspector(self._connection)
758 if name in inspector.get_table_names(schema=self.namespace):
759 _checkExistingTableDefinition(name, spec, inspector.get_columns(name, schema=self.namespace))
760 table = self._convertTableSpec(name, spec, self._metadata)
761 for foreignKeySpec in spec.foreignKeys:
762 table.append_constraint(self._convertForeignKeySpec(name, foreignKeySpec, self._metadata))
763 return table
764 return table
def makeTemporaryTable(self, spec: ddl.TableSpec, name: Optional[str] = None) -> sqlalchemy.schema.Table:
    """Create a temporary table.

    Parameters
    ----------
    spec : `TableSpec`
        Specification for the table.
    name : `str`, optional
        A unique (within this session/connection) name for the table.
        Subclasses may override to modify the actual name used. If not
        provided, a unique name will be generated.

    Returns
    -------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy representation of the table.

    Raises
    ------
    ValueError
        Raised if the key of the new table is already registered as a
        temporary table and differs from ``name``.

    Notes
    -----
    Temporary tables may be created, dropped, and written to even in
    read-only databases - at least according to the Python-level
    protections in the `Database` classes. Server permissions may say
    otherwise, but in that case they probably need to be modified to
    support the full range of expected read-only butler behavior.

    Temporary table rows are guaranteed to be dropped when a connection is
    closed. `Database` implementations are permitted to allow the table to
    remain as long as this is transparent to the user (i.e. "creating" the
    temporary table in a new session should not be an error, even if it
    does nothing).

    It may not be possible to use temporary tables within transactions with
    some database engines (or configurations thereof).
    """
    if name is None:
        # No name given: generate one from a random UUID.
        name = f"tmp_{uuid.uuid4().hex}"
    table = self._convertTableSpec(
        name,
        spec,
        self._metadata,
        prefixes=['TEMPORARY'],
        schema=sqlalchemy.schema.BLANK_SCHEMA,
    )
    if table.key in self._tempTables and table.key != name:
        raise ValueError(f"A temporary table with name {name} (transformed to {table.key} by "
                         f"Database) already exists.")
    for foreignKeySpec in spec.foreignKeys:
        constraint = self._convertForeignKeySpec(name, foreignKeySpec, self._metadata)
        table.append_constraint(constraint)
    table.create(self._connection)
    # Remember the table so `dropTemporaryTable` will accept it.
    self._tempTables.add(table.key)
    return table
814 def dropTemporaryTable(self, table: sqlalchemy.schema.Table) -> None:
815 """Drop a temporary table.
817 Parameters
818 ----------
819 table : `sqlalchemy.schema.Table`
820 A SQLAlchemy object returned by a previous call to
821 `makeTemporaryTable`.
822 """
823 if table.key in self._tempTables:
824 table.drop(self._connection)
825 self._tempTables.remove(table.key)
826 else:
827 raise TypeError(f"Table {table.key} was not created by makeTemporaryTable.")
829 def sync(self, table: sqlalchemy.schema.Table, *,
830 keys: Dict[str, Any],
831 compared: Optional[Dict[str, Any]] = None,
832 extra: Optional[Dict[str, Any]] = None,
833 returning: Optional[Sequence[str]] = None,
834 ) -> Tuple[Optional[Dict[str, Any]], bool]:
835 """Insert into a table as necessary to ensure database contains
836 values equivalent to the given ones.
838 Parameters
839 ----------
840 table : `sqlalchemy.schema.Table`
841 Table to be queried and possibly inserted into.
842 keys : `dict`
843 Column name-value pairs used to search for an existing row; must
844 be a combination that can be used to select a single row if one
845 exists. If such a row does not exist, these values are used in
846 the insert.
847 compared : `dict`, optional
848 Column name-value pairs that are compared to those in any existing
849 row. If such a row does not exist, these rows are used in the
850 insert.
851 extra : `dict`, optional
852 Column name-value pairs that are ignored if a matching row exists,
853 but used in an insert if one is necessary.
854 returning : `~collections.abc.Sequence` of `str`, optional
855 The names of columns whose values should be returned.
857 Returns
858 -------
859 row : `dict`, optional
860 The value of the fields indicated by ``returning``, or `None` if
861 ``returning`` is `None`.
862 inserted : `bool`
863 If `True`, a new row was inserted.
865 Raises
866 ------
867 DatabaseConflictError
868 Raised if the values in ``compared`` do not match the values in the
869 database.
870 ReadOnlyDatabaseError
871 Raised if `isWriteable` returns `False`, and no matching record
872 already exists.
874 Notes
875 -----
876 This method may not be called within transactions. It may be called on
877 read-only databases if and only if the matching row does in fact
878 already exist.
879 """
def check() -> Tuple[int, Optional[List[str]], Optional[List]]:
    """Query for a row that matches the ``keys`` argument, and compare
    to what was given by the caller.

    Returns
    -------
    n : `int`
        Number of matching rows.  ``n != 1`` is always an error, but
        it's a different kind of error depending on where `check` is
        being called.
    bad : `list` of `str`, or `None`
        One message of the form ``"<column>: <existing> != <given>"`` for
        each entry of ``compared`` whose existing value did not match the
        given one; empty if everything matched or ``compared`` is `None`.
        A non-empty ``bad`` is always an error, but a different kind
        depending on context.  `None` if ``n != 1``.
    result : `list` or `None`
        Values from the matched row for the columns given in
        ``returning`` (in the same order), or `None` if
        ``returning is None`` or ``n != 1``.
    """
    # Select every column we need to either compare or return.
    toSelect: Set[str] = set()
    if compared is not None:
        toSelect.update(compared.keys())
    if returning is not None:
        toSelect.update(returning)
    if not toSelect:
        # Need to select some column, even if we just want to see
        # how many rows we get back.
        toSelect.add(next(iter(keys.keys())))
    # WHERE clause is the AND of equality constraints on all of ``keys``.
    selectSql = sqlalchemy.sql.select(
        [table.columns[k].label(k) for k in toSelect]
    ).select_from(table).where(
        sqlalchemy.sql.and_(*[table.columns[k] == v for k, v in keys.items()])
    )
    fetched = list(self._connection.execute(selectSql).fetchall())
    if len(fetched) != 1:
        # Zero or multiple matches: nothing meaningful to compare or
        # return, so only report the count.
        return len(fetched), None, None
    existing = fetched[0]
    if compared is not None:

        def safeNotEqual(a: Any, b: Any) -> bool:
            # astropy times are compared via time_utils.times_equal
            # instead of ``!=``.
            # NOTE(review): presumably because exact comparison is too
            # strict after a database round trip -- confirm.
            if isinstance(a, astropy.time.Time):
                return not time_utils.times_equal(a, b)
            return a != b

        inconsistencies = [f"{k}: {existing[k]!r} != {v!r}"
                           for k, v in compared.items()
                           if safeNotEqual(existing[k], v)]
    else:
        inconsistencies = []
    if returning is not None:
        toReturn: Optional[list] = [existing[k] for k in returning]
    else:
        toReturn = None
    return 1, inconsistencies, toReturn
936 if self.isWriteable() or table.key in self._tempTables:
937 # Database is writeable. Try an insert first, but allow it to fail
938 # (in only specific ways).
939 row = keys.copy()
940 if compared is not None:
941 row.update(compared)
942 if extra is not None:
943 row.update(extra)
944 insertSql = table.insert().values(row)
945 try:
946 with self.transaction(interrupting=True):
947 self._connection.execute(insertSql)
948 # Need to perform check() for this branch inside the
949 # transaction, so we roll back an insert that didn't do
950 # what we expected. That limits the extent to which we
951 # can reduce duplication between this block and the other
952 # ones that perform similar logic.
953 n, bad, result = check()
954 if n < 1:
955 raise RuntimeError("Insertion in sync did not seem to affect table. This is a bug.")
956 elif n > 1:
957 raise RuntimeError(f"Keys passed to sync {keys.keys()} do not comprise a "
958 f"unique constraint for table {table.name}.")
959 elif bad:
960 raise RuntimeError(
961 f"Conflict ({bad}) in sync after successful insert; this is "
962 f"possible if the same table is being updated by a concurrent "
963 f"process that isn't using sync, but it may also be a bug in "
964 f"daf_butler."
965 )
966 # No exceptions, so it looks like we inserted the requested row
967 # successfully.
968 inserted = True
969 except sqlalchemy.exc.IntegrityError as err:
970 # Most likely cause is that an equivalent row already exists,
971 # but it could also be some other constraint. Query for the
972 # row we think we matched to resolve that question.
973 n, bad, result = check()
974 if n < 1:
975 # There was no matched row; insertion failed for some
976 # completely different reason. Just re-raise the original
977 # IntegrityError.
978 raise
979 elif n > 2:
980 # There were multiple matched rows, which means we
981 # conflicted *and* the arguments were bad to begin with.
982 raise RuntimeError(f"Keys passed to sync {keys.keys()} do not comprise a "
983 f"unique constraint for table {table.name}.") from err
984 elif bad:
985 # No logic bug, but data conflicted on the keys given.
986 raise DatabaseConflictError(f"Conflict in sync for table "
987 f"{table.name} on column(s) {bad}.") from err
988 # The desired row is already present and consistent with what
989 # we tried to insert.
990 inserted = False
991 else:
992 assert not self._connection.in_transaction(), (
993 "Calling sync within a transaction block is an error even "
994 "on a read-only database."
995 )
996 # Database is not writeable; just see if the row exists.
997 n, bad, result = check()
998 if n < 1:
999 raise ReadOnlyDatabaseError("sync needs to insert, but database is read-only.")
1000 elif n > 1:
1001 raise RuntimeError("Keys passed to sync do not comprise a unique constraint.")
1002 elif bad:
1003 raise DatabaseConflictError(f"Conflict in sync on column(s) {bad}.")
1004 inserted = False
1005 if returning is None:
1006 return None, inserted
1007 else:
1008 assert result is not None
1009 return {k: v for k, v in zip(returning, result)}, inserted
def insert(self, table: sqlalchemy.schema.Table, *rows: dict, returnIds: bool = False,
           select: Optional[sqlalchemy.sql.Select] = None,
           names: Optional[Iterable[str]] = None,
           ) -> Optional[List[int]]:
    """Insert one or more rows into a table, optionally returning
    autoincrement primary key values.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    returnIds : `bool`
        If `True` (`False` is default), return the values of the table's
        autoincrement primary key field (which must exist).
    select : `sqlalchemy.sql.Select`, optional
        A SELECT query expression to insert rows from.  Cannot be provided
        with either ``rows`` or ``returnIds=True``.
    names : `Iterable` [ `str` ], optional
        Names of columns in ``table`` to be populated, ordered to match the
        columns returned by ``select``.  Ignored if ``select`` is `None`.
        If not provided, the columns returned by ``select`` must be named
        to match the desired columns of ``table``.  Any iterable is
        accepted, including one-shot iterators.
    *rows
        Positional arguments are the rows to be inserted, as dictionaries
        mapping column name to value.  The keys in all dictionaries must
        be the same.

    Returns
    -------
    ids : `None`, or `list` of `int`
        If ``returnIds`` is `True`, a `list` containing the inserted
        values for the table's autoincrement primary key.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called
        (and ``table`` is not one of this database's temporary tables).
    TypeError
        Raised if ``select`` is combined with ``rows`` or
        ``returnIds=True``.

    Notes
    -----
    The default implementation uses bulk insert syntax when ``returnIds``
    is `False`, and a loop over single-row insert operations when it is
    `True`.

    Derived classes should reimplement when they can provide a more
    efficient implementation (especially for the latter case).

    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.
    """
    if not (self.isWriteable() or table.key in self._tempTables):
        raise ReadOnlyDatabaseError(f"Attempt to insert into read-only database '{self}'.")
    if select is not None and (rows or returnIds):
        raise TypeError("'select' is incompatible with passing value rows or returnIds=True.")
    if not rows and select is None:
        # Nothing to insert; return the "empty" value of the right type.
        if returnIds:
            return []
        else:
            return None
    if not returnIds:
        if select is not None:
            if names is None:
                names = select.columns.keys()
            # ``names`` is only promised to be iterable; hand the statement
            # a concrete list so it can safely be traversed more than once
            # (a generator would be exhausted after the first pass).
            self._connection.execute(table.insert().from_select(list(names), select))
        else:
            # Bulk insert: one statement, all rows as parameter sets.
            self._connection.execute(table.insert(), *rows)
        return None
    else:
        # One statement per row, so each generated key can be read back.
        sql = table.insert()
        return [self._connection.execute(sql, row).inserted_primary_key[0] for row in rows]
@abstractmethod
def replace(self, table: sqlalchemy.schema.Table, *rows: dict) -> None:
    """Insert rows into a table, overwriting any existing rows whose
    primary key would otherwise collide with a new row.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table rows should be inserted into.
    *rows
        Rows to insert, each given as a dictionary mapping column name to
        value.  All dictionaries must have the same keys.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    This method may be called inside transaction contexts, so
    implementations must not perform operations that interrupt
    transactions.

    A violation of any constraint other than the primary key should be
    reported by raising `sqlalchemy.exc.IntegrityError`.

    Support for tables with autoincrement keys is optional for
    implementations.
    """
    # Abstract: concrete database classes must provide the implementation.
    raise NotImplementedError()
def delete(self, table: sqlalchemy.schema.Table, columns: Iterable[str], *rows: dict) -> int:
    """Delete one or more rows from a table.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table that rows should be deleted from.
    columns : `~collections.abc.Iterable` of `str`
        The names of columns that will be used to constrain the rows to
        be deleted; these will be combined via ``AND`` to form the
        ``WHERE`` clause of the delete query.  Any iterable is accepted,
        including one-shot iterators.
    *rows
        Positional arguments are the keys of rows to be deleted, as
        dictionaries mapping column name to value.  The keys in all
        dictionaries must be exactly the names in ``columns``.

    Returns
    -------
    count : `int`
        Number of rows deleted.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called
        (and ``table`` is not one of this database's temporary tables).

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    The default implementation should be sufficient for most derived
    classes.
    """
    if not (self.isWriteable() or table.key in self._tempTables):
        raise ReadOnlyDatabaseError(f"Attempt to delete from read-only database '{self}'.")
    # ``columns`` is only promised to be iterable.  Materialize it so the
    # emptiness test below is meaningful: an iterator object is always
    # truthy, even when it yields nothing.
    columns = list(columns)
    if columns and not rows:
        # If there are no columns, this operation is supposed to delete
        # everything (so we proceed as usual).  But if there are columns,
        # but no rows, it was a constrained bulk operation where the
        # constraint is that no rows match, and we should short-circuit
        # while reporting that no rows were affected.
        return 0
    sql = table.delete()
    # Each constraint is bound to a parameter named after its column, so
    # the ``rows`` dictionaries can be passed directly as parameter sets.
    whereTerms = [table.columns[name] == sqlalchemy.sql.bindparam(name) for name in columns]
    if whereTerms:
        sql = sql.where(sqlalchemy.sql.and_(*whereTerms))
    return self._connection.execute(sql, *rows).rowcount
def update(self, table: sqlalchemy.schema.Table, where: Dict[str, str], *rows: dict) -> int:
    """Modify existing rows of a table.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table containing the rows to be updated.
    where : `dict` [`str`, `str`]
        Maps the name of each column used to locate existing rows to the
        key under which the value to match is stored in the ``rows``
        dictionaries.  The two may have to differ because of SQLAlchemy
        limitations.
    *rows
        The rows to update, as dictionaries that all share the same keys.
        Each key is either one of the values of ``where`` or the name of a
        column to be updated.

    Returns
    -------
    count : `int`
        Number of rows matched (whether or not the update actually
        changed them).

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    This method may be called inside transaction contexts, so
    implementations must not perform operations that interrupt
    transactions.

    Most derived classes should be able to use this default
    implementation as-is.
    """
    writeable = self.isWriteable() or table.key in self._tempTables
    if not writeable:
        raise ReadOnlyDatabaseError(f"Attempt to update read-only database '{self}'.")
    if not rows:
        # No rows given, so nothing can be matched.
        return 0
    statement = table.update()
    # One equality constraint per search column, each bound to the
    # parameter name the row dictionaries use for that column.
    constraints = [
        table.columns[columnName] == sqlalchemy.sql.bindparam(paramName)
        for columnName, paramName in where.items()
    ]
    statement = statement.where(sqlalchemy.sql.and_(*constraints))
    outcome = self._connection.execute(statement, *rows)
    return outcome.rowcount
def query(self, sql: sqlalchemy.sql.FromClause,
          *args: Any, **kwds: Any) -> sqlalchemy.engine.ResultProxy:
    """Execute a SELECT query on the database and return its results.

    Parameters
    ----------
    sql : `sqlalchemy.sql.FromClause`
        A SQLAlchemy representation of a ``SELECT`` query.
    *args
        Extra positional arguments, passed through to
        `sqlalchemy.engine.Connection.execute`.
    **kwds
        Extra keyword arguments, passed through to
        `sqlalchemy.engine.Connection.execute`.

    Returns
    -------
    result : `sqlalchemy.engine.ResultProxy`
        Query results.

    Notes
    -----
    Most derived classes should be able to use this default
    implementation as-is.
    """
    # TODO: should we guard against non-SELECT queries here?
    connection = self._connection
    result = connection.execute(sql, *args, **kwds)
    return result
# Annotation-only declaration: no value is assigned at this point in the
# class body.
# NOTE(review): presumably set by concrete implementations -- confirm.
origin: int
"""An integer ID that should be used as the default for any datasets,
quanta, or other entities that use a (autoincrement, origin) compound
primary key (`int`).
"""

# Annotation-only declaration, like ``origin``; `None` is an allowed value.
namespace: Optional[str]
"""The schema or namespace this database instance is associated with
(`str` or `None`).
"""