Coverage for python/lsst/daf/butler/registry/interfaces/_database.py : 14%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = [
24 "Database",
25 "ReadOnlyDatabaseError",
26 "DatabaseConflictError",
27 "SchemaAlreadyDefinedError",
28 "StaticTablesContext",
29]
31from abc import ABC, abstractmethod
32from contextlib import contextmanager
33from typing import (
34 Any,
35 Callable,
36 Dict,
37 Iterable,
38 Iterator,
39 List,
40 Optional,
41 Sequence,
42 Set,
43 Tuple,
44)
45import warnings
47import astropy.time
48import sqlalchemy
50from ...core import ddl, time_utils
51from .._exceptions import ConflictingDefinitionError
54def _checkExistingTableDefinition(name: str, spec: ddl.TableSpec, inspection: List[Dict[str, Any]]) -> None:
55 """Test that the definition of a table in a `ddl.TableSpec` and from
56 database introspection are consistent.
58 Parameters
59 ----------
60 name : `str`
61 Name of the table (only used in error messages).
62 spec : `ddl.TableSpec`
63 Specification of the table.
64 inspection : `dict`
65 Dictionary returned by
66 `sqlalchemy.engine.reflection.Inspector.get_columns`.
68 Raises
69 ------
70 DatabaseConflictError
71 Raised if the definitions are inconsistent.
72 """
73 columnNames = [c["name"] for c in inspection]
74 if spec.fields.names != set(columnNames):
75 raise DatabaseConflictError(f"Table '{name}' exists but is defined differently in the database; "
76 f"specification has columns {list(spec.fields.names)}, while the "
77 f"table in the database has {columnNames}.")
class ReadOnlyDatabaseError(RuntimeError):
    """Error signalling that a write operation was attempted through a
    `Database` that only permits reads.
    """
class DatabaseConflictError(ConflictingDefinitionError):
    """Error signalling that what is stored in the database (row values or
    schema entities) disagrees with what this client expects.
    """
class SchemaAlreadyDefinedError(RuntimeError):
    """Error signalling an attempt to initialize a database schema in a
    database where some tables already exist.
    """
class StaticTablesContext:
    """Collector for the static schema declarations of a registry layer in
    a database.

    Instances are handed out by `Database.declareStaticTables`; no other
    code is expected to construct one.
    """

    def __init__(self, db: Database):
        self._db = db
        # Foreign key constraints are collected here rather than attached
        # to their tables immediately.
        self._foreignKeys: List[Tuple[sqlalchemy.schema.Table, sqlalchemy.schema.ForeignKeyConstraint]] = []
        self._inspector = sqlalchemy.engine.reflection.Inspector(self._db._connection)
        # Names of the tables the database reports at construction time.
        existingNames = self._inspector.get_table_names(schema=self._db.namespace)
        self._tableNames = frozenset(existingNames)
        self._initializers: List[Callable[[Database], None]] = []

    def addTable(self, name: str, spec: ddl.TableSpec) -> sqlalchemy.schema.Table:
        """Declare a table in the schema and return its SQLAlchemy
        representation.

        The table may not actually be created until the end of the context
        created by `Database.declareStaticTables`, which lets tables be
        declared in any order even when they are related by foreign keys.

        Parameters
        ----------
        name : `str`
            Name of the table; it is passed through the database's table
            name mangling before use.
        spec : `ddl.TableSpec`
            Specification of the table.

        Returns
        -------
        table : `sqlalchemy.schema.Table`
            SQLAlchemy representation of the table.
        """
        db = self._db
        mangled = db._mangleTableName(name)
        if mangled in self._tableNames:
            # The table was already in the database when this context was
            # created; make sure its columns agree with the specification.
            reflected = self._inspector.get_columns(mangled, schema=db.namespace)
            _checkExistingTableDefinition(mangled, spec, reflected)
        table = db._convertTableSpec(mangled, spec, db._metadata)
        self._foreignKeys.extend(
            (table, db._convertForeignKeySpec(mangled, foreignKeySpec, db._metadata))
            for foreignKeySpec in spec.foreignKeys
        )
        return table

    def addTableTuple(self, specs: Tuple[ddl.TableSpec, ...]) -> Tuple[sqlalchemy.schema.Table, ...]:
        """Declare every table in a named tuple of specifications, returning
        the SQLAlchemy representations in a named tuple of the same type.

        Each field name of ``specs`` is used as the name of the table
        declared from the corresponding specification.  As with `addTable`,
        the tables may not actually be created until the end of the context
        created by `Database.declareStaticTables`.

        Notes
        -----
        ``specs`` *must* be an instance of a type created by
        `collections.namedtuple`, not just a regular tuple, and the returned
        object is guaranteed to be of the same type.  Because
        `~collections.namedtuple` is a factory for `type` objects rather
        than a type itself, this cannot be expressed with type annotations.
        """
        tables = [self.addTable(fieldName, spec)
                  for fieldName, spec in zip(specs._fields, specs)]  # type: ignore
        return specs._make(tables)  # type: ignore

    def addInitializer(self, initializer: Callable[[Database], None]) -> None:
        """Register a callable that performs one-time initialization of a
        database.

        Initialization can mean anything that changes the state of a
        database and needs to be done exactly once after the database schema
        was created, for example populating schema attributes.

        Parameters
        ----------
        initializer : callable
            Callable taking a single argument, a `Database` instance.
        """
        self._initializers.append(initializer)
168class Database(ABC):
169 """An abstract interface that represents a particular database engine's
170 representation of a single schema/namespace/database.
172 Parameters
173 ----------
174 origin : `int`
175 An integer ID that should be used as the default for any datasets,
176 quanta, or other entities that use a (autoincrement, origin) compound
177 primary key.
178 connection : `sqlalchemy.engine.Connection`
179 The SQLAlchemy connection this `Database` wraps.
180 namespace : `str`, optional
181 Name of the schema or namespace this instance is associated with.
182 This is passed as the ``schema`` argument when constructing a
183 `sqlalchemy.schema.MetaData` instance. We use ``namespace`` instead to
184 avoid confusion between "schema means namespace" and "schema means
185 table definitions".
187 Notes
188 -----
189 `Database` requires all write operations to go through its special named
190 methods. Our write patterns are sufficiently simple that we don't really
191 need the full flexibility of SQL insert/update/delete syntax, and we need
192 non-standard (but common) functionality in these operations sufficiently
193 often that it seems worthwhile to provide our own generic API.
195 In contrast, `Database.query` allows arbitrary ``SELECT`` queries (via
196 their SQLAlchemy representation) to be run, as we expect these to require
197 significantly more sophistication while still being limited to standard
198 SQL.
200 `Database` itself has several underscore-prefixed attributes:
202 - ``_connection``: SQLAlchemy object representing the connection.
203 - ``_metadata``: the `sqlalchemy.schema.MetaData` object representing
204 the tables and other schema entities.
206 These are considered protected (derived classes may access them, but other
207 code should not), and read-only, aside from executing SQL via
208 ``_connection``.
209 """
211 def __init__(self, *, origin: int, connection: sqlalchemy.engine.Connection,
212 namespace: Optional[str] = None):
213 self.origin = origin
214 self.namespace = namespace
215 self._connection = connection
216 self._metadata: Optional[sqlalchemy.schema.MetaData] = None
218 def __repr__(self) -> str:
219 # Rather than try to reproduce all the parameters used to create
220 # the object, instead report the more useful information of the
221 # connection URL.
222 uri = str(self._connection.engine.url)
223 if self.namespace:
224 uri += f"#{self.namespace}"
225 return f'{type(self).__name__}("{uri}")'
227 @classmethod
228 def makeDefaultUri(cls, root: str) -> Optional[str]:
229 """Create a default connection URI appropriate for the given root
230 directory, or `None` if there can be no such default.
231 """
232 return None
234 @classmethod
235 def fromUri(cls, uri: str, *, origin: int, namespace: Optional[str] = None,
236 writeable: bool = True) -> Database:
237 """Construct a database from a SQLAlchemy URI.
239 Parameters
240 ----------
241 uri : `str`
242 A SQLAlchemy URI connection string.
243 origin : `int`
244 An integer ID that should be used as the default for any datasets,
245 quanta, or other entities that use a (autoincrement, origin)
246 compound primary key.
247 namespace : `str`, optional
248 A database namespace (i.e. schema) the new instance should be
249 associated with. If `None` (default), the namespace (if any) is
250 inferred from the URI.
251 writeable : `bool`, optional
252 If `True`, allow write operations on the database, including
253 ``CREATE TABLE``.
255 Returns
256 -------
257 db : `Database`
258 A new `Database` instance.
259 """
260 return cls.fromConnection(cls.connect(uri, writeable=writeable),
261 origin=origin,
262 namespace=namespace,
263 writeable=writeable)
265 @classmethod
266 @abstractmethod
267 def connect(cls, uri: str, *, writeable: bool = True) -> sqlalchemy.engine.Connection:
268 """Create a `sqlalchemy.engine.Connection` from a SQLAlchemy URI.
270 Parameters
271 ----------
272 uri : `str`
273 A SQLAlchemy URI connection string.
274 origin : `int`
275 An integer ID that should be used as the default for any datasets,
276 quanta, or other entities that use a (autoincrement, origin)
277 compound primary key.
278 writeable : `bool`, optional
279 If `True`, allow write operations on the database, including
280 ``CREATE TABLE``.
282 Returns
283 -------
284 connection : `sqlalchemy.engine.Connection`
285 A database connection.
287 Notes
288 -----
289 Subclasses that support other ways to connect to a database are
290 encouraged to add optional arguments to their implementation of this
291 method, as long as they maintain compatibility with the base class
292 call signature.
293 """
294 raise NotImplementedError()
296 @classmethod
297 @abstractmethod
298 def fromConnection(cls, connection: sqlalchemy.engine.Connection, *, origin: int,
299 namespace: Optional[str] = None, writeable: bool = True) -> Database:
300 """Create a new `Database` from an existing
301 `sqlalchemy.engine.Connection`.
303 Parameters
304 ----------
305 connection : `sqllachemy.engine.Connection`
306 The connection for the the database. May be shared between
307 `Database` instances.
308 origin : `int`
309 An integer ID that should be used as the default for any datasets,
310 quanta, or other entities that use a (autoincrement, origin)
311 compound primary key.
312 namespace : `str`, optional
313 A different database namespace (i.e. schema) the new instance
314 should be associated with. If `None` (default), the namespace
315 (if any) is inferred from the connection.
316 writeable : `bool`, optional
317 If `True`, allow write operations on the database, including
318 ``CREATE TABLE``.
320 Returns
321 -------
322 db : `Database`
323 A new `Database` instance.
325 Notes
326 -----
327 This method allows different `Database` instances to share the same
328 connection, which is desirable when they represent different namespaces
329 can be queried together. This also ties their transaction state,
330 however; starting a transaction in any database automatically starts
331 on in all other databases.
332 """
333 raise NotImplementedError()
@contextmanager
def transaction(self, *, interrupting: bool = False) -> Iterator:
    """Return a context manager that represents a transaction.

    The transaction is committed when the block finishes normally and
    rolled back (with the exception re-raised) when anything is raised
    inside it, including by the commit itself.

    Parameters
    ----------
    interrupting : `bool`
        If `True`, this transaction block needs to be able to interrupt
        any existing one in order to yield correct behavior.
    """
    assert not interrupting or not self._connection.in_transaction(), (
        "Logic error in transaction nesting: an operation that would "
        "interrupt the active transaction context has been requested."
    )
    # Only the outermost context gets a regular transaction; contexts
    # opened while one is already active use a nested (savepoint) one.
    opener = (self._connection.begin_nested if self._connection.in_transaction()
              else self._connection.begin)
    trans = opener()
    try:
        yield
        trans.commit()
    except BaseException:
        trans.rollback()
        raise
@contextmanager
def declareStaticTables(self, *, create: bool) -> Iterator[StaticTablesContext]:
    """Return a context manager in which the database's static DDL schema
    can be declared.

    Parameters
    ----------
    create : `bool`
        If `True`, attempt to create all tables at the end of the context.
        If `False`, they will be assumed to already exist.

    Returns
    -------
    schema : `StaticTablesContext`
        A helper object that is used to add new tables.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if ``create`` is `True` and `Database.isWriteable` returns
        `False`.
    SchemaAlreadyDefinedError
        Raised if ``create`` is `True` and the database already contains
        tables.

    Examples
    --------
    Given a `Database` instance ``db``::

        with db.declareStaticTables(create=True) as schema:
            schema.addTable("table1", TableSpec(...))
            schema.addTable("table2", TableSpec(...))

    Notes
    -----
    A database's static DDL schema must be declared before any dynamic
    tables are managed via calls to `ensureTableExists` or
    `getExistingTable`.  The order in which static schema tables are added
    inside the context block is unimportant; they will automatically be
    sorted and added in an order consistent with their foreign key
    relationships.

    If anything raises, either in the context block or while the tables
    are being created, the schema metadata of this `Database` is reset to
    `None` before the exception propagates.
    """
    if create and not self.isWriteable():
        raise ReadOnlyDatabaseError(f"Cannot create tables in read-only database {self}.")
    self._metadata = sqlalchemy.MetaData(schema=self.namespace)
    try:
        context = StaticTablesContext(self)
        if create and context._tableNames:
            # Looks like the database is already initialized; to avoid the
            # danger of modifying/destroying a valid schema we refuse to
            # do anything in this case.
            raise SchemaAlreadyDefinedError(f"Cannot create tables in non-empty database {self}.")
        yield context
        # Foreign keys are attached only now, after every table has been
        # declared, so declaration order inside the block does not matter.
        for table, foreignKey in context._foreignKeys:
            table.append_constraint(foreignKey)
        if create:
            if self.namespace is not None:
                if self.namespace not in context._inspector.get_schema_names():
                    self._connection.execute(sqlalchemy.schema.CreateSchema(self.namespace))
            # In our tables we have columns that make use of sqlalchemy
            # Sequence objects.  There is currently a bug in sqlalchemy that
            # causes a deprecation warning to be thrown on a property of
            # the Sequence object when the repr for the sequence is
            # created.  Here a filter is used to catch these deprecation
            # warnings when tables are created.
            with warnings.catch_warnings():
                warnings.simplefilter("ignore", category=sqlalchemy.exc.SADeprecationWarning)
                self._metadata.create_all(self._connection)
            # Call all initializer methods sequentially.
            for init in context._initializers:
                init(self)
    except BaseException:
        self._metadata = None
        raise
@abstractmethod
def isWriteable(self) -> bool:
    """Report whether this client is able to modify the database.

    Returns
    -------
    writeable : `bool`
        `True` if this database can be modified by this client.
    """
    raise NotImplementedError()
440 @abstractmethod
441 def __str__(self) -> str:
442 """Return a human-readable identifier for this `Database`, including
443 any namespace or schema that identifies its names within a `Registry`.
444 """
445 raise NotImplementedError()
def shrinkDatabaseEntityName(self, original: str) -> str:
    """Produce a name that fits within this database engine's length
    limits for table, constraint, index, and sequence names.

    Implementations should not assume that simple truncation is safe,
    because multiple long names often begin with the same prefix.

    This default implementation returns the given name unchanged.

    Parameters
    ----------
    original : `str`
        The original name.

    Returns
    -------
    shrunk : `str`
        The new, possibly shortened name.
    """
    return original
def expandDatabaseEntityName(self, shrunk: str) -> str:
    """Retrieve the original name for a database entity that was too long
    to fit within the database engine's limits.

    This default implementation returns the given name unchanged.

    Parameters
    ----------
    shrunk : `str`
        The possibly shortened name.

    Returns
    -------
    original : `str`
        The original name.
    """
    return shrunk
485 def _mangleTableName(self, name: str) -> str:
486 """Map a logical, user-visible table name to the true table name used
487 in the database.
489 The default implementation returns the given name unchanged.
491 Parameters
492 ----------
493 name : `str`
494 Input table name. Should not include a namespace (i.e. schema)
495 prefix.
497 Returns
498 -------
499 mangled : `str`
500 Mangled version of the table name (still with no namespace prefix).
502 Notes
503 -----
504 Reimplementations of this method must be idempotent - mangling an
505 already-mangled name must have no effect.
506 """
507 return name
509 def _makeColumnConstraints(self, table: str, spec: ddl.FieldSpec) -> List[sqlalchemy.CheckConstraint]:
510 """Create constraints based on this spec.
512 Parameters
513 ----------
514 table : `str`
515 Name of the table this column is being added to.
516 spec : `FieldSpec`
517 Specification for the field to be added.
519 Returns
520 -------
521 constraint : `list` of `sqlalchemy.CheckConstraint`
522 Constraint added for this column.
523 """
524 # By default we return no additional constraints
525 return []
def _convertFieldSpec(self, table: str, spec: ddl.FieldSpec, metadata: sqlalchemy.MetaData,
                      **kwds: Any) -> sqlalchemy.schema.Column:
    """Build the `sqlalchemy.schema.Column` described by a `FieldSpec`.

    Parameters
    ----------
    table : `str`
        Name of the table this column is being added to.
    spec : `FieldSpec`
        Specification for the field to be added.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema this field's table is
        being added to.
    **kwds
        Additional keyword arguments to forward to the
        `sqlalchemy.schema.Column` constructor.  This is provided to make
        it easier for derived classes to delegate to ``super()`` while
        making only minor changes.

    Returns
    -------
    column : `sqlalchemy.schema.Column`
        SQLAlchemy representation of the field.
    """
    columnArgs = [spec.name, spec.getSizedColumnType()]
    if spec.autoincrement:
        # Databases without native autoincrement support need a sequence;
        # sqlalchemy ignores it for databases that do support it natively.
        sequenceName = self.shrinkDatabaseEntityName(f"{table}_seq_{spec.name}")
        columnArgs.append(sqlalchemy.Sequence(sequenceName, metadata=metadata))
    assert spec.doc is None or isinstance(spec.doc, str), f"Bad doc for {table}.{spec.name}."
    return sqlalchemy.schema.Column(
        *columnArgs,
        nullable=spec.nullable,
        primary_key=spec.primaryKey,
        comment=spec.doc,
        **kwds
    )
def _convertForeignKeySpec(self, table: str, spec: ddl.ForeignKeySpec, metadata: sqlalchemy.MetaData,
                           **kwds: Any) -> sqlalchemy.schema.ForeignKeyConstraint:
    """Convert a `ForeignKeySpec` to a
    `sqlalchemy.schema.ForeignKeyConstraint`.

    Parameters
    ----------
    table : `str`
        Name of the table this foreign key is being added to.
    spec : `ForeignKeySpec`
        Specification for the foreign key to be added.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema this constraint is
        being added to.  Not used by this implementation.
    **kwds
        Additional keyword arguments to forward to the
        `sqlalchemy.schema.ForeignKeyConstraint` constructor.  This is
        provided to make it easier for derived classes to delegate to
        ``super()`` while making only minor changes.

    Returns
    -------
    constraint : `sqlalchemy.schema.ForeignKeyConstraint`
        SQLAlchemy representation of the constraint.
    """
    # The referenced table's name is needed both for the constraint name
    # and for every target column reference, so mangle it only once.
    targetTable = self._mangleTableName(spec.table)
    name = self.shrinkDatabaseEntityName(
        "_".join(["fkey", table, targetTable] + list(spec.target) + list(spec.source))
    )
    # Forward ``kwds`` as documented; they used to be silently dropped.
    return sqlalchemy.schema.ForeignKeyConstraint(
        spec.source,
        [f"{targetTable}.{col}" for col in spec.target],
        name=name,
        ondelete=spec.onDelete,
        **kwds
    )
def _convertTableSpec(self, name: str, spec: ddl.TableSpec, metadata: sqlalchemy.MetaData,
                      **kwds: Any) -> sqlalchemy.schema.Table:
    """Convert a `TableSpec` to a `sqlalchemy.schema.Table`.

    Parameters
    ----------
    name : `str`
        Name of the table; it is passed through `_mangleTableName` before
        use.
    spec : `TableSpec`
        Specification for the table to be added.
    metadata : `sqlalchemy.MetaData`
        SQLAlchemy representation of the DDL schema this table is being
        added to.
    **kwds
        Additional keyword arguments to forward to the
        `sqlalchemy.schema.Table` constructor.  This is provided to make it
        easier for derived classes to delegate to ``super()`` while making
        only minor changes.

    Returns
    -------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy representation of the table.

    Notes
    -----
    This method does not handle ``spec.foreignKeys`` at all, in order to
    avoid circular dependencies.  These are added by higher-level logic in
    `ensureTableExists`, `getExistingTable`, and `declareStaticTables`.
    Indexes on the source columns of foreign keys that request one *are*
    added here.
    """
    name = self._mangleTableName(name)
    args = [self._convertFieldSpec(name, fieldSpec, metadata) for fieldSpec in spec.fields]

    # Add any column constraints.
    for fieldSpec in spec.fields:
        args.extend(self._makeColumnConstraints(name, fieldSpec))

    # Track indexes added for primary key and unique constraints, to make
    # sure we don't add duplicate explicit or foreign key indexes for
    # those.
    allIndexes = {tuple(fieldSpec.name for fieldSpec in spec.fields if fieldSpec.primaryKey)}
    args.extend(
        sqlalchemy.schema.UniqueConstraint(
            *columns,
            name=self.shrinkDatabaseEntityName("_".join([name, "unq"] + list(columns)))
        )
        for columns in spec.unique
    )
    allIndexes.update(spec.unique)
    # Explicit indexes, skipping any column combination already covered by
    # the primary key or a unique constraint.
    args.extend(
        sqlalchemy.schema.Index(
            self.shrinkDatabaseEntityName("_".join([name, "idx"] + list(columns))),
            *columns,
            unique=(columns in spec.unique)
        )
        for columns in spec.indexes if columns not in allIndexes
    )
    allIndexes.update(spec.indexes)
    # Indexes on foreign key source columns, only where requested and not
    # already covered by one of the indexes above.
    args.extend(
        sqlalchemy.schema.Index(
            self.shrinkDatabaseEntityName("_".join((name, "fkidx") + fk.source)),
            *fk.source,
        )
        for fk in spec.foreignKeys if fk.addIndex and fk.source not in allIndexes
    )
    assert spec.doc is None or isinstance(spec.doc, str), f"Bad doc for {name}."
    return sqlalchemy.schema.Table(name, metadata, *args, comment=spec.doc, info=spec, **kwds)
def ensureTableExists(self, name: str, spec: ddl.TableSpec) -> sqlalchemy.sql.FromClause:
    """Return the table with the given name and specification, creating it
    first if it does not exist yet.

    Parameters
    ----------
    name : `str`
        Name of the table (not including namespace qualifiers).
    spec : `TableSpec`
        Specification for the table.  This will be used when creating the
        table, and *may* be used when obtaining an existing table to check
        for consistency, but no such check is guaranteed.

    Returns
    -------
    table : `sqlalchemy.schema.Table`
        SQLAlchemy representation of the table.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False`, and the table does not
        already exist.
    DatabaseConflictError
        Raised if the table exists but ``spec`` is inconsistent with its
        definition.

    Notes
    -----
    This method may not be called within transactions.  It may be called on
    read-only databases if and only if the table does in fact already
    exist.

    Subclasses may override this method, but usually should not need to.
    """
    assert not self._connection.in_transaction(), "Table creation interrupts transactions."
    assert self._metadata is not None, "Static tables must be declared before dynamic tables."
    existing = self.getExistingTable(name, spec)
    if existing is not None:
        return existing
    if not self.isWriteable():
        raise ReadOnlyDatabaseError(
            f"Table {name} does not exist, and cannot be created "
            f"because database {self} is read-only."
        )
    # The table is not there yet: build its definition, attach the foreign
    # keys, and only then emit the CREATE TABLE.
    created = self._convertTableSpec(name, spec, self._metadata)
    for foreignKeySpec in spec.foreignKeys:
        constraint = self._convertForeignKeySpec(name, foreignKeySpec, self._metadata)
        created.append_constraint(constraint)
    created.create(self._connection)
    return created
def getExistingTable(self, name: str, spec: ddl.TableSpec) -> Optional[sqlalchemy.schema.Table]:
    """Look up an existing table with the given name and specification.

    Parameters
    ----------
    name : `str`
        Name of the table (not including namespace qualifiers).
    spec : `TableSpec`
        Specification for the table.  This will be used when creating the
        SQLAlchemy representation of the table, and it is used to
        check that the actual table in the database is consistent.

    Returns
    -------
    table : `sqlalchemy.schema.Table` or `None`
        SQLAlchemy representation of the table, or `None` if it does not
        exist.

    Raises
    ------
    DatabaseConflictError
        Raised if the table exists but ``spec`` is inconsistent with its
        definition.

    Notes
    -----
    This method can be called within transactions and never modifies the
    database.

    Subclasses may override this method, but usually should not need to.
    """
    assert self._metadata is not None, "Static tables must be declared before dynamic tables."
    name = self._mangleTableName(name)
    metadataKey = name if self.namespace is None else f"{self.namespace}.{name}"
    known = self._metadata.tables.get(metadataKey)
    if known is not None:
        # Already present in the in-memory schema; just verify that the new
        # specification names the same columns.
        if spec.fields.names != set(known.columns.keys()):
            raise DatabaseConflictError(f"Table '{name}' has already been defined differently; the new "
                                        f"specification has columns {list(spec.fields.names)}, while "
                                        f"the previous definition has {list(known.columns.keys())}.")
        return known
    # Not in the in-memory schema; ask the database itself.
    inspector = sqlalchemy.engine.reflection.Inspector(self._connection)
    if name not in inspector.get_table_names(schema=self.namespace):
        return None
    _checkExistingTableDefinition(name, spec, inspector.get_columns(name, schema=self.namespace))
    table = self._convertTableSpec(name, spec, self._metadata)
    for foreignKeySpec in spec.foreignKeys:
        table.append_constraint(self._convertForeignKeySpec(name, foreignKeySpec, self._metadata))
    return table
764 def sync(self, table: sqlalchemy.schema.Table, *,
765 keys: Dict[str, Any],
766 compared: Optional[Dict[str, Any]] = None,
767 extra: Optional[Dict[str, Any]] = None,
768 returning: Optional[Sequence[str]] = None,
769 ) -> Tuple[Optional[Dict[str, Any]], bool]:
770 """Insert into a table as necessary to ensure database contains
771 values equivalent to the given ones.
773 Parameters
774 ----------
775 table : `sqlalchemy.schema.Table`
776 Table to be queried and possibly inserted into.
777 keys : `dict`
778 Column name-value pairs used to search for an existing row; must
779 be a combination that can be used to select a single row if one
780 exists. If such a row does not exist, these values are used in
781 the insert.
782 compared : `dict`, optional
783 Column name-value pairs that are compared to those in any existing
784 row. If such a row does not exist, these rows are used in the
785 insert.
786 extra : `dict`, optional
787 Column name-value pairs that are ignored if a matching row exists,
788 but used in an insert if one is necessary.
789 returning : `~collections.abc.Sequence` of `str`, optional
790 The names of columns whose values should be returned.
792 Returns
793 -------
794 row : `dict`, optional
795 The value of the fields indicated by ``returning``, or `None` if
796 ``returning`` is `None`.
797 inserted : `bool`
798 If `True`, a new row was inserted.
800 Raises
801 ------
802 DatabaseConflictError
803 Raised if the values in ``compared`` do not match the values in the
804 database.
805 ReadOnlyDatabaseError
806 Raised if `isWriteable` returns `False`, and no matching record
807 already exists.
809 Notes
810 -----
811 This method may not be called within transactions. It may be called on
812 read-only databases if and only if the matching row does in fact
813 already exist.
814 """
816 def check() -> Tuple[int, Optional[List[str]], Optional[List]]:
817 """Query for a row that matches the ``key`` argument, and compare
818 to what was given by the caller.
820 Returns
821 -------
822 n : `int`
823 Number of matching rows. ``n != 1`` is always an error, but
824 it's a different kind of error depending on where `check` is
825 being called.
826 bad : `list` of `str`, or `None`
827 The subset of the keys of ``compared`` for which the existing
828 values did not match the given one. Once again, ``not bad``
829 is always an error, but a different kind on context. `None`
830 if ``n != 1``
831 result : `list` or `None`
832 Results in the database that correspond to the columns given
833 in ``returning``, or `None` if ``returning is None``.
834 """
835 toSelect: Set[str] = set()
836 if compared is not None:
837 toSelect.update(compared.keys())
838 if returning is not None:
839 toSelect.update(returning)
840 if not toSelect:
841 # Need to select some column, even if we just want to see
842 # how many rows we get back.
843 toSelect.add(next(iter(keys.keys())))
844 selectSql = sqlalchemy.sql.select(
845 [table.columns[k].label(k) for k in toSelect]
846 ).select_from(table).where(
847 sqlalchemy.sql.and_(*[table.columns[k] == v for k, v in keys.items()])
848 )
849 fetched = list(self._connection.execute(selectSql).fetchall())
850 if len(fetched) != 1:
851 return len(fetched), None, None
852 existing = fetched[0]
853 if compared is not None:
855 def safeNotEqual(a: Any, b: Any) -> bool:
856 if isinstance(a, astropy.time.Time):
857 return not time_utils.times_equal(a, b)
858 return a != b
860 inconsistencies = [f"{k}: {existing[k]!r} != {v!r}"
861 for k, v in compared.items()
862 if safeNotEqual(existing[k], v)]
863 else:
864 inconsistencies = []
865 if returning is not None:
866 toReturn: Optional[list] = [existing[k] for k in returning]
867 else:
868 toReturn = None
869 return 1, inconsistencies, toReturn
871 if self.isWriteable():
872 # Database is writeable. Try an insert first, but allow it to fail
873 # (in only specific ways).
874 row = keys.copy()
875 if compared is not None:
876 row.update(compared)
877 if extra is not None:
878 row.update(extra)
879 insertSql = table.insert().values(row)
880 try:
881 with self.transaction(interrupting=True):
882 self._connection.execute(insertSql)
883 # Need to perform check() for this branch inside the
884 # transaction, so we roll back an insert that didn't do
885 # what we expected. That limits the extent to which we
886 # can reduce duplication between this block and the other
887 # ones that perform similar logic.
888 n, bad, result = check()
889 if n < 1:
890 raise RuntimeError("Insertion in sync did not seem to affect table. This is a bug.")
891 elif n > 1:
892 raise RuntimeError(f"Keys passed to sync {keys.keys()} do not comprise a "
893 f"unique constraint for table {table.name}.")
894 elif bad:
895 raise RuntimeError(
896 f"Conflict ({bad}) in sync after successful insert; this is "
897 f"possible if the same table is being updated by a concurrent "
898 f"process that isn't using sync, but it may also be a bug in "
899 f"daf_butler."
900 )
901 # No exceptions, so it looks like we inserted the requested row
902 # successfully.
903 inserted = True
904 except sqlalchemy.exc.IntegrityError as err:
905 # Most likely cause is that an equivalent row already exists,
906 # but it could also be some other constraint. Query for the
907 # row we think we matched to resolve that question.
908 n, bad, result = check()
909 if n < 1:
910 # There was no matched row; insertion failed for some
911 # completely different reason. Just re-raise the original
912 # IntegrityError.
913 raise
914 elif n > 2:
915 # There were multiple matched rows, which means we
916 # conflicted *and* the arguments were bad to begin with.
917 raise RuntimeError(f"Keys passed to sync {keys.keys()} do not comprise a "
918 f"unique constraint for table {table.name}.") from err
919 elif bad:
920 # No logic bug, but data conflicted on the keys given.
921 raise DatabaseConflictError(f"Conflict in sync for table "
922 f"{table.name} on column(s) {bad}.") from err
923 # The desired row is already present and consistent with what
924 # we tried to insert.
925 inserted = False
926 else:
927 assert not self._connection.in_transaction(), (
928 "Calling sync within a transaction block is an error even "
929 "on a read-only database."
930 )
931 # Database is not writeable; just see if the row exists.
932 n, bad, result = check()
933 if n < 1:
934 raise ReadOnlyDatabaseError("sync needs to insert, but database is read-only.")
935 elif n > 1:
936 raise RuntimeError("Keys passed to sync do not comprise a unique constraint.")
937 elif bad:
938 raise DatabaseConflictError(f"Conflict in sync on column(s) {bad}.")
939 inserted = False
940 if returning is None:
941 return None, inserted
942 else:
943 assert result is not None
944 return {k: v for k, v in zip(returning, result)}, inserted
def insert(self, table: sqlalchemy.schema.Table, *rows: dict, returnIds: bool = False,
           ) -> Optional[List[int]]:
    """Add rows to a table, optionally reporting the generated primary keys.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table the rows are added to.
    returnIds : `bool`
        When `True` (the default is `False`), collect and return the value
        of the table's autoincrement primary key for each inserted row;
        the table must have such a field.
    *rows
        Rows to insert, each a dictionary mapping column name to value.
        All dictionaries must share the same keys.

    Returns
    -------
    ids : `None`, or `list` of `int`
        `None` when ``returnIds`` is `False`; otherwise a `list` with one
        primary key value per inserted row, in the order the rows were
        given (empty when no rows were passed).

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    This default implementation issues a single bulk insert when
    ``returnIds`` is `False` and one single-row insert per row when it is
    `True`.  Derived classes should override it when they can do better,
    particularly for the latter case.

    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.
    """
    if not self.isWriteable():
        raise ReadOnlyDatabaseError(f"Attempt to insert into read-only database '{self}'.")
    if not rows:
        # Nothing to do; still honor the return type the caller asked for.
        return [] if returnIds else None
    statement = table.insert()
    if returnIds:
        # Each row is executed on its own so that its generated key can be
        # read back from the individual result.
        ids = []
        for row in rows:
            outcome = self._connection.execute(statement, row)
            ids.append(outcome.inserted_primary_key[0])
        return ids
    # No keys requested: hand all rows to a single execute call.
    self._connection.execute(statement, *rows)
    return None
@abstractmethod
def replace(self, table: sqlalchemy.schema.Table, *rows: dict) -> None:
    """Insert rows, overwriting existing rows that share a primary key.

    Any existing row whose primary key would collide with a newly inserted
    row is replaced by the new row.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table the rows are written to.
    *rows
        Rows to insert, each a dictionary mapping column name to value.
        All dictionaries must share the same keys.

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    Violations of any constraint other than the primary key should be
    reported by raising `sqlalchemy.exc.IntegrityError`.

    Supporting tables with autoincrement keys is optional for
    implementations.
    """
    # Abstract: concrete database classes must supply the implementation.
    raise NotImplementedError()
def delete(self, table: sqlalchemy.schema.Table, columns: Iterable[str], *rows: dict) -> int:
    """Delete one or more rows from a table.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table that rows should be deleted from.
    columns : `~collections.abc.Iterable` of `str`
        The names of columns that will be used to constrain the rows to
        be deleted; these will be combined via ``AND`` to form the
        ``WHERE`` clause of the delete query.  Any iterable is accepted,
        including single-pass iterators.  If it yields no names, the
        delete is unconstrained.
    *rows
        Positional arguments are the keys of rows to be deleted, as
        dictionaries mapping column name to value.  The keys in all
        dictionaries must be exactly the names in ``columns``.

    Returns
    -------
    count : `int`
        Number of rows deleted, as reported by the ``rowcount`` of the
        executed statement (``0`` when ``columns`` is non-empty but no
        rows were given).

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    The default implementation should be sufficient for most derived
    classes.
    """
    if not self.isWriteable():
        raise ReadOnlyDatabaseError(f"Attempt to delete from read-only database '{self}'.")
    # Materialize the column names before testing them: iterators and
    # generators are always truthy, so an empty one would otherwise be
    # mistaken for "there are constraints" and the unconstrained delete
    # below would be skipped.
    columns = list(columns)
    if columns and not rows:
        # If there are no columns, this operation is supposed to delete
        # everything (so we proceed as usual).  But if there are columns,
        # but no rows, it was a constrained bulk operation where the
        # constraint is that no rows match, and we should short-circuit
        # while reporting that no rows were affected.
        return 0
    sql = table.delete()
    # One bound parameter per constraint column; the values are supplied
    # by the row dictionaries at execution time.
    whereTerms = [table.columns[name] == sqlalchemy.sql.bindparam(name) for name in columns]
    if whereTerms:
        sql = sql.where(sqlalchemy.sql.and_(*whereTerms))
    return self._connection.execute(sql, *rows).rowcount
def update(self, table: sqlalchemy.schema.Table, where: Dict[str, str], *rows: dict) -> int:
    """Modify existing rows of a table.

    Parameters
    ----------
    table : `sqlalchemy.schema.Table`
        Table holding the rows to modify.
    where : `dict` [`str`, `str`]
        Maps the name of each column used to locate existing rows to the
        key under which the value to match is stored in the ``rows``
        dictionaries.  The two may differ because of SQLAlchemy
        limitations.
    *rows
        Rows to update, as dictionaries that all share the same keys.
        Each key is either a value of the ``where`` mapping or the name
        of a column to be updated.

    Returns
    -------
    count : `int`
        Number of rows matched, whether or not the update actually
        changed them (``0`` when no rows are passed).

    Raises
    ------
    ReadOnlyDatabaseError
        Raised if `isWriteable` returns `False` when this method is called.

    Notes
    -----
    May be used inside transaction contexts, so implementations may not
    perform operations that interrupt transactions.

    The default implementation should be sufficient for most derived
    classes.
    """
    if not self.isWriteable():
        raise ReadOnlyDatabaseError(f"Attempt to update read-only database '{self}'.")
    if not rows:
        return 0
    statement = table.update()
    # Build one equality test per search column, each comparing against a
    # bound parameter named after the corresponding key in ``rows``.
    conditions = []
    for columnName, paramName in where.items():
        conditions.append(table.columns[columnName] == sqlalchemy.sql.bindparam(paramName))
    statement = statement.where(sqlalchemy.sql.and_(*conditions))
    outcome = self._connection.execute(statement, *rows)
    return outcome.rowcount
def query(self, sql: sqlalchemy.sql.FromClause,
          *args: Any, **kwds: Any) -> sqlalchemy.engine.ResultProxy:
    """Execute a SELECT query and hand back its results.

    Parameters
    ----------
    sql : `sqlalchemy.sql.FromClause`
        SQLAlchemy object representing the ``SELECT`` query.
    *args
        Extra positional arguments, passed straight through to
        `sqlalchemy.engine.Connection.execute`.
    **kwds
        Extra keyword arguments, passed straight through to
        `sqlalchemy.engine.Connection.execute`.

    Returns
    -------
    result : `sqlalchemy.engine.ResultProxy`
        Query results.

    Notes
    -----
    The default implementation should be sufficient for most derived
    classes.
    """
    # TODO: should we guard against non-SELECT queries here?
    connection = self._connection
    return connection.execute(sql, *args, **kwds)
# Annotation only: no value is assigned to ``origin`` at this point.
# NOTE(review): presumably set by concrete implementations or their
# constructors -- confirm where it is initialized.
origin: int
"""An integer ID that should be used as the default for any datasets,
quanta, or other entities that use a (autoincrement, origin) compound
primary key (`int`).
"""

# Annotation only: no value is assigned here; the declared type allows
# `None` as well as a string.
namespace: Optional[str]
"""The schema or namespace this database instance is associated with
(`str` or `None`).
"""