Coverage for python/lsst/daf/butler/registry/databases/sqlite.py : 17%

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ["SqliteDatabase"]

from contextlib import closing
import copy
from typing import Any, ContextManager, Dict, Iterable, List, Optional
from dataclasses import dataclass
import os
import urllib.parse

import sqlite3
import sqlalchemy
import sqlalchemy.ext.compiler

from ..interfaces import Database, ReadOnlyDatabaseError, StaticTablesContext
from ...core import ddl

def _onSqlite3Connect(dbapiConnection: sqlite3.Connection,
                      connectionRecord: sqlalchemy.pool._ConnectionRecord) -> None:
    assert isinstance(dbapiConnection, sqlite3.Connection)
    # Prevent pysqlite from emitting BEGIN and COMMIT statements.
    dbapiConnection.isolation_level = None
    # Enable foreign keys.
    with closing(dbapiConnection.cursor()) as cursor:
        cursor.execute("PRAGMA foreign_keys=ON;")
        cursor.execute("PRAGMA busy_timeout = 300000;")  # in ms, so 5min (way longer than should be needed)

def _onSqlite3Begin(connection: sqlalchemy.engine.Connection) -> sqlalchemy.engine.Connection:
    assert connection.dialect.name == "sqlite"
    # Replace pysqlite's buggy transaction handling that never BEGINs with our
    # own that does, and tell SQLite to try to acquire a lock as soon as we
    # start a transaction (this should lead to more blocking and fewer
    # deadlocks).
    connection.execute("BEGIN IMMEDIATE")
    return connection

class _Replace(sqlalchemy.sql.Insert):
    """A SQLAlchemy query that compiles to INSERT ... ON CONFLICT DO UPDATE
    on the primary key constraint for the table, i.e. an "upsert" that
    replaces any existing row with the same primary key.
    """
    pass

# Hard to infer what types these should be from SQLAlchemy docs; just disable
# static typing by calling everything "Any".
@sqlalchemy.ext.compiler.compiles(_Replace, "sqlite")
def _replace(insert: Any, compiler: Any, **kwargs: Any) -> Any:
    """Generate an INSERT ... ON CONFLICT DO UPDATE query.
    """
    # SQLite and PostgreSQL use similar syntax for their ON CONFLICT extension,
    # but SQLAlchemy only knows about PostgreSQL's, so we have to compile some
    # custom text SQL ourselves.
    result = compiler.visit_insert(insert, **kwargs)
    preparer = compiler.preparer
    pk_columns = ", ".join([preparer.format_column(col) for col in insert.table.primary_key])
    result += f" ON CONFLICT ({pk_columns})"
    columns = [preparer.format_column(col) for col in insert.table.columns
               if col.name not in insert.table.primary_key]
    updates = ", ".join([f"{col} = excluded.{col}" for col in columns])
    result += f" DO UPDATE SET {updates}"
    return result
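
# Illustrative sketch only: for a hypothetical table ``t`` with primary key
# column ``a`` and data columns ``b`` and ``c``, compiling ``_Replace(t)``
# under the sqlite dialect yields SQL of the form
#
#     INSERT INTO t (a, b, c) VALUES (?, ?, ?)
#         ON CONFLICT (a) DO UPDATE SET b = excluded.b, c = excluded.c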

_AUTOINCR_TABLE_SPEC = ddl.TableSpec(
    fields=[ddl.FieldSpec(name="id", dtype=sqlalchemy.Integer, primaryKey=True)]
)

@dataclass
class _AutoincrementCompoundKeyWorkaround:
    """A workaround for SQLite's lack of support for compound primary keys that
    include an autoincrement field.
    """

    table: sqlalchemy.schema.Table
    """A single-column internal table that can be inserted into to yield
    autoincrement values (`sqlalchemy.schema.Table`).
    """

    column: str
    """The name of the column in the original table that needs to be populated
    with values from the internal table (`str`).
    """


class SqliteDatabase(Database):
    """An implementation of the `Database` interface for SQLite3.

    Parameters
    ----------
    connection : `sqlalchemy.engine.Connection`
        An existing connection created by a previous call to `connect`.
    origin : `int`
        An integer ID that should be used as the default for any datasets,
        quanta, or other entities that use a (autoincrement, origin) compound
        primary key.
    namespace : `str`, optional
        The namespace (schema) this database is associated with. If `None`,
        the default schema for the connection is used (which may be `None`).
    writeable : `bool`, optional
        If `True`, allow write operations on the database, including
        ``CREATE TABLE``.

    Notes
    -----
    The case where ``namespace is not None`` is not yet tested, and may be
    broken; we need an API for attaching to different databases in order to
    write those tests, but haven't yet worked out what is common/different
    across databases well enough to define it.
    """

    def __init__(self, *, connection: sqlalchemy.engine.Connection, origin: int,
                 namespace: Optional[str] = None, writeable: bool = True):
        super().__init__(origin=origin, connection=connection, namespace=namespace)
        # Get the filename from a call to 'PRAGMA database_list'.
        with closing(connection.connection.cursor()) as cursor:
            dbList = list(cursor.execute("PRAGMA database_list").fetchall())
        if len(dbList) == 0:
            raise RuntimeError("No database in connection.")
        if namespace is None:
            namespace = "main"
        for _, dbname, filename in dbList:
            if dbname == namespace:
                break
        else:
            raise RuntimeError(f"No '{namespace}' database in connection.")
        if not filename:
            self.filename = None
        else:
            self.filename = filename
        self._writeable = writeable
        self._autoincr: Dict[str, _AutoincrementCompoundKeyWorkaround] = {}

    @classmethod
    def makeDefaultUri(cls, root: str) -> Optional[str]:
        return "sqlite:///" + os.path.join(root, "gen3.sqlite3")

    @classmethod
    def connect(cls, uri: Optional[str] = None, *, filename: Optional[str] = None,
                writeable: bool = True) -> sqlalchemy.engine.Connection:
        """Create a `sqlalchemy.engine.Connection` from a SQLAlchemy URI or
        filename.

        Parameters
        ----------
        uri : `str`
            A SQLAlchemy URI connection string.
        filename : `str`
            Name of the SQLite database file, or `None` to use an in-memory
            database. Ignored if ``uri is not None``.
        writeable : `bool`, optional
            If `True`, allow write operations on the database, including
            ``CREATE TABLE``.

        Returns
        -------
        connection : `sqlalchemy.engine.Connection`
            A database connection and transaction state.
        """
        # In order to be able to tell SQLite that we want a read-only or
        # read-write connection, we need to make the SQLite DBAPI connection
        # with a "URI"-based connection string. SQLAlchemy claims it can do
        # this
        # (https://docs.sqlalchemy.org/en/13/dialects/sqlite.html#uri-connections),
        # but it doesn't seem to work as advertised. To work around this, we
        # use the 'creator' argument to sqlalchemy.engine.create_engine, which
        # lets us pass a callable that creates the DBAPI connection.
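        # Illustrative examples of the mapping implemented below (assuming a
        # writeable connection; file names are hypothetical):
        #   filename="/tmp/butler.sqlite3"
        #       -> uri="sqlite:////tmp/butler.sqlite3",
        #          target="file:/tmp/butler.sqlite3?mode=rwc&uri=true"
        #   uri="sqlite:///repo/gen3.sqlite3"
        #       -> target="file:repo/gen3.sqlite3?mode=rwc&uri=true"
        #   no uri and no filename
        #       -> in-memory database: uri="sqlite://", target=":memory:"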
        if uri is None:
            if filename is None:
                target = ":memory:"
                uri = "sqlite://"
            else:
                target = f"file:{filename}"
                uri = f"sqlite:///{filename}"
        else:
            parsed = urllib.parse.urlparse(uri)
            queries = parsed.query.split("&")
            if "uri=true" in queries:
                # This is a SQLAlchemy URI that is already trying to make a
                # SQLite connection via a SQLite URI, and hence there may
                # be URI components for both SQLite and SQLAlchemy. We
                # don't need to support that, and it'd be a
                # reimplementation of all of the (broken) logic in
                # SQLAlchemy for doing this, so we just don't.
                raise NotImplementedError("SQLite connection strings with 'uri=true' are not supported.")
            # This is just a SQLAlchemy URI with a non-URI SQLite
            # connection string inside it. Pull that out so we can use it
            # in the creator call.
            if parsed.path.startswith("/"):
                filename = parsed.path[1:]
                target = f"file:{filename}"
            else:
                filename = None
                target = ":memory:"
        if filename is None:
            if not writeable:
                raise NotImplementedError("Read-only :memory: databases are not supported.")
        else:
            if writeable:
                target += '?mode=rwc&uri=true'
            else:
                target += '?mode=ro&uri=true'

        def creator() -> sqlite3.Connection:
            return sqlite3.connect(target, check_same_thread=False, uri=True)

        engine = sqlalchemy.engine.create_engine(uri, poolclass=sqlalchemy.pool.NullPool,
                                                 creator=creator)

        sqlalchemy.event.listen(engine, "connect", _onSqlite3Connect)
        sqlalchemy.event.listen(engine, "begin", _onSqlite3Begin)

        try:
            return engine.connect()
        except sqlalchemy.exc.OperationalError as err:
            raise RuntimeError(f"Error creating connection with uri='{uri}', filename='{filename}', "
                               f"target={target}.") from err

    @classmethod
    def fromConnection(cls, connection: sqlalchemy.engine.Connection, *, origin: int,
                       namespace: Optional[str] = None, writeable: bool = True) -> Database:
        return cls(connection=connection, origin=origin, writeable=writeable, namespace=namespace)

    def isWriteable(self) -> bool:
        return self._writeable

    def __str__(self) -> str:
        if self.filename:
            return f"SQLite3@{self.filename}"
        else:
            return "SQLite3@:memory:"

    def _lockTables(self, tables: Iterable[sqlalchemy.schema.Table] = ()) -> None:
        # Docstring inherited.
        # Our SQLite database always acquires full-database locks at the
        # beginning of a transaction, so there's no need to acquire table-level
        # locks - which is good, because SQLite doesn't have table-level
        # locking.
        pass

    # MyPy claims that the return type here isn't covariant with the return
    # type of the base class method, which is formally correct but irrelevant
    # - the base class return type is _GeneratorContextManager, but only
    # because it's generated by the contextmanager decorator.
    def declareStaticTables(self, *, create: bool) -> ContextManager[StaticTablesContext]:  # type: ignore
        # If the user asked for an in-memory, writeable database, then we may
        # need to re-create schema even if create=False because schema can be
        # lost on re-connect. This is only really relevant for tests, and it's
        # convenient there.
        if self.filename is None and self.isWriteable():
            inspector = sqlalchemy.engine.reflection.Inspector(self._connection)
            tables = inspector.get_table_names(schema=self.namespace)
            if not tables:
                create = True
        return super().declareStaticTables(create=create)

    def _convertFieldSpec(self, table: str, spec: ddl.FieldSpec, metadata: sqlalchemy.MetaData,
                          **kwargs: Any) -> sqlalchemy.schema.Column:
        if spec.autoincrement:
            if not spec.primaryKey:
                raise RuntimeError(f"Autoincrement field {table}.{spec.name} that is not a "
                                   f"primary key is not supported.")
            if spec.dtype != sqlalchemy.Integer:
                # SQLite's autoincrement is really limited; it only works if
                # the column type is exactly "INTEGER". But it also doesn't
                # care about the distinctions between different integer types,
                # so it's safe to change it.
                spec = copy.copy(spec)
                spec.dtype = sqlalchemy.Integer
        return super()._convertFieldSpec(table, spec, metadata, **kwargs)

    def _makeColumnConstraints(self, table: str, spec: ddl.FieldSpec) -> List[sqlalchemy.CheckConstraint]:
        # SQLite ignores declared string lengths, so we add explicit CHECK
        # constraints on all string columns; otherwise data that SQLite
        # happily accepts can cause problems when moved to other databases.
        constraints = []
        if spec.isStringType():
            name = self.shrinkDatabaseEntityName("_".join([table, "len", spec.name]))
            # Also require a minimum length of 1, because Oracle converts
            # empty strings to NULL.
            constraints.append(
                sqlalchemy.CheckConstraint(
                    f"length({spec.name})<={spec.length} AND length({spec.name})>=1",
                    name=name,
                )
            )
        constraints.extend(super()._makeColumnConstraints(table, spec))
        return constraints

    def _convertTableSpec(self, name: str, spec: ddl.TableSpec, metadata: sqlalchemy.MetaData,
                          **kwargs: Any) -> sqlalchemy.schema.Table:
        primaryKeyFieldNames = set(field.name for field in spec.fields if field.primaryKey)
        autoincrFieldNames = set(field.name for field in spec.fields if field.autoincrement)
        if len(autoincrFieldNames) > 1:
            raise RuntimeError("At most one autoincrement field per table is allowed.")
        if len(primaryKeyFieldNames) > 1 and len(autoincrFieldNames) > 0:
            # SQLite's default rowid-based autoincrement doesn't work if the
            # field is just one field in a compound primary key. As a
            # workaround, we create an extra table with just one column that
            # we'll insert into to generate those IDs. That's only safe if
            # the records we put in that single-column table are unique on the
            # autoincrement field alone, without the rest of the primary key.
            # In practice, that means we only store records there for which
            # origin == self.origin.
            autoincrFieldName, = autoincrFieldNames
            otherPrimaryKeyFieldNames = primaryKeyFieldNames - autoincrFieldNames
            if otherPrimaryKeyFieldNames != {"origin"}:
                # We need the only other field in the key to be 'origin'.
                raise NotImplementedError(
                    "Compound primary keys with an autoincrement are only supported in SQLite "
                    "if the only non-autoincrement primary key field is 'origin'."
                )
            self._autoincr[name] = _AutoincrementCompoundKeyWorkaround(
                table=self._convertTableSpec(f"_autoinc_{name}", _AUTOINCR_TABLE_SPEC, metadata, **kwargs),
                column=autoincrFieldName
            )
        if not spec.recycleIds:
            kwargs = dict(kwargs, sqlite_autoincrement=True)
        return super()._convertTableSpec(name, spec, metadata, **kwargs)

    def insert(self, table: sqlalchemy.schema.Table, *rows: dict, returnIds: bool = False,
               select: Optional[sqlalchemy.sql.Select] = None,
               names: Optional[Iterable[str]] = None,
               ) -> Optional[List[int]]:
        autoincr = self._autoincr.get(table.name)
        if autoincr is not None:
            if select is not None:
                raise NotImplementedError(
                    "Cannot do INSERT INTO ... SELECT on a SQLite table with a simulated autoincrement "
                    "compound primary key"
                )
            # This table has a compound primary key that includes an
            # autoincrement. That doesn't work natively in SQLite, so we
            # insert into a single-column table and use those IDs.
            if not rows:
                return [] if returnIds else None
            if autoincr.column in rows[0]:
                # Caller passed the autoincrement key values explicitly in the
                # first row. They had better have done the same for all rows,
                # or SQLAlchemy would have a problem, even if we didn't.
                assert all(autoincr.column in row for row in rows)
                # We need to insert only the values that correspond to
                # ``origin == self.origin`` into the single-column table, to
                # make sure we don't generate conflicting keys there later.
                rowsForAutoincrTable = [dict(id=row[autoincr.column])
                                        for row in rows if row["origin"] == self.origin]
                # Insert into the autoincr table and the target table inside
                # a transaction. The main-table insertion can take care of
                # returnIds for us.
                with self.transaction():
                    self._connection.execute(autoincr.table.insert(), *rowsForAutoincrTable)
                    return super().insert(table, *rows, returnIds=returnIds)
            else:
                # Caller did not pass autoincrement key values on the first
                # row. Make sure they didn't ever do that, and also make
                # sure the origin that was passed in is always self.origin,
                # because we can't safely generate autoincrement values
                # otherwise.
                assert all(autoincr.column not in row and row["origin"] == self.origin for row in rows)
                # Insert into the autoincr table one by one to get the
                # primary key values back, then insert into the target table
                # in the same transaction.
                with self.transaction():
                    newRows = []
                    ids = []
                    for row in rows:
                        newRow = row.copy()
                        id = self._connection.execute(autoincr.table.insert()).inserted_primary_key[0]
                        newRow[autoincr.column] = id
                        newRows.append(newRow)
                        ids.append(id)
                    # Don't ever ask to returnIds here, because we've already
                    # got them.
                    super().insert(table, *newRows)
                if returnIds:
                    return ids
                else:
                    return None
        else:
            return super().insert(table, *rows, select=select, names=names, returnIds=returnIds)
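
    # Illustrative sketch of insert() with a simulated autoincrement compound
    # key (table and column names are hypothetical): for a table ``dataset``
    # with primary key (``id``, ``origin``),
    #   db.insert(dataset, {"origin": db.origin, "name": "a"}, returnIds=True)
    # draws a fresh ``id`` from ``_autoinc_dataset`` and returns it in a list,
    # while
    #   db.insert(dataset, {"id": 42, "origin": other, "name": "b"})
    # uses the given ``id`` and only mirrors rows with origin == db.origin
    # into ``_autoinc_dataset``.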

    def replace(self, table: sqlalchemy.schema.Table, *rows: dict) -> None:
        if not (self.isWriteable() or table.key in self._tempTables):
            raise ReadOnlyDatabaseError(f"Attempt to replace into read-only database '{self}'.")
        if not rows:
            return
        if table.name in self._autoincr:
            raise NotImplementedError(
                "replace does not support compound primary keys with autoincrement fields."
            )
        self._connection.execute(_Replace(table), *rows)

    filename: Optional[str]
    """Name of the file this database is connected to (`str` or `None`).

    Set to `None` for in-memory databases.
    """