Coverage for python/lsst/daf/butler/registry/databases/sqlite.py : 18%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = ["SqliteDatabase"]
25from contextlib import closing
26import copy
27from typing import Any, ContextManager, Dict, Iterable, List, Optional
28from dataclasses import dataclass
29import os
30import urllib.parse
32import sqlite3
33import sqlalchemy
34import sqlalchemy.ext.compiler
36from ..interfaces import Database, StaticTablesContext
37from ...core import ddl
def _onSqlite3Connect(dbapiConnection: sqlite3.Connection,
                      connectionRecord: sqlalchemy.pool._ConnectionRecord) -> None:
    """Configure a freshly opened SQLite DBAPI connection.

    Registered as a SQLAlchemy "connect" event listener.

    Parameters
    ----------
    dbapiConnection : `sqlite3.Connection`
        The raw DBAPI connection that was just created.
    connectionRecord : `sqlalchemy.pool._ConnectionRecord`
        Pool record passed by the event system; not used here.
    """
    assert isinstance(dbapiConnection, sqlite3.Connection)
    # Setting the isolation level to None stops pysqlite from emitting BEGIN
    # and COMMIT statements on its own.
    dbapiConnection.isolation_level = None
    setupStatements = (
        # Foreign key enforcement has to be switched on per connection.
        "PRAGMA foreign_keys=ON;",
        # Timeout is in ms, so 5min (way longer than should be needed).
        "PRAGMA busy_timeout = 300000;",
    )
    with closing(dbapiConnection.cursor()) as pragmaCursor:
        for statement in setupStatements:
            pragmaCursor.execute(statement)
def _onSqlite3Begin(connection: sqlalchemy.engine.Connection) -> sqlalchemy.engine.Connection:
    """Start a transaction explicitly on a SQLite connection.

    Registered as a SQLAlchemy "begin" event listener.

    Parameters
    ----------
    connection : `sqlalchemy.engine.Connection`
        Connection on which a transaction is beginning; its dialect must be
        "sqlite".

    Returns
    -------
    connection : `sqlalchemy.engine.Connection`
        The same connection that was passed in.
    """
    dialectName = connection.dialect.name
    assert dialectName == "sqlite"
    # pysqlite's own transaction handling is buggy and never emits BEGIN, so
    # we emit it ourselves.  Using IMMEDIATE asks SQLite to try to acquire a
    # lock as soon as the transaction starts, which should lead to more
    # blocking and fewer deadlocks.
    connection.execute("BEGIN IMMEDIATE")
    return connection
class _Replace(sqlalchemy.sql.Insert):
    """Marker subclass of `sqlalchemy.sql.Insert` for upsert statements.

    A custom compiler is registered for this type for the "sqlite" dialect;
    it appends an ``ON CONFLICT`` clause on the table's primary key to the
    regular ``INSERT`` text.
    """
68# SQLite and PostgreSQL use similar syntax for their ON CONFLICT extension,
69# but SQLAlchemy only knows about PostgreSQL's, so we have to compile some
70# custom text SQL ourselves.
72# Hard to infer what types these should be from SQLAlchemy docs; just disable
73# static typing by calling everything "Any".
@sqlalchemy.ext.compiler.compiles(_Replace, "sqlite")
def _replace(insert: Any, compiler: Any, **kwargs: Any) -> Any:
    """Generate an INSERT ... ON CONFLICT (<primary key>) DO UPDATE query.

    Parameters
    ----------
    insert
        The `_Replace` construct being compiled.
    compiler
        The SQLAlchemy SQL compiler for the "sqlite" dialect.
    **kwargs
        Forwarded unchanged to ``compiler.visit_insert``.

    Returns
    -------
    sql : `str`
        The regular ``INSERT`` text followed by an ``ON CONFLICT`` clause on
        the table's primary key columns.  Every column that is not part of
        the primary key is overwritten with the value from the rejected row
        (``excluded``).  If the table has no such columns, ``DO NOTHING`` is
        emitted instead, because ``DO UPDATE SET`` with an empty assignment
        list is not valid SQL and a conflicting row is then already identical
        to the one being inserted.
    """
    result = compiler.visit_insert(insert, **kwargs)
    preparer = compiler.preparer
    primaryKey = insert.table.primary_key
    pk_columns = ", ".join(preparer.format_column(col) for col in primaryKey)
    result += f" ON CONFLICT ({pk_columns})"
    columns = [preparer.format_column(col) for col in insert.table.columns
               if col.name not in primaryKey]
    if columns:
        updates = ", ".join(f"{col} = excluded.{col}" for col in columns)
        result += f" DO UPDATE SET {updates}"
    else:
        # Nothing besides the key to overwrite.
        result += " DO NOTHING"
    return result
class _Ensure(sqlalchemy.sql.Insert):
    """Marker subclass of `sqlalchemy.sql.Insert` whose "sqlite" compilation
    yields INSERT ... ON CONFLICT DO NOTHING.
    """
@sqlalchemy.ext.compiler.compiles(_Ensure, "sqlite")
def _ensure(insert: Any, compiler: Any, **kwargs: Any) -> Any:
    """Compile an `_Ensure` construct for SQLite.

    The standard ``INSERT`` text produced by ``compiler.visit_insert`` is
    returned with an ``ON CONFLICT DO NOTHING`` clause appended.
    """
    return compiler.visit_insert(insert, **kwargs) + " ON CONFLICT DO NOTHING"
# Specification shared by all of the single-column "_autoinc_<name>" helper
# tables that `SqliteDatabase._convertTableSpec` creates: one integer primary
# key column called "id".
_AUTOINCR_TABLE_SPEC = ddl.TableSpec(
    fields=[ddl.FieldSpec(name="id", dtype=sqlalchemy.Integer, primaryKey=True)]
)
@dataclass
class _AutoincrementCompoundKeyWorkaround:
    """Record describing how autoincrement values are generated for a table
    whose compound primary key includes an autoincrement field, which SQLite
    does not support natively.

    Attributes
    ----------
    table : `sqlalchemy.schema.Table`
        A single-column internal table that can be inserted into to yield
        autoincrement values.
    column : `str`
        The name of the column in the original table that needs to be
        populated with values from the internal table.
    """

    table: sqlalchemy.schema.Table
    column: str
class SqliteDatabase(Database):
    """An implementation of the `Database` interface for SQLite3.

    Parameters
    ----------
    connection : `sqlalchemy.engine.Connection`
        An existing connection created by a previous call to `connect`.
    origin : `int`
        An integer ID that should be used as the default for any datasets,
        quanta, or other entities that use a (autoincrement, origin) compound
        primary key.
    namespace : `str`, optional
        The namespace (schema) this database is associated with.  If `None`,
        the default schema for the connection is used (which may be `None`).
    writeable : `bool`, optional
        If `True`, allow write operations on the database, including
        ``CREATE TABLE``.

    Raises
    ------
    RuntimeError
        Raised if ``PRAGMA database_list`` reports no databases for the
        connection, or none whose name matches ``namespace`` ("main" when
        ``namespace`` is `None`).

    Notes
    -----
    The case where ``namespace is not None`` is not yet tested, and may be
    broken; we need an API for attaching to different databases in order to
    write those tests, but haven't yet worked out what is common/different
    across databases well enough to define it.
    """

    def __init__(self, *, connection: sqlalchemy.engine.Connection, origin: int,
                 namespace: Optional[str] = None, writeable: bool = True):
        super().__init__(origin=origin, connection=connection, namespace=namespace)
        # Get the filename from a call to 'PRAGMA database_list'.
        with closing(connection.connection.cursor()) as cursor:
            dbList = cursor.execute("PRAGMA database_list").fetchall()
        if not dbList:
            raise RuntimeError("No database in connection.")
        if namespace is None:
            namespace = "main"
        # Each record's second element is the database name and its third is
        # the filename; pick the record for our namespace.
        for _, dbname, filename in dbList:
            if dbname == namespace:
                break
        else:
            raise RuntimeError(f"No '{namespace}' database in connection.")
        # Normalize any falsy filename to None.
        self.filename = filename if filename else None
        self._writeable = writeable
        # Maps table name to the helper table used to simulate autoincrement
        # in a compound primary key; populated by _convertTableSpec.
        self._autoincr: Dict[str, _AutoincrementCompoundKeyWorkaround] = {}

    @classmethod
    def makeDefaultUri(cls, root: str) -> Optional[str]:
        # The default database is a file named "gen3.sqlite3" inside ``root``.
        return "sqlite:///" + os.path.join(root, "gen3.sqlite3")

    @classmethod
    def connect(cls, uri: Optional[str] = None, *, filename: Optional[str] = None,
                writeable: bool = True) -> sqlalchemy.engine.Connection:
        """Create a `sqlalchemy.engine.Connection` from a SQLAlchemy URI or
        filename.

        Parameters
        ----------
        uri : `str`, optional
            A SQLAlchemy URI connection string.
        filename : `str`, optional
            Name of the SQLite database file, or `None` to use an in-memory
            database.  Ignored if ``uri is not None``.
        writeable : `bool`, optional
            If `True`, allow write operations on the database, including
            ``CREATE TABLE``.

        Returns
        -------
        cs : `sqlalchemy.engine.Connection`
            A database connection and transaction state.

        Raises
        ------
        NotImplementedError
            Raised if ``uri`` has ``uri=true`` among its query parameters, or
            if a read-only in-memory database is requested.
        RuntimeError
            Raised if SQLAlchemy reports an operational error while
            connecting.
        """
        # In order to be able to tell SQLite that we want a read-only or
        # read-write connection, we need to make the SQLite DBAPI connection
        # with a "URI"-based connection string.  SQLAlchemy claims it can do
        # this
        # (https://docs.sqlalchemy.org/en/13/dialects/sqlite.html#uri-connections),
        # but it doesn't seem to work as advertised.  To work around this, we
        # use the 'creator' argument to sqlalchemy.engine.create_engine, which
        # lets us pass a callable that creates the DBAPI connection.
        if uri is None:
            if filename is None:
                target = ":memory:"
                uri = "sqlite://"
            else:
                target = f"file:{filename}"
                uri = f"sqlite:///{filename}"
        else:
            parsed = urllib.parse.urlparse(uri)
            queries = parsed.query.split("&")
            if "uri=true" in queries:
                # This is a SQLAlchemy URI that is already trying to make a
                # SQLite connection via a SQLite URI, and hence there may
                # be URI components for both SQLite and SQLAlchemy.  We
                # don't need to support that, and it'd be a
                # reimplementation of all of the (broken) logic in
                # SQLAlchemy for doing this, so we just don't.
                raise NotImplementedError("SQLite connection strings with 'uri=true' are not supported.")
            # This is just a SQLAlchemy URI with a non-URI SQLite
            # connection string inside it.  Pull that out so we can use it
            # in the creator call.
            if parsed.path.startswith("/"):
                filename = parsed.path[1:]
                target = f"file:{filename}"
            else:
                filename = None
                target = ":memory:"
        if filename is None:
            if not writeable:
                raise NotImplementedError("Read-only :memory: databases are not supported.")
        elif writeable:
            target += '?mode=rwc&uri=true'
        else:
            target += '?mode=ro&uri=true'

        def creator() -> sqlite3.Connection:
            return sqlite3.connect(target, check_same_thread=False, uri=True)

        engine = sqlalchemy.engine.create_engine(uri, poolclass=sqlalchemy.pool.NullPool,
                                                 creator=creator)

        sqlalchemy.event.listen(engine, "connect", _onSqlite3Connect)
        sqlalchemy.event.listen(engine, "begin", _onSqlite3Begin)
        try:
            return engine.connect()
        except sqlalchemy.exc.OperationalError as err:
            raise RuntimeError(f"Error creating connection with uri='{uri}', filename='{filename}', "
                               f"target={target}.") from err

    @classmethod
    def fromConnection(cls, connection: sqlalchemy.engine.Connection, *, origin: int,
                       namespace: Optional[str] = None, writeable: bool = True) -> Database:
        return cls(connection=connection, origin=origin, writeable=writeable, namespace=namespace)

    def isWriteable(self) -> bool:
        return self._writeable

    def __str__(self) -> str:
        if self.filename:
            return f"SQLite3@{self.filename}"
        else:
            return "SQLite3@:memory:"

    def _lockTables(self, tables: Iterable[sqlalchemy.schema.Table] = ()) -> None:
        # Docstring inherited.
        # Our SQLite database always acquires full-database locks at the
        # beginning of a transaction, so there's no need to acquire table-level
        # locks - which is good, because SQLite doesn't have table-level
        # locking.
        pass

    # MyPy claims that the return type here isn't covariant with the return
    # type of the base class method, which is formally correct but irrelevant
    # - the base class return type is _GeneratorContextManager, but only
    # because it's generated by the contextmanager decorator.
    def declareStaticTables(self, *, create: bool) -> ContextManager[StaticTablesContext]:  # type: ignore
        # If the user asked for an in-memory, writeable database, then we may
        # need to re-create schema even if create=False because schema can be
        # lost on re-connect.  This is only really relevant for tests, and it's
        # convenient there.
        if self.filename is None and self.isWriteable():
            inspector = sqlalchemy.engine.reflection.Inspector(self._connection)
            tables = inspector.get_table_names(schema=self.namespace)
            if not tables:
                create = True
        return super().declareStaticTables(create=create)

    def _convertFieldSpec(self, table: str, spec: ddl.FieldSpec, metadata: sqlalchemy.MetaData,
                          **kwargs: Any) -> sqlalchemy.schema.Column:
        if spec.autoincrement:
            if not spec.primaryKey:
                raise RuntimeError(f"Autoincrement field {table}.{spec.name} that is not a "
                                   f"primary key is not supported.")
            if spec.dtype != sqlalchemy.Integer:
                # SQLite's autoincrement is really limited; it only works if
                # the column type is exactly "INTEGER".  But it also doesn't
                # care about the distinctions between different integer types,
                # so it's safe to change it.  Work on a copy so the caller's
                # spec is left untouched.
                spec = copy.copy(spec)
                spec.dtype = sqlalchemy.Integer
        return super()._convertFieldSpec(table, spec, metadata, **kwargs)

    def _makeColumnConstraints(self, table: str, spec: ddl.FieldSpec) -> List[sqlalchemy.CheckConstraint]:
        # For sqlite we force constraints on all string columns since sqlite
        # ignores everything otherwise and this leads to problems with
        # other databases.

        constraints = []
        if spec.isStringType():
            name = self.shrinkDatabaseEntityName("_".join([table, "len", spec.name]))
            # The lower bound is there because Oracle converts empty strings
            # to NULL.
            constraints.append(sqlalchemy.CheckConstraint(f"length({spec.name})<={spec.length}"
                                                          f" AND length({spec.name})>=1",
                                                          name=name))

        constraints.extend(super()._makeColumnConstraints(table, spec))
        return constraints

    def _convertTableSpec(self, name: str, spec: ddl.TableSpec, metadata: sqlalchemy.MetaData,
                          **kwargs: Any) -> sqlalchemy.schema.Table:
        primaryKeyFieldNames = {field.name for field in spec.fields if field.primaryKey}
        autoincrFieldNames = {field.name for field in spec.fields if field.autoincrement}
        if len(autoincrFieldNames) > 1:
            raise RuntimeError("At most one autoincrement field per table is allowed.")
        if len(primaryKeyFieldNames) > 1 and len(autoincrFieldNames) > 0:
            # SQLite's default rowid-based autoincrement doesn't work if the
            # field is just one field in a compound primary key.  As a
            # workaround, we create an extra table with just one column that
            # we'll insert into to generate those IDs.  That's only safe if
            # that single-column table's records are already unique with just
            # the autoincrement field, not the rest of the primary key.  In
            # practice, that means the single-column table's records are those
            # for which origin == self.origin.
            autoincrFieldName, = autoincrFieldNames
            otherPrimaryKeyFieldNames = primaryKeyFieldNames - autoincrFieldNames
            if otherPrimaryKeyFieldNames != {"origin"}:
                # We need the only other field in the key to be 'origin'.
                raise NotImplementedError(
                    "Compound primary keys with an autoincrement are only supported in SQLite "
                    "if the only non-autoincrement primary key field is 'origin'."
                )
            self._autoincr[name] = _AutoincrementCompoundKeyWorkaround(
                table=self._convertTableSpec(f"_autoinc_{name}", _AUTOINCR_TABLE_SPEC, metadata, **kwargs),
                column=autoincrFieldName
            )
        if not spec.recycleIds:
            kwargs = dict(kwargs, sqlite_autoincrement=True)
        return super()._convertTableSpec(name, spec, metadata, **kwargs)

    def insert(self, table: sqlalchemy.schema.Table, *rows: dict, returnIds: bool = False,
               select: Optional[sqlalchemy.sql.Select] = None,
               names: Optional[Iterable[str]] = None,
               ) -> Optional[List[int]]:
        self.assertTableWriteable(table, f"Cannot insert into read-only table {table}.")
        autoincr = self._autoincr.get(table.name)
        if autoincr is None:
            # Ordinary table: the base class implementation handles it.
            return super().insert(table, *rows, select=select, names=names, returnIds=returnIds)
        if select is not None:
            raise NotImplementedError(
                "Cannot do INSERT INTO ... SELECT on a SQLite table with a simulated autoincrement "
                "compound primary key"
            )
        # This table has a compound primary key that includes an
        # autoincrement.  That doesn't work natively in SQLite, so we
        # insert into a single-column table and use those IDs.
        if not rows:
            return [] if returnIds else None
        if autoincr.column in rows[0]:
            # Caller passed the autoincrement key values explicitly in the
            # first row.  They had better have done the same for all rows,
            # or SQLAlchemy would have a problem, even if we didn't.
            assert all(autoincr.column in row for row in rows)
            # We need to insert only the values that correspond to
            # ``origin == self.origin`` into the single-column table, to
            # make sure we don't generate conflicting keys there later.
            rowsForAutoincrTable = [dict(id=row[autoincr.column])
                                    for row in rows if row["origin"] == self.origin]
            # Insert into the autoincr table and the target table inside
            # a transaction.  The main-table insertion can take care of
            # returnIds for us.
            with self.transaction():
                # Executing the insert with no parameter sets would add a
                # row with a freshly generated ID (that is exactly how IDs
                # are generated in the other branch below), so skip it when
                # there is nothing to record.
                if rowsForAutoincrTable:
                    self._connection.execute(autoincr.table.insert(), *rowsForAutoincrTable)
                return super().insert(table, *rows, returnIds=returnIds)
        # Caller did not pass autoincrement key values on the first
        # row.  Make sure they didn't ever do that, and also make
        # sure the origin that was passed in is always self.origin,
        # because we can't safely generate autoincrement values
        # otherwise.
        assert all(autoincr.column not in row and row["origin"] == self.origin for row in rows)
        # Insert into the autoincr table one by one to get the
        # primary key values back, then insert into the target table
        # in the same transaction.
        with self.transaction():
            newRows = []
            ids = []
            for row in rows:
                newRow = row.copy()
                newId = self._connection.execute(autoincr.table.insert()).inserted_primary_key[0]
                newRow[autoincr.column] = newId
                newRows.append(newRow)
                ids.append(newId)
            # Don't ever ask to returnIds here, because we've already
            # got them.
            super().insert(table, *newRows)
        return ids if returnIds else None

    def replace(self, table: sqlalchemy.schema.Table, *rows: dict) -> None:
        self.assertTableWriteable(table, f"Cannot replace into read-only table {table}.")
        if not rows:
            return
        if table.name in self._autoincr:
            raise NotImplementedError(
                "replace does not support compound primary keys with autoincrement fields."
            )
        self._connection.execute(_Replace(table), *rows)

    def ensure(self, table: sqlalchemy.schema.Table, *rows: dict) -> int:
        self.assertTableWriteable(table, f"Cannot ensure into read-only table {table}.")
        if not rows:
            return 0
        if table.name in self._autoincr:
            raise NotImplementedError(
                "ensure does not support compound primary keys with autoincrement fields."
            )
        # The result's rowcount is returned as the method's value.
        return self._connection.execute(_Ensure(table), *rows).rowcount

    filename: Optional[str]
    """Name of the file this database is connected to (`str` or `None`).

    Set to `None` for in-memory databases.
    """