Coverage for python/lsst/daf/butler/registry/databases/sqlite.py: 20%
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ["SqliteDatabase"]

import copy
import os
import sqlite3
import urllib.parse
from contextlib import closing
from dataclasses import dataclass
from typing import Any, ContextManager, Dict, Iterable, List, Optional

import sqlalchemy
import sqlalchemy.ext.compiler

from ...core import ddl
from ..interfaces import Database, StaticTablesContext


def _onSqlite3Connect(
    dbapiConnection: sqlite3.Connection, connectionRecord: sqlalchemy.pool._ConnectionRecord
) -> None:
    assert isinstance(dbapiConnection, sqlite3.Connection)
    # Prevent pysqlite from emitting BEGIN and COMMIT statements.
    dbapiConnection.isolation_level = None
    # Enable foreign keys.
    with closing(dbapiConnection.cursor()) as cursor:
        cursor.execute("PRAGMA foreign_keys=ON;")
        cursor.execute("PRAGMA busy_timeout = 300000;")  # in ms, so 5min (way longer than should be needed)


def _onSqlite3Begin(connection: sqlalchemy.engine.Connection) -> sqlalchemy.engine.Connection:
    assert connection.dialect.name == "sqlite"
    # Replace pysqlite's buggy transaction handling that never BEGINs with our
    # own that does, and tell SQLite to try to acquire a lock as soon as we
    # start a transaction (this should lead to more blocking and fewer
    # deadlocks).
    connection.execute(sqlalchemy.text("BEGIN IMMEDIATE"))
    return connection


class _Replace(sqlalchemy.sql.Insert):
    """A SQLAlchemy query that compiles to ``INSERT ... ON CONFLICT DO
    UPDATE`` (i.e. replace) on the primary key constraint for the table.
    """

    inherit_cache = True  # make it cacheable


# SQLite and PostgreSQL use similar syntax for their ON CONFLICT extension,
# but SQLAlchemy only knows about PostgreSQL's, so we have to compile some
# custom text SQL ourselves.

# Hard to infer what types these should be from SQLAlchemy docs; just disable
# static typing by calling everything "Any".
@sqlalchemy.ext.compiler.compiles(_Replace, "sqlite")
def _replace(insert: Any, compiler: Any, **kwargs: Any) -> Any:
    """Generate an INSERT ... ON CONFLICT DO UPDATE query."""
    result = compiler.visit_insert(insert, **kwargs)
    preparer = compiler.preparer
    pk_columns = ", ".join([preparer.format_column(col) for col in insert.table.primary_key])
    result += f" ON CONFLICT ({pk_columns})"
    columns = [
        preparer.format_column(col)
        for col in insert.table.columns
        if col.name not in insert.table.primary_key
    ]
    updates = ", ".join([f"{col} = excluded.{col}" for col in columns])
    result += f" DO UPDATE SET {updates}"
    return result


class _Ensure(sqlalchemy.sql.Insert):
    """A SQLAlchemy query that compiles to
    ``INSERT ... ON CONFLICT DO NOTHING``.
    """

    inherit_cache = True  # make it cacheable


@sqlalchemy.ext.compiler.compiles(_Ensure, "sqlite")
def _ensure(insert: Any, compiler: Any, **kwargs: Any) -> Any:
    """Generate an INSERT ... ON CONFLICT DO NOTHING query."""
    result = compiler.visit_insert(insert, **kwargs)
    result += " ON CONFLICT DO NOTHING"
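    # For illustration only (hypothetical table): ``result`` now reads roughly
    #   INSERT INTO t (id, value) VALUES (?, ?) ON CONFLICT DO NOTHING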
    return result


_AUTOINCR_TABLE_SPEC = ddl.TableSpec(
    fields=[ddl.FieldSpec(name="id", dtype=sqlalchemy.Integer, primaryKey=True)]
)
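# Tables built from this spec are the single-column helper tables used by the
# compound-primary-key autoincrement workaround below (see
# _AutoincrementCompoundKeyWorkaround and _convertTableSpec).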


@dataclass
class _AutoincrementCompoundKeyWorkaround:
    """A workaround for SQLite's lack of support for compound primary keys
    that include an autoincrement field.
    """

    table: sqlalchemy.schema.Table
    """A single-column internal table that can be inserted into to yield
    autoincrement values (`sqlalchemy.schema.Table`).
    """

    column: str
    """The name of the column in the original table that needs to be populated
    with values from the internal table (`str`).
    """


class SqliteDatabase(Database):
    """An implementation of the `Database` interface for SQLite3.

    Parameters
    ----------
    engine : `sqlalchemy.engine.Engine`
        An existing engine created by a previous call to `makeEngine`.
    origin : `int`
        An integer ID that should be used as the default for any datasets,
        quanta, or other entities that use a (autoincrement, origin) compound
        primary key.
    namespace : `str`, optional
        The namespace (schema) this database is associated with.  If `None`,
        the default schema for the connection is used (which may be `None`).
    writeable : `bool`, optional
        If `True`, allow write operations on the database, including
        ``CREATE TABLE``.

    Notes
    -----
    The case where ``namespace is not None`` is not yet tested, and may be
    broken; we need an API for attaching to different databases in order to
    write those tests, but haven't yet worked out what is common/different
    across databases well enough to define it.
    """

    def __init__(
        self,
        *,
        engine: sqlalchemy.engine.Engine,
        origin: int,
        namespace: Optional[str] = None,
        writeable: bool = True,
    ):
        super().__init__(origin=origin, engine=engine, namespace=namespace)
        # Get the filename from a call to 'PRAGMA database_list'.
        with engine.connect() as connection:
            with closing(connection.connection.cursor()) as cursor:
                dbList = list(cursor.execute("PRAGMA database_list").fetchall())
        if len(dbList) == 0:
            raise RuntimeError("No database in connection.")
        if namespace is None:
            namespace = "main"
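        # Rows from 'PRAGMA database_list' are (seq, name, file) tuples.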
        for _, dbname, filename in dbList:
            if dbname == namespace:
                break
        else:
            raise RuntimeError(f"No '{namespace}' database in connection.")
        if not filename:
            self.filename = None
        else:
            self.filename = filename
        self._writeable = writeable
        self._autoincr: Dict[str, _AutoincrementCompoundKeyWorkaround] = {}

    @classmethod
    def makeDefaultUri(cls, root: str) -> Optional[str]:
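        # Illustrative example: for root "/some/repo" this returns
        # "sqlite:////some/repo/gen3.sqlite3".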
        return "sqlite:///" + os.path.join(root, "gen3.sqlite3")

    @classmethod
    def makeEngine(
        cls, uri: Optional[str] = None, *, filename: Optional[str] = None, writeable: bool = True
    ) -> sqlalchemy.engine.Engine:
        """Create a `sqlalchemy.engine.Engine` from a SQLAlchemy URI or
        filename.

        Parameters
        ----------
        uri : `str`, optional
            A SQLAlchemy URI connection string.
        filename : `str`, optional
            Name of the SQLite database file, or `None` to use an in-memory
            database.  Ignored if ``uri is not None``.
        writeable : `bool`, optional
            If `True`, allow write operations on the database, including
            ``CREATE TABLE``.

        Returns
        -------
        engine : `sqlalchemy.engine.Engine`
            A database engine.
        """
        # In order to be able to tell SQLite that we want a read-only or
        # read-write connection, we need to make the SQLite DBAPI connection
        # with a "URI"-based connection string.  SQLAlchemy claims it can do
        # this
        # (https://docs.sqlalchemy.org/en/13/dialects/sqlite.html#uri-connections),
        # but it doesn't seem to work as advertised.  To work around this, we
        # use the 'creator' argument to sqlalchemy.engine.create_engine, which
        # lets us pass a callable that creates the DBAPI connection.
        if uri is None:
            if filename is None:
                target = ":memory:"
                uri = "sqlite://"
            else:
                target = f"file:{filename}"
                uri = f"sqlite:///{filename}"
        else:
            parsed = urllib.parse.urlparse(uri)
            queries = parsed.query.split("&")
            if "uri=true" in queries:
                # This is a SQLAlchemy URI that is already trying to make a
                # SQLite connection via a SQLite URI, and hence there may
                # be URI components for both SQLite and SQLAlchemy.  We
                # don't need to support that, and it'd be a
                # reimplementation of all of the (broken) logic in
                # SQLAlchemy for doing this, so we just don't.
                raise NotImplementedError("SQLite connection strings with 'uri=true' are not supported.")
            # This is just a SQLAlchemy URI with a non-URI SQLite
            # connection string inside it.  Pull that out so we can use it
            # in the creator call.
            if parsed.path.startswith("/"):
                filename = parsed.path[1:]
                target = f"file:{filename}"
            else:
                filename = None
                target = ":memory:"
        if filename is None:
            if not writeable:
                raise NotImplementedError("Read-only :memory: databases are not supported.")
        else:
            if writeable:
                target += "?mode=rwc&uri=true"
            else:
                target += "?mode=ro&uri=true"
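        # Illustrative example: for a read-only, file-backed database,
        # ``target`` now looks like "file:/path/to/db.sqlite3?mode=ro&uri=true".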

        def creator() -> sqlite3.Connection:
            return sqlite3.connect(target, check_same_thread=False, uri=True)

        engine = sqlalchemy.engine.create_engine(uri, creator=creator)

        sqlalchemy.event.listen(engine, "connect", _onSqlite3Connect)
        sqlalchemy.event.listen(engine, "begin", _onSqlite3Begin)

        return engine

    @classmethod
    def fromEngine(
        cls,
        engine: sqlalchemy.engine.Engine,
        *,
        origin: int,
        namespace: Optional[str] = None,
        writeable: bool = True,
    ) -> Database:
        return cls(engine=engine, origin=origin, writeable=writeable, namespace=namespace)

    def isWriteable(self) -> bool:
        return self._writeable

    def __str__(self) -> str:
        if self.filename:
            return f"SQLite3@{self.filename}"
        else:
            return "SQLite3@:memory:"

    def _lockTables(
        self, connection: sqlalchemy.engine.Connection, tables: Iterable[sqlalchemy.schema.Table] = ()
    ) -> None:
        # Docstring inherited.
        # Our SQLite database always acquires full-database locks at the
        # beginning of a transaction, so there's no need to acquire
        # table-level locks - which is good, because SQLite doesn't have
        # table-level locking.
        pass

    # MyPy claims that the return type here isn't covariant with the return
    # type of the base class method, which is formally correct but irrelevant
    # - the base class return type is _GeneratorContextManager, but only
    # because it's generated by the contextmanager decorator.
    def declareStaticTables(self, *, create: bool) -> ContextManager[StaticTablesContext]:  # type: ignore
        # If the user asked for an in-memory, writeable database, then we may
        # need to re-create schema even if create=False because schema can be
        # lost on re-connect.  This is only really relevant for tests, and
        # it's convenient there.
        if self.filename is None and self.isWriteable():
            inspector = sqlalchemy.inspect(self._engine)
            tables = inspector.get_table_names(schema=self.namespace)
            if not tables:
                create = True
        return super().declareStaticTables(create=create)

    def _convertFieldSpec(
        self, table: str, spec: ddl.FieldSpec, metadata: sqlalchemy.MetaData, **kwargs: Any
    ) -> sqlalchemy.schema.Column:
        if spec.autoincrement:
            if not spec.primaryKey:
                raise RuntimeError(
                    f"Autoincrement field {table}.{spec.name} that is not a primary key is not supported."
                )
            if spec.dtype != sqlalchemy.Integer:
                # SQLite's autoincrement is really limited; it only works if
                # the column type is exactly "INTEGER".  But it also doesn't
                # care about the distinctions between different integer types,
                # so it's safe to change it.
                spec = copy.copy(spec)
                spec.dtype = sqlalchemy.Integer
        return super()._convertFieldSpec(table, spec, metadata, **kwargs)

    def _makeColumnConstraints(self, table: str, spec: ddl.FieldSpec) -> List[sqlalchemy.CheckConstraint]:
        # For SQLite we force length constraints on all string columns, since
        # SQLite otherwise ignores declared string lengths and this leads to
        # problems with other databases.

        constraints = []
        if spec.isStringType():
            name = self.shrinkDatabaseEntityName("_".join([table, "len", spec.name]))
            constraints.append(
                sqlalchemy.CheckConstraint(
                    f"length({spec.name})<={spec.length}"
                    # Oracle converts empty strings to NULL, so check that too.
                    f" AND length({spec.name})>=1",
                    name=name,
                )
            )
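            # Illustrative example: a hypothetical 64-character string column
            # "name" yields roughly
            #   CHECK (length(name)<=64 AND length(name)>=1)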

        constraints.extend(super()._makeColumnConstraints(table, spec))
        return constraints

    def _convertTableSpec(
        self, name: str, spec: ddl.TableSpec, metadata: sqlalchemy.MetaData, **kwargs: Any
    ) -> sqlalchemy.schema.Table:
        primaryKeyFieldNames = set(field.name for field in spec.fields if field.primaryKey)
        autoincrFieldNames = set(field.name for field in spec.fields if field.autoincrement)
        if len(autoincrFieldNames) > 1:
            raise RuntimeError("At most one autoincrement field per table is allowed.")
        if len(primaryKeyFieldNames) > 1 and len(autoincrFieldNames) > 0:
            # SQLite's default rowid-based autoincrement doesn't work if the
            # field is just one field in a compound primary key.  As a
            # workaround, we create an extra table with just one column that
            # we'll insert into to generate those IDs.  That's only safe if
            # that single-column table's records are already unique with just
            # the autoincrement field, not the rest of the primary key.  In
            # practice, that means the single-column table's records are those
            # for which origin == self.origin.
            (autoincrFieldName,) = autoincrFieldNames
            otherPrimaryKeyFieldNames = primaryKeyFieldNames - autoincrFieldNames
            if otherPrimaryKeyFieldNames != {"origin"}:
                # We need the only other field in the key to be 'origin'.
                raise NotImplementedError(
                    "Compound primary keys with an autoincrement are only supported in SQLite "
                    "if the only non-autoincrement primary key field is 'origin'."
                )
            self._autoincr[name] = _AutoincrementCompoundKeyWorkaround(
                table=self._convertTableSpec(f"_autoinc_{name}", _AUTOINCR_TABLE_SPEC, metadata, **kwargs),
                column=autoincrFieldName,
            )
        if not spec.recycleIds:
            kwargs = dict(kwargs, sqlite_autoincrement=True)
        return super()._convertTableSpec(name, spec, metadata, **kwargs)

    def insert(
        self,
        table: sqlalchemy.schema.Table,
        *rows: dict,
        returnIds: bool = False,
        select: Optional[sqlalchemy.sql.Select] = None,
        names: Optional[Iterable[str]] = None,
    ) -> Optional[List[int]]:
        self.assertTableWriteable(table, f"Cannot insert into read-only table {table}.")
        autoincr = self._autoincr.get(table.name)
        if autoincr is not None:
            if select is not None:
                raise NotImplementedError(
                    "Cannot do INSERT INTO ... SELECT on a SQLite table with a simulated autoincrement "
                    "compound primary key"
                )
            # This table has a compound primary key that includes an
            # autoincrement.  That doesn't work natively in SQLite, so we
            # insert into a single-column table and use those IDs.
            if not rows:
                return [] if returnIds else None
            if autoincr.column in rows[0]:
                # Caller passed the autoincrement key values explicitly in the
                # first row.  They had better have done the same for all rows,
                # or SQLAlchemy would have a problem, even if we didn't.
                assert all(autoincr.column in row for row in rows)
                # We need to insert only the values that correspond to
                # ``origin == self.origin`` into the single-column table, to
                # make sure we don't generate conflicting keys there later.
                rowsForAutoincrTable = [
                    dict(id=row[autoincr.column]) for row in rows if row["origin"] == self.origin
                ]
                # Insert into the autoincr table and the target table inside
                # a transaction.  The main-table insertion can take care of
                # returnIds for us.
                with self.transaction(), self._connection() as connection:
                    connection.execute(autoincr.table.insert(), rowsForAutoincrTable)
                    return super().insert(table, *rows, returnIds=returnIds)
            else:
                # Caller did not pass autoincrement key values on the first
                # row.  Make sure they didn't ever do that, and also make
                # sure the origin that was passed in is always self.origin,
                # because we can't safely generate autoincrement values
                # otherwise.
                assert all(autoincr.column not in row and row["origin"] == self.origin for row in rows)
                # Insert into the autoincr table one by one to get the
                # primary key values back, then insert into the target table
                # in the same transaction.
                with self.transaction():
                    newRows = []
                    ids = []
                    for row in rows:
                        newRow = row.copy()
                        with self._connection() as connection:
                            id = connection.execute(autoincr.table.insert()).inserted_primary_key[0]
                        newRow[autoincr.column] = id
                        newRows.append(newRow)
                        ids.append(id)
                    # Don't ever ask to returnIds here, because we've already
                    # got them.
                    super().insert(table, *newRows)
                if returnIds:
                    return ids
                else:
                    return None
        else:
            return super().insert(table, *rows, select=select, names=names, returnIds=returnIds)

    def replace(self, table: sqlalchemy.schema.Table, *rows: dict) -> None:
        self.assertTableWriteable(table, f"Cannot replace into read-only table {table}.")
        if not rows:
            return
        if table.name in self._autoincr:
            raise NotImplementedError(
                "replace does not support compound primary keys with autoincrement fields."
            )
        with self._connection() as connection:
            connection.execute(_Replace(table), rows)

    def ensure(self, table: sqlalchemy.schema.Table, *rows: dict) -> int:
        self.assertTableWriteable(table, f"Cannot ensure into read-only table {table}.")
        if not rows:
            return 0
        if table.name in self._autoincr:
            raise NotImplementedError(
                "ensure does not support compound primary keys with autoincrement fields."
            )
        with self._connection() as connection:
            return connection.execute(_Ensure(table), rows).rowcount

    filename: Optional[str]
    """Name of the file this database is connected to (`str` or `None`).

    Set to `None` for in-memory databases.
    """