Coverage for python/lsst/daf/butler/registry/databases/sqlite.py: 23%
163 statements
« prev ^ index » next coverage.py v7.4.4, created at 2024-04-19 10:53 +0000
« prev ^ index » next coverage.py v7.4.4, created at 2024-04-19 10:53 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
27from __future__ import annotations
29__all__ = ["SqliteDatabase"]
31import copy
32import os
33import sqlite3
34import urllib.parse
35from collections.abc import Iterable
36from contextlib import AbstractContextManager, closing
37from typing import Any
39import sqlalchemy
40import sqlalchemy.dialects.sqlite
41import sqlalchemy.ext.compiler
43from ... import ddl
44from ..._named import NamedValueAbstractSet
45from ..interfaces import Database, StaticTablesContext
48def _onSqlite3Connect(
49 dbapiConnection: sqlite3.Connection, connectionRecord: sqlalchemy.pool._ConnectionRecord
50) -> None:
51 assert isinstance(dbapiConnection, sqlite3.Connection)
52 # Prevent pysqlite from emitting BEGIN and COMMIT statements.
53 dbapiConnection.isolation_level = None
54 # Enable foreign keys
55 with closing(dbapiConnection.cursor()) as cursor:
56 cursor.execute("PRAGMA foreign_keys=ON;")
57 cursor.execute("PRAGMA busy_timeout = 300000;") # in ms, so 5min (way longer than should be needed)
class SqliteDatabase(Database):
    """A `Database` implementation backed by SQLite3.

    Parameters
    ----------
    engine : `sqlalchemy.engine.Engine`
        Engine to use for this connection.
    origin : `int`
        An integer ID that should be used as the default for any datasets,
        quanta, or other entities that use a (autoincrement, origin) compound
        primary key.
    namespace : `str`, optional
        The namespace (schema) this database is associated with.  If `None`,
        the default schema for the connection is used (which may be `None`).
    writeable : `bool`, optional
        If `True`, allow write operations on the database, including
        ``CREATE TABLE``.

    Notes
    -----
    The case where ``namespace is not None`` is not yet tested, and may be
    broken; we need an API for attaching to different databases in order to
    write those tests, but haven't yet worked out what is common/different
    across databases well enough to define it.
    """

    def __init__(
        self,
        *,
        engine: sqlalchemy.engine.Engine,
        origin: int,
        namespace: str | None = None,
        writeable: bool = True,
    ):
        # Look up the file backing ``namespace`` and hand everything to the
        # initializer that is shared with ``clone``.
        self._init(
            engine=engine,
            origin=origin,
            namespace=namespace,
            writeable=writeable,
            filename=_find_database_filename(engine, namespace),
            metadata=None,
        )

    def _init(
        self,
        *,
        engine: sqlalchemy.engine.Engine,
        origin: int,
        namespace: str | None = None,
        writeable: bool = True,
        filename: str | None,
        metadata: sqlalchemy.schema.MetaData | None,
    ) -> None:
        # Common initialization path for ``__init__`` and ``clone``: set up
        # the base class first, then record the SQLite-specific state.
        super().__init__(origin=origin, engine=engine, namespace=namespace, metadata=metadata)
        self._writeable = writeable
        self.filename = filename

    def clone(self) -> SqliteDatabase:
        # Allocate without running ``__init__`` so the already-known filename
        # and metadata are reused instead of being looked up again.
        cls = type(self)
        duplicate = cls.__new__(cls)
        duplicate._init(
            engine=self._engine,
            origin=self.origin,
            namespace=self.namespace,
            writeable=self._writeable,
            filename=self.filename,
            metadata=self._metadata,
        )
        return duplicate

    @classmethod
    def makeDefaultUri(cls, root: str) -> str | None:
        # The default database is a ``gen3.sqlite3`` file directly under
        # ``root``.
        database_path = os.path.join(root, "gen3.sqlite3")
        return f"sqlite:///{database_path}"

    @classmethod
    def makeEngine(
        cls,
        uri: str | sqlalchemy.engine.URL | None = None,
        *,
        filename: str | None = None,
        writeable: bool = True,
    ) -> sqlalchemy.engine.Engine:
        """Build a `sqlalchemy.engine.Engine` from a SQLAlchemy URI or a
        filename.

        Parameters
        ----------
        uri : `str` or `sqlalchemy.engine.URL`, optional
            A SQLAlchemy URI connection string.
        filename : `str`, optional
            Name of the SQLite database file, or `None` to use an in-memory
            database.  Ignored if ``uri is not None``.
        writeable : `bool`, optional
            If `True`, allow write operations on the database, including
            ``CREATE TABLE``.

        Returns
        -------
        engine : `sqlalchemy.engine.Engine`
            A database engine.

        Raises
        ------
        NotImplementedError
            Raised if ``uri`` contains a ``uri=true`` query parameter, or if
            a read-only in-memory database is requested.
        """
        # Telling SQLite whether we want a read-only or read-write connection
        # requires opening the DBAPI connection with a "URI"-style connection
        # string.  SQLAlchemy claims to support that
        # (https://docs.sqlalchemy.org/en/13/dialects/sqlite.html#uri-connections),
        # but it doesn't seem to work as advertised, so instead we give
        # sqlalchemy.engine.create_engine a 'creator' callable that opens the
        # DBAPI connection itself.
        #
        # Step 1: settle on the SQLAlchemy URI and the underlying filename
        # (`None` meaning in-memory).
        if uri is None:
            uri = "sqlite://" if filename is None else f"sqlite:///{filename}"
        else:
            if isinstance(uri, sqlalchemy.engine.URL):
                # Strings have to be parsed anyway, so work with the string
                # form only.
                uri = uri.render_as_string(hide_password=False)
            parsed = urllib.parse.urlparse(uri)
            if "uri=true" in parsed.query.split("&"):
                # A SQLAlchemy URI that is itself trying to wrap a SQLite
                # URI may carry components meant for both SQLite and
                # SQLAlchemy.  Supporting that would mean reimplementing the
                # (broken) SQLAlchemy logic for it, so we just don't.
                raise NotImplementedError("SQLite connection strings with 'uri=true' are not supported.")
            # Otherwise this is a plain SQLAlchemy URI wrapping a non-URI
            # SQLite connection string; extract the path for the creator.
            filename = parsed.path[1:] if parsed.path.startswith("/") else None

        # Step 2: build the string handed to sqlite3.connect.
        if filename is None:
            if not writeable:
                raise NotImplementedError("Read-only :memory: databases are not supported.")
            target = ":memory:"
        else:
            mode = "rwc" if writeable else "ro"
            target = f"file:{filename}?mode={mode}&uri=true"

        def _open_connection() -> sqlite3.Connection:
            return sqlite3.connect(target, check_same_thread=False, uri=True)

        engine = sqlalchemy.engine.create_engine(uri, creator=_open_connection)

        sqlalchemy.event.listen(engine, "connect", _onSqlite3Connect)

        def _begin_transaction(connection: sqlalchemy.engine.Connection) -> sqlalchemy.engine.Connection:
            assert connection.dialect.name == "sqlite"
            # pysqlite's own transaction handling is buggy and never BEGINs,
            # so we emit BEGIN ourselves.  For connections that might write
            # we ask SQLite to take its lock right away, which should lead to
            # more blocking and fewer deadlocks.
            statement = "BEGIN IMMEDIATE" if writeable else "BEGIN"
            connection.execute(sqlalchemy.text(statement))
            return connection

        sqlalchemy.event.listen(engine, "begin", _begin_transaction)

        return engine

    @classmethod
    def fromEngine(
        cls,
        engine: sqlalchemy.engine.Engine,
        *,
        origin: int,
        namespace: str | None = None,
        writeable: bool = True,
    ) -> Database:
        # Thin alternate constructor: forwards every argument to ``__init__``.
        return cls(engine=engine, origin=origin, namespace=namespace, writeable=writeable)

    def isWriteable(self) -> bool:
        # Reports the flag captured at construction time.
        return self._writeable

    def __str__(self) -> str:
        # A missing (or empty) filename is shown as an in-memory database.
        return f"SQLite3@{self.filename}" if self.filename else "SQLite3@:memory:"

    def _lockTables(
        self, connection: sqlalchemy.engine.Connection, tables: Iterable[sqlalchemy.schema.Table] = ()
    ) -> None:
        # Docstring inherited.
        # Deliberately a no-op: our SQLite database always acquires
        # full-database locks at the beginning of a transaction, so
        # table-level locks are unnecessary - which is good, because SQLite
        # doesn't have table-level locking.
        return None

    # MyPy claims that the return type here isn't covariant with the return
    # type of the base class method, which is formally correct but irrelevant
    # - the base class return type is _GeneratorContextManager, but only
    # because it's generated by the contextmanager decorator.
    def declareStaticTables(  # type: ignore
        self, *, create: bool
    ) -> AbstractContextManager[StaticTablesContext]:
        # For an in-memory, writeable database the schema can be lost on
        # re-connect, so when no tables are present we create them even if
        # the caller passed create=False.  This is only really relevant for
        # tests, and it's convenient there.
        in_memory_and_writeable = self.filename is None and self.isWriteable()
        if in_memory_and_writeable:
            existing_tables = sqlalchemy.inspect(self._engine).get_table_names(schema=self.namespace)
            if not existing_tables:
                create = True
        return super().declareStaticTables(create=create)

    def _convertFieldSpec(
        self, table: str, spec: ddl.FieldSpec, metadata: sqlalchemy.MetaData, **kwargs: Any
    ) -> sqlalchemy.schema.Column:
        if spec.autoincrement:
            if not spec.primaryKey:
                raise RuntimeError(
                    f"Autoincrement field {table}.{spec.name} that is not a primary key is not supported."
                )
            if spec.dtype != sqlalchemy.Integer:
                # SQLite's autoincrement is really limited; it only works if
                # the column type is exactly "INTEGER".  SQLite also doesn't
                # distinguish between integer types, so swapping the type is
                # safe.  Work on a copy so the caller's spec is not modified.
                spec = copy.copy(spec)
                spec.dtype = sqlalchemy.Integer
        return super()._convertFieldSpec(table, spec, metadata, **kwargs)

    def _makeColumnConstraints(self, table: str, spec: ddl.FieldSpec) -> list[sqlalchemy.CheckConstraint]:
        # SQLite ignores declared string lengths, which leads to problems
        # with other databases, so every string column gets an explicit
        # length check here.
        constraints = []
        if spec.isStringType():
            constraint_name = self.shrinkDatabaseEntityName("_".join([table, "len", spec.name]))
            # The lower bound is there because Oracle converts empty strings
            # to NULL.
            length_check = f'length("{spec.name}")<={spec.length} AND length("{spec.name}")>=1'
            constraints.append(sqlalchemy.CheckConstraint(length_check, name=constraint_name))

        constraints.extend(super()._makeColumnConstraints(table, spec))
        return constraints

    def _convertTableSpec(
        self, name: str, spec: ddl.TableSpec, metadata: sqlalchemy.MetaData, **kwargs: Any
    ) -> sqlalchemy.schema.Table:
        key_names = set()
        autoincrement_names = set()
        for field in spec.fields:
            if field.primaryKey:
                key_names.add(field.name)
            if field.autoincrement:
                autoincrement_names.add(field.name)
        if len(autoincrement_names) > 1:
            raise RuntimeError("At most one autoincrement field per table is allowed.")
        if autoincrement_names and len(key_names) > 1:
            # SQLite's default rowid-based autoincrement doesn't work if the
            # field is just one field in a compound primary key.  The only
            # compound layout accepted here is (autoincrement field,
            # 'origin'): the autoincrement values then only need to be unique
            # among records with origin == self.origin.
            if key_names - autoincrement_names != {"origin"}:
                raise NotImplementedError(
                    "Compound primary keys with an autoincrement are only supported in SQLite "
                    "if the only non-autoincrement primary key field is 'origin'."
                )
        if not spec.recycleIds:
            # Build a new mapping rather than mutating the caller's kwargs.
            kwargs = {**kwargs, "sqlite_autoincrement": True}
        return super()._convertTableSpec(name, spec, metadata, **kwargs)

    def replace(self, table: sqlalchemy.schema.Table, *rows: dict) -> None:
        self.assertTableWriteable(table, f"Cannot replace into read-only table {table}.")
        if not rows:
            return
        upsert = sqlalchemy.dialects.sqlite.insert(table)
        incoming = upsert.excluded
        # On a primary key conflict, overwrite every non-key column with the
        # value from the row that failed to insert.
        updates = {}
        for column in table.columns:
            if column.name in table.primary_key:
                continue
            updates[column.name] = getattr(incoming, column.name)
        upsert = upsert.on_conflict_do_update(index_elements=table.primary_key, set_=updates)
        with self._transaction() as (_, connection):
            connection.execute(upsert, rows)

    def ensure(self, table: sqlalchemy.schema.Table, *rows: dict, primary_key_only: bool = False) -> int:
        self.assertTableWriteable(table, f"Cannot ensure into read-only table {table}.")
        if not rows:
            return 0
        # Restrict conflict detection to the primary key only when asked to;
        # otherwise any conflict is silently skipped.
        conflict_options = {"index_elements": table.primary_key} if primary_key_only else {}
        statement = sqlalchemy.dialects.sqlite.insert(table).on_conflict_do_nothing(**conflict_options)
        with self._transaction() as (_, connection):
            result = connection.execute(statement, rows)
            return result.rowcount

    def constant_rows(
        self,
        fields: NamedValueAbstractSet[ddl.FieldSpec],
        *rows: dict,
        name: str | None = None,
    ) -> sqlalchemy.sql.FromClause:
        # Docstring inherited.
        # While SQLite supports VALUES, it doesn't support assigning a name
        # to that construct or the names of its columns, and hence there's no
        # way to actually join it into a SELECT query.  It seems the only
        # alternative is something like:
        #
        # SELECT ? AS a, ? AS b
        # UNION ALL
        # SELECT ? AS a, ? AS b
        #
        per_row_selects = []
        for row in rows:
            labelled_literals = [
                sqlalchemy.sql.literal(row[field.name], field.dtype).label(field.name) for field in fields
            ]
            per_row_selects.append(sqlalchemy.sql.select(*labelled_literals))
        return sqlalchemy.sql.union_all(*per_row_selects).alias(name)

    @property
    def has_distinct_on(self) -> bool:
        # Docstring inherited.
        return False

    @property
    def has_any_aggregate(self) -> bool:
        # Docstring inherited.
        return True

    def apply_any_aggregate(self, column: sqlalchemy.ColumnElement[Any]) -> sqlalchemy.ColumnElement[Any]:
        # Docstring inherited.
        # SQLite permits a column in the SELECT clause without an aggregate
        # function even when it is not in the GROUP BY, picking an arbitrary
        # value if there is more than one - so the column is returned as-is.
        return column

    filename: str | None
    """Name of the file this database is connected to (`str` or `None`).

    Set to `None` for in-memory databases.
    """
def _find_database_filename(
    engine: sqlalchemy.engine.Engine,
    namespace: str | None = None,
) -> str | None:
    """Return the file backing a database attached to a SQLite connection.

    Parameters
    ----------
    engine : `sqlalchemy.engine.Engine`
        Engine used to open a connection on which ``PRAGMA database_list``
        is run.
    namespace : `str`, optional
        Name of the attached database to look up.  `None` is treated as
        ``"main"``.

    Returns
    -------
    filename : `str` or `None`
        The filename reported for the requested database, or `None` if the
        reported filename is empty.

    Raises
    ------
    RuntimeError
        Raised if the connection reports no databases at all, or none whose
        name matches ``namespace``.
    """
    # Get the filename from a call to 'PRAGMA database_list'.  ``fetchall``
    # already returns a list, so the rows remain usable after the cursor and
    # connection are closed.
    with engine.connect() as connection, closing(connection.connection.cursor()) as cursor:
        attached = cursor.execute("PRAGMA database_list").fetchall()
    if not attached:
        raise RuntimeError("No database in connection.")
    if namespace is None:
        namespace = "main"
    # Each row is unpacked as (<ignored>, database name, filename); return
    # the filename of the first row matching this namespace, normalizing an
    # empty filename to None.
    for _, dbname, filename in attached:
        if dbname == namespace:
            return filename or None
    raise RuntimeError(f"No '{namespace}' database in connection.")