Coverage for python/lsst/daf/butler/registry/databases/sqlite.py: 19%
146 statements
« prev ^ index » next coverage.py v7.3.1, created at 2023-10-02 08:00 +0000
« prev ^ index » next coverage.py v7.3.1, created at 2023-10-02 08:00 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
27from __future__ import annotations
29__all__ = ["SqliteDatabase"]
31import copy
32import os
33import sqlite3
34import urllib.parse
35from collections.abc import Iterable
36from contextlib import AbstractContextManager, closing
37from typing import Any
39import sqlalchemy
40import sqlalchemy.dialects.sqlite
41import sqlalchemy.ext.compiler
43from ...core import ddl
44from ...core.named import NamedValueAbstractSet
45from ..interfaces import Database, StaticTablesContext
def _onSqlite3Connect(
    dbapiConnection: sqlite3.Connection, connectionRecord: sqlalchemy.pool._ConnectionRecord
) -> None:
    """Configure a freshly opened SQLite DBAPI connection.

    Parameters
    ----------
    dbapiConnection : `sqlite3.Connection`
        The raw DBAPI connection that was just opened.
    connectionRecord : `sqlalchemy.pool._ConnectionRecord`
        Pool record associated with the connection; unused here.
    """
    assert isinstance(dbapiConnection, sqlite3.Connection)
    # Setting the isolation level to None stops pysqlite from emitting its
    # own BEGIN and COMMIT statements.
    dbapiConnection.isolation_level = None
    pragmas = (
        # Enable foreign key enforcement.
        "PRAGMA foreign_keys=ON;",
        # Timeout is in ms, so 5min (way longer than should be needed).
        "PRAGMA busy_timeout = 300000;",
    )
    with closing(dbapiConnection.cursor()) as cursor:
        for statement in pragmas:
            cursor.execute(statement)
class SqliteDatabase(Database):
    """An implementation of the `Database` interface for SQLite3.

    Parameters
    ----------
    engine : `sqlalchemy.engine.Engine`
        Engine to use for this database's connections (for example one
        returned by `makeEngine`).
    origin : `int`
        An integer ID that should be used as the default for any datasets,
        quanta, or other entities that use a (autoincrement, origin) compound
        primary key.
    namespace : `str`, optional
        The namespace (schema) this database is associated with.  If `None`,
        the default schema for the connection is used (which may be `None`).
    writeable : `bool`, optional
        If `True`, allow write operations on the database, including
        ``CREATE TABLE``.

    Raises
    ------
    RuntimeError
        Raised if ``PRAGMA database_list`` reports no databases, or none
        whose name matches ``namespace`` (``"main"`` when ``namespace`` is
        `None`).

    Notes
    -----
    The case where ``namespace is not None`` is not yet tested, and may be
    broken; we need an API for attaching to different databases in order to
    write those tests, but haven't yet worked out what is common/different
    across databases well enough to define it.
    """

    def __init__(
        self,
        *,
        engine: sqlalchemy.engine.Engine,
        origin: int,
        namespace: str | None = None,
        writeable: bool = True,
    ):
        super().__init__(origin=origin, engine=engine, namespace=namespace)
        # Get the filename from a call to 'PRAGMA database_list'.
        with engine.connect() as connection, closing(connection.connection.cursor()) as cursor:
            dbList = cursor.execute("PRAGMA database_list").fetchall()
        if not dbList:
            raise RuntimeError("No database in connection.")
        if namespace is None:
            namespace = "main"
        # Look for the filename associated with this namespace.
        for _, dbname, filename in dbList:  # B007
            if dbname == namespace:
                break
        else:
            raise RuntimeError(f"No '{namespace}' database in connection.")
        # An empty filename is normalized to None, which the rest of this
        # class treats as "in-memory database".
        self.filename = filename or None
        self._writeable = writeable

    @classmethod
    def makeDefaultUri(cls, root: str) -> str | None:
        """Return a SQLAlchemy URI for a ``gen3.sqlite3`` file under ``root``.

        Parameters
        ----------
        root : `str`
            Directory path that the database file name is joined to.

        Returns
        -------
        uri : `str`
            A ``sqlite:///`` URI pointing at ``<root>/gen3.sqlite3``.
        """
        return "sqlite:///" + os.path.join(root, "gen3.sqlite3")

    @classmethod
    def makeEngine(
        cls,
        uri: str | sqlalchemy.engine.URL | None = None,
        *,
        filename: str | None = None,
        writeable: bool = True,
    ) -> sqlalchemy.engine.Engine:
        """Create a `sqlalchemy.engine.Engine` from a SQLAlchemy URI or
        filename.

        Parameters
        ----------
        uri : `str` or `sqlalchemy.engine.URL`, optional
            A SQLAlchemy URI connection string.
        filename : `str`, optional
            Name of the SQLite database file, or `None` to use an in-memory
            database.  Ignored if ``uri is not None``.
        writeable : `bool`, optional
            If `True`, allow write operations on the database, including
            ``CREATE TABLE``.

        Returns
        -------
        engine : `sqlalchemy.engine.Engine`
            A database engine.

        Raises
        ------
        NotImplementedError
            Raised if ``uri`` has ``uri=true`` among its query parameters, or
            if a read-only in-memory database is requested.
        """
        # In order to be able to tell SQLite that we want a read-only or
        # read-write connection, we need to make the SQLite DBAPI connection
        # with a "URI"-based connection string.  SQLAlchemy claims it can do
        # this
        # (https://docs.sqlalchemy.org/en/13/dialects/sqlite.html#uri-connections),
        # but it doesn't seem to work as advertised.  To work around this, we
        # use the 'creator' argument to sqlalchemy.engine.create_engine, which
        # lets us pass a callable that creates the DBAPI connection.
        #
        # NOTE(review): ``filename`` is interpolated into the SQLite URI
        # without percent-encoding; names containing '?', '#' or '%' would
        # presumably be misparsed by SQLite's URI handling - confirm.
        if uri is None:
            if filename is None:
                target = ":memory:"
                uri = "sqlite://"
            else:
                target = f"file:{filename}"
                uri = f"sqlite:///{filename}"
        else:
            if isinstance(uri, sqlalchemy.engine.URL):
                # We have to parse strings anyway, so convert it to string.
                uri = uri.render_as_string(hide_password=False)
            parsed = urllib.parse.urlparse(uri)
            queries = parsed.query.split("&")
            if "uri=true" in queries:
                # This is a SQLAlchemy URI that is already trying to make a
                # SQLite connection via a SQLite URI, and hence there may
                # be URI components for both SQLite and SQLAlchemy.  We
                # don't need to support that, and it'd be a
                # reimplementation of all of the (broken) logic in
                # SQLAlchemy for doing this, so we just don't.
                raise NotImplementedError("SQLite connection strings with 'uri=true' are not supported.")
            # This is just a SQLAlchemy URI with a non-URI SQLite
            # connection string inside it.  Pull that out so we can use it
            # in the creator call.
            if parsed.path.startswith("/"):
                filename = parsed.path[1:]
                target = f"file:{filename}"
            else:
                filename = None
                target = ":memory:"
        if filename is None:
            if not writeable:
                raise NotImplementedError("Read-only :memory: databases are not supported.")
        else:
            # File-backed databases get an explicit open mode via the SQLite
            # URI query string.
            mode = "rwc" if writeable else "ro"
            target += f"?mode={mode}&uri=true"

        def creator() -> sqlite3.Connection:
            return sqlite3.connect(target, check_same_thread=False, uri=True)

        engine = sqlalchemy.engine.create_engine(uri, creator=creator)

        sqlalchemy.event.listen(engine, "connect", _onSqlite3Connect)

        def _onSqlite3Begin(connection: sqlalchemy.engine.Connection) -> sqlalchemy.engine.Connection:
            assert connection.dialect.name == "sqlite"
            # Replace pysqlite's buggy transaction handling that never BEGINs
            # with our own that does, and tell SQLite to try to acquire a lock
            # as soon as we start a transaction that might involve writes (this
            # should lead to more blocking and fewer deadlocks).
            if writeable:
                connection.execute(sqlalchemy.text("BEGIN IMMEDIATE"))
            else:
                connection.execute(sqlalchemy.text("BEGIN"))
            return connection

        sqlalchemy.event.listen(engine, "begin", _onSqlite3Begin)

        return engine

    @classmethod
    def fromEngine(
        cls,
        engine: sqlalchemy.engine.Engine,
        *,
        origin: int,
        namespace: str | None = None,
        writeable: bool = True,
    ) -> Database:
        """Construct an instance of this class around an existing engine.

        All arguments are forwarded unchanged to the class constructor.
        """
        return cls(engine=engine, origin=origin, writeable=writeable, namespace=namespace)

    def isWriteable(self) -> bool:
        """Return the ``writeable`` flag this instance was constructed with."""
        return self._writeable

    def __str__(self) -> str:
        return f"SQLite3@{self.filename}" if self.filename else "SQLite3@:memory:"

    def _lockTables(
        self, connection: sqlalchemy.engine.Connection, tables: Iterable[sqlalchemy.schema.Table] = ()
    ) -> None:
        # Docstring inherited.
        # Our SQLite database always acquires full-database locks at the
        # beginning of a transaction, so there's no need to acquire table-level
        # locks - which is good, because SQLite doesn't have table-level
        # locking.
        pass

    # MyPy claims that the return type here isn't covariant with the return
    # type of the base class method, which is formally correct but irrelevant
    # - the base class return type is _GeneratorContextManager, but only
    # because it's generated by the contextmanager decorator.
    def declareStaticTables(  # type: ignore
        self, *, create: bool
    ) -> AbstractContextManager[StaticTablesContext]:
        # If the user asked for an in-memory, writeable database, then we may
        # need to re-create schema even if create=False because schema can be
        # lost on re-connect.  This is only really relevant for tests, and it's
        # convenient there.
        if self.filename is None and self.isWriteable():
            inspector = sqlalchemy.inspect(self._engine)
            tables = inspector.get_table_names(schema=self.namespace)
            if not tables:
                create = True
        return super().declareStaticTables(create=create)

    def _convertFieldSpec(
        self, table: str, spec: ddl.FieldSpec, metadata: sqlalchemy.MetaData, **kwargs: Any
    ) -> sqlalchemy.schema.Column:
        if spec.autoincrement:
            if not spec.primaryKey:
                raise RuntimeError(
                    f"Autoincrement field {table}.{spec.name} that is not a primary key is not supported."
                )
            if spec.dtype != sqlalchemy.Integer:
                # SQLite's autoincrement is really limited; it only works if
                # the column type is exactly "INTEGER".  But it also doesn't
                # care about the distinctions between different integer types,
                # so it's safe to change it.  Work on a copy so the caller's
                # spec is left untouched.
                spec = copy.copy(spec)
                spec.dtype = sqlalchemy.Integer
        return super()._convertFieldSpec(table, spec, metadata, **kwargs)

    def _makeColumnConstraints(self, table: str, spec: ddl.FieldSpec) -> list[sqlalchemy.CheckConstraint]:
        # For sqlite we force constraints on all string columns since sqlite
        # ignores everything otherwise and this leads to problems with
        # other databases.

        constraints = []
        if spec.isStringType():
            name = self.shrinkDatabaseEntityName("_".join([table, "len", spec.name]))
            # The lower bound is there because Oracle converts empty strings
            # to NULL, so empty strings are rejected here as well.
            constraints.append(
                sqlalchemy.CheckConstraint(
                    f'length("{spec.name}")<={spec.length}' f' AND length("{spec.name}")>=1',
                    name=name,
                )
            )

        constraints.extend(super()._makeColumnConstraints(table, spec))
        return constraints

    def _convertTableSpec(
        self, name: str, spec: ddl.TableSpec, metadata: sqlalchemy.MetaData, **kwargs: Any
    ) -> sqlalchemy.schema.Table:
        primaryKeyFieldNames = {field.name for field in spec.fields if field.primaryKey}
        autoincrFieldNames = {field.name for field in spec.fields if field.autoincrement}
        if len(autoincrFieldNames) > 1:
            raise RuntimeError("At most one autoincrement field per table is allowed.")
        if len(primaryKeyFieldNames) > 1 and len(autoincrFieldNames) > 0:
            # SQLite's default rowid-based autoincrement doesn't work if the
            # field is just one field in a compound primary key.  As a
            # workaround, we create an extra table with just one column that
            # we'll insert into to generate those IDs.  That's only safe if
            # that single-column table's records are already unique with just
            # the autoincrement field, not the rest of the primary key.  In
            # practice, that means the single-column table's records are those
            # for which origin == self.origin.
            #
            # NOTE(review): no such extra table is created in this method;
            # presumably that happens elsewhere or the comment above is
            # partly stale - confirm.
            otherPrimaryKeyFieldNames = primaryKeyFieldNames - autoincrFieldNames
            if otherPrimaryKeyFieldNames != {"origin"}:
                # We need the only other field in the key to be 'origin'.
                raise NotImplementedError(
                    "Compound primary keys with an autoincrement are only supported in SQLite "
                    "if the only non-autoincrement primary key field is 'origin'."
                )
        if not spec.recycleIds:
            kwargs = dict(kwargs, sqlite_autoincrement=True)
        return super()._convertTableSpec(name, spec, metadata, **kwargs)

    def replace(self, table: sqlalchemy.schema.Table, *rows: dict) -> None:
        # Insert ``rows``; on a primary-key conflict, overwrite every
        # non-primary-key column with the incoming value.
        self.assertTableWriteable(table, f"Cannot replace into read-only table {table}.")
        if not rows:
            return
        query = sqlalchemy.dialects.sqlite.insert(table)
        excluded = query.excluded
        data = {
            column.name: getattr(excluded, column.name)
            for column in table.columns
            if column.name not in table.primary_key
        }
        query = query.on_conflict_do_update(index_elements=table.primary_key, set_=data)
        with self._transaction() as (_, connection):
            connection.execute(query, rows)

    def ensure(self, table: sqlalchemy.schema.Table, *rows: dict, primary_key_only: bool = False) -> int:
        # Insert ``rows``, skipping any that conflict, and return the
        # ``rowcount`` reported for the statement.
        self.assertTableWriteable(table, f"Cannot ensure into read-only table {table}.")
        if not rows:
            return 0
        query = sqlalchemy.dialects.sqlite.insert(table)
        if primary_key_only:
            query = query.on_conflict_do_nothing(index_elements=table.primary_key)
        else:
            query = query.on_conflict_do_nothing()
        with self._transaction() as (_, connection):
            return connection.execute(query, rows).rowcount

    def constant_rows(
        self,
        fields: NamedValueAbstractSet[ddl.FieldSpec],
        *rows: dict,
        name: str | None = None,
    ) -> sqlalchemy.sql.FromClause:
        # Docstring inherited.
        # While SQLite supports VALUES, it doesn't support assigning a name
        # to that construct or the names of its columns, and hence there's no
        # way to actually join it into a SELECT query.  It seems the only
        # alternative is something like:
        #
        #    SELECT ? AS a, ? AS b
        #    UNION ALL
        #    SELECT ? AS a, ? AS b
        #
        selects = [
            sqlalchemy.sql.select(
                *[sqlalchemy.sql.literal(row[field.name], field.dtype).label(field.name) for field in fields]
            )
            for row in rows
        ]
        return sqlalchemy.sql.union_all(*selects).alias(name)

    filename: str | None
    """Name of the file this database is connected to (`str` or `None`).

    Set to `None` for in-memory databases.
    """