Coverage for python/lsst/daf/butler/registry/databases/sqlite.py: 15%
143 statements
« prev ^ index » next coverage.py v7.5.0, created at 2024-04-24 23:50 -0700
« prev ^ index » next coverage.py v7.5.0, created at 2024-04-24 23:50 -0700
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = ["SqliteDatabase"]
25import copy
26import os
27import sqlite3
28import urllib.parse
29from contextlib import closing
30from typing import Any, ContextManager, Iterable, List, Optional
32import sqlalchemy
33import sqlalchemy.dialects.sqlite
34import sqlalchemy.ext.compiler
36from ...core import ddl
37from ..interfaces import Database, StaticTablesContext
def _onSqlite3Connect(
    dbapiConnection: sqlite3.Connection, connectionRecord: sqlalchemy.pool._ConnectionRecord
) -> None:
    """Configure a newly opened SQLite DBAPI connection.

    Intended for use as a SQLAlchemy ``"connect"`` event listener.

    Parameters
    ----------
    dbapiConnection : `sqlite3.Connection`
        The raw DBAPI connection that was just created.
    connectionRecord : `sqlalchemy.pool._ConnectionRecord`
        Pool record associated with the connection; not used here.
    """
    assert isinstance(dbapiConnection, sqlite3.Connection)
    # With no isolation level pysqlite stops emitting its own BEGIN and COMMIT
    # statements.
    dbapiConnection.isolation_level = None
    pragmas = (
        # Enable foreign keys.
        "PRAGMA foreign_keys=ON;",
        # Timeout is in ms, so 5min (way longer than should be needed).
        "PRAGMA busy_timeout = 300000;",
    )
    with closing(dbapiConnection.cursor()) as pragmaCursor:
        for statement in pragmas:
            pragmaCursor.execute(statement)
class SqliteDatabase(Database):
    """An implementation of the `Database` interface for SQLite3.

    Parameters
    ----------
    engine : `sqlalchemy.engine.Engine`
        Engine used to connect to the database, such as one returned by
        `makeEngine`.
    origin : `int`
        An integer ID that should be used as the default for any datasets,
        quanta, or other entities that use a (autoincrement, origin) compound
        primary key.
    namespace : `str`, optional
        The namespace (schema) this database is associated with.  If `None`,
        the default schema for the connection is used (which may be `None`).
    writeable : `bool`, optional
        If `True`, allow write operations on the database, including
        ``CREATE TABLE``.

    Raises
    ------
    RuntimeError
        Raised if ``PRAGMA database_list`` reports no databases, or if none
        of the reported databases is named ``namespace`` (``"main"`` when
        ``namespace`` is `None`).

    Notes
    -----
    The case where ``namespace is not None`` is not yet tested, and may be
    broken; we need an API for attaching to different databases in order to
    write those tests, but haven't yet worked out what is common/different
    across databases well enough to define it.
    """

    def __init__(
        self,
        *,
        engine: sqlalchemy.engine.Engine,
        origin: int,
        namespace: Optional[str] = None,
        writeable: bool = True,
    ):
        super().__init__(origin=origin, engine=engine, namespace=namespace)
        # Get the filename from a call to 'PRAGMA database_list'; each row is
        # unpacked below as (sequence number, database name, filename).
        with engine.connect() as connection, closing(connection.connection.cursor()) as cursor:
            dbList = cursor.execute("PRAGMA database_list").fetchall()
        if not dbList:
            raise RuntimeError("No database in connection.")
        if namespace is None:
            namespace = "main"
        for _, dbname, filename in dbList:
            if dbname == namespace:
                break
        else:
            raise RuntimeError(f"No '{namespace}' database in connection.")
        # An empty filename is stored as None (see the ``filename`` attribute
        # documentation: None means an in-memory database).
        self.filename = filename or None
        self._writeable = writeable

    @classmethod
    def makeDefaultUri(cls, root: str) -> Optional[str]:
        """Return the SQLAlchemy URI for a ``gen3.sqlite3`` file in ``root``.

        Parameters
        ----------
        root : `str`
            Directory that should contain the database file.

        Returns
        -------
        uri : `str`
            A ``sqlite:///`` URI pointing at ``gen3.sqlite3`` inside ``root``.
        """
        return "sqlite:///" + os.path.join(root, "gen3.sqlite3")

    @classmethod
    def makeEngine(
        cls, uri: Optional[str] = None, *, filename: Optional[str] = None, writeable: bool = True
    ) -> sqlalchemy.engine.Engine:
        """Create a `sqlalchemy.engine.Engine` from a SQLAlchemy URI or
        filename.

        Parameters
        ----------
        uri : `str`, optional
            A SQLAlchemy URI connection string.
        filename : `str`, optional
            Name of the SQLite database file, or `None` to use an in-memory
            database.  Ignored if ``uri is not None``.
        writeable : `bool`, optional
            If `True`, allow write operations on the database, including
            ``CREATE TABLE``.

        Returns
        -------
        engine : `sqlalchemy.engine.Engine`
            A database engine.

        Raises
        ------
        NotImplementedError
            Raised if ``uri`` contains a ``uri=true`` query parameter, or if
            a read-only in-memory database is requested.
        """
        # In order to be able to tell SQLite that we want a read-only or
        # read-write connection, we need to make the SQLite DBAPI connection
        # with a "URI"-based connection string.  SQLAlchemy claims it can do
        # this
        # (https://docs.sqlalchemy.org/en/13/dialects/sqlite.html#uri-connections),
        # but it doesn't seem to work as advertised.  To work around this, we
        # use the 'creator' argument to sqlalchemy.engine.create_engine, which
        # lets us pass a callable that creates the DBAPI connection.
        if uri is None:
            if filename is None:
                target = ":memory:"
                uri = "sqlite://"
            else:
                target = f"file:{filename}"
                uri = f"sqlite:///{filename}"
        else:
            parsed = urllib.parse.urlparse(uri)
            queries = parsed.query.split("&")
            if "uri=true" in queries:
                # This is a SQLAlchemy URI that is already trying to make a
                # SQLite connection via a SQLite URI, and hence there may
                # be URI components for both SQLite and SQLAlchemy.  We
                # don't need to support that, and it'd be a
                # reimplementation of all of the (broken) logic in
                # SQLAlchemy for doing this, so we just don't.
                raise NotImplementedError("SQLite connection strings with 'uri=true' are not supported.")
            # This is just a SQLAlchemy URI with a non-URI SQLite
            # connection string inside it.  Pull that out so we can use it
            # in the creator call.
            if parsed.path.startswith("/"):
                filename = parsed.path[1:]
                target = f"file:{filename}"
            else:
                filename = None
                target = ":memory:"
        if filename is None:
            if not writeable:
                raise NotImplementedError("Read-only :memory: databases are not supported.")
        else:
            # File-backed database: select the access mode through the SQLite
            # URI query string.
            mode = "rwc" if writeable else "ro"
            target += f"?mode={mode}&uri=true"

        def creator() -> sqlite3.Connection:
            return sqlite3.connect(target, check_same_thread=False, uri=True)

        engine = sqlalchemy.engine.create_engine(uri, creator=creator)

        sqlalchemy.event.listen(engine, "connect", _onSqlite3Connect)

        def _onSqlite3Begin(connection: sqlalchemy.engine.Connection) -> sqlalchemy.engine.Connection:
            assert connection.dialect.name == "sqlite"
            # Replace pysqlite's buggy transaction handling that never BEGINs
            # with our own that does, and tell SQLite to try to acquire a lock
            # as soon as we start a transaction that might involve writes (this
            # should lead to more blocking and fewer deadlocks).
            if writeable:
                connection.execute(sqlalchemy.text("BEGIN IMMEDIATE"))
            else:
                connection.execute(sqlalchemy.text("BEGIN"))
            return connection

        sqlalchemy.event.listen(engine, "begin", _onSqlite3Begin)

        return engine

    @classmethod
    def fromEngine(
        cls,
        engine: sqlalchemy.engine.Engine,
        *,
        origin: int,
        namespace: Optional[str] = None,
        writeable: bool = True,
    ) -> Database:
        """Construct an instance from an existing engine.

        All arguments are forwarded unchanged to the class constructor.
        """
        return cls(engine=engine, origin=origin, writeable=writeable, namespace=namespace)

    def isWriteable(self) -> bool:
        """Return the ``writeable`` flag this instance was constructed with."""
        return self._writeable

    def __str__(self) -> str:
        if self.filename:
            return f"SQLite3@{self.filename}"
        return "SQLite3@:memory:"

    def _lockTables(
        self, connection: sqlalchemy.engine.Connection, tables: Iterable[sqlalchemy.schema.Table] = ()
    ) -> None:
        # Docstring inherited.
        # Our SQLite database always acquires full-database locks at the
        # beginning of a transaction, so there's no need to acquire table-level
        # locks - which is good, because SQLite doesn't have table-level
        # locking.
        pass

    # MyPy claims that the return type here isn't covariant with the return
    # type of the base class method, which is formally correct but irrelevant
    # - the base class return type is _GeneratorContextManager, but only
    # because it's generated by the contextmanager decorator.
    def declareStaticTables(self, *, create: bool) -> ContextManager[StaticTablesContext]:  # type: ignore
        # If the user asked for an in-memory, writeable database, then we may
        # need to re-create schema even if create=False because schema can be
        # lost on re-connect.  This is only really relevant for tests, and it's
        # convenient there.
        if self.filename is None and self.isWriteable():
            inspector = sqlalchemy.inspect(self._engine)
            tables = inspector.get_table_names(schema=self.namespace)
            if not tables:
                create = True
        return super().declareStaticTables(create=create)

    def _convertFieldSpec(
        self, table: str, spec: ddl.FieldSpec, metadata: sqlalchemy.MetaData, **kwargs: Any
    ) -> sqlalchemy.schema.Column:
        # Autoincrement fields must be primary keys and are coerced to
        # sqlalchemy.Integer before delegating to the base class.
        if spec.autoincrement:
            if not spec.primaryKey:
                raise RuntimeError(
                    f"Autoincrement field {table}.{spec.name} that is not a primary key is not supported."
                )
            if spec.dtype != sqlalchemy.Integer:
                # SQLite's autoincrement is really limited; it only works if
                # the column type is exactly "INTEGER".  But it also doesn't
                # care about the distinctions between different integer types,
                # so it's safe to change it.  Work on a copy so the caller's
                # spec is not modified.
                spec = copy.copy(spec)
                spec.dtype = sqlalchemy.Integer
        return super()._convertFieldSpec(table, spec, metadata, **kwargs)

    def _makeColumnConstraints(self, table: str, spec: ddl.FieldSpec) -> List[sqlalchemy.CheckConstraint]:
        # For sqlite we force constraints on all string columns since sqlite
        # ignores everything otherwise and this leads to problems with
        # other databases.

        constraints = []
        if spec.isStringType():
            name = self.shrinkDatabaseEntityName("_".join([table, "len", spec.name]))
            constraints.append(
                sqlalchemy.CheckConstraint(
                    f"length({spec.name})<={spec.length}"
                    # Oracle converts empty strings to NULL, so also check
                    # for a minimum length of one.
                    f" AND length({spec.name})>=1",
                    name=name,
                )
            )

        constraints.extend(super()._makeColumnConstraints(table, spec))
        return constraints

    def _convertTableSpec(
        self, name: str, spec: ddl.TableSpec, metadata: sqlalchemy.MetaData, **kwargs: Any
    ) -> sqlalchemy.schema.Table:
        primaryKeyFieldNames = {field.name for field in spec.fields if field.primaryKey}
        autoincrFieldNames = {field.name for field in spec.fields if field.autoincrement}
        if len(autoincrFieldNames) > 1:
            raise RuntimeError("At most one autoincrement field per table is allowed.")
        if len(primaryKeyFieldNames) > 1 and autoincrFieldNames:
            # SQLite's default rowid-based autoincrement doesn't work if the
            # field is just one field in a compound primary key, so such keys
            # are only accepted when the rest of the key is exactly 'origin'.
            # NOTE(review): the comment previously here described creating an
            # extra single-column table to generate the IDs; nothing in this
            # method does that, so that description may be stale - confirm.
            otherPrimaryKeyFieldNames = primaryKeyFieldNames - autoincrFieldNames
            if otherPrimaryKeyFieldNames != {"origin"}:
                # We need the only other field in the key to be 'origin'.
                raise NotImplementedError(
                    "Compound primary keys with an autoincrement are only supported in SQLite "
                    "if the only non-autoincrement primary key field is 'origin'."
                )
        if not spec.recycleIds:
            kwargs = dict(kwargs, sqlite_autoincrement=True)
        return super()._convertTableSpec(name, spec, metadata, **kwargs)

    def replace(self, table: sqlalchemy.schema.Table, *rows: dict) -> None:
        # Insert rows, overwriting the non-primary-key columns of any existing
        # row that conflicts on the primary key.
        self.assertTableWriteable(table, f"Cannot replace into read-only table {table}.")
        if not rows:
            return
        query = sqlalchemy.dialects.sqlite.insert(table)
        excluded = query.excluded
        data = {
            column.name: getattr(excluded, column.name)
            for column in table.columns
            if column.name not in table.primary_key
        }
        query = query.on_conflict_do_update(index_elements=table.primary_key, set_=data)
        with self._transaction() as (_, connection):
            connection.execute(query, rows)

    def ensure(self, table: sqlalchemy.schema.Table, *rows: dict, primary_key_only: bool = False) -> int:
        # Insert rows, skipping any that conflict; returns the rowcount
        # reported for the executed statement (0 when no rows are given).
        self.assertTableWriteable(table, f"Cannot ensure into read-only table {table}.")
        if not rows:
            return 0
        query = sqlalchemy.dialects.sqlite.insert(table)
        if primary_key_only:
            # Only ignore conflicts on the primary key.
            query = query.on_conflict_do_nothing(index_elements=table.primary_key)
        else:
            query = query.on_conflict_do_nothing()
        with self._transaction() as (_, connection):
            return connection.execute(query, rows).rowcount

    filename: Optional[str]
    """Name of the file this database is connected to (`str` or `None`).

    Set to `None` for in-memory databases.
    """