Coverage for python/lsst/daf/butler/registry/databases/sqlite.py: 19%
147 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-21 09:55 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-21 09:55 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = ["SqliteDatabase"]
25import copy
26import os
27import sqlite3
28import urllib.parse
29from collections.abc import Iterable
30from contextlib import closing
31from typing import Any, ContextManager
33import sqlalchemy
34import sqlalchemy.dialects.sqlite
35import sqlalchemy.ext.compiler
37from ...core import ddl
38from ...core.named import NamedValueAbstractSet
39from ..interfaces import Database, StaticTablesContext
def _onSqlite3Connect(
    dbapiConnection: sqlite3.Connection, connectionRecord: sqlalchemy.pool._ConnectionRecord
) -> None:
    """Configure a newly opened SQLite DBAPI connection.

    Intended to be registered as a SQLAlchemy ``connect`` event listener.

    Parameters
    ----------
    dbapiConnection : `sqlite3.Connection`
        The raw DBAPI connection that was just opened.
    connectionRecord : `sqlalchemy.pool._ConnectionRecord`
        Pool record for the connection; unused.
    """
    assert isinstance(dbapiConnection, sqlite3.Connection)
    # With isolation_level set to None pysqlite no longer emits BEGIN and
    # COMMIT statements on its own.
    dbapiConnection.isolation_level = None
    perConnectionPragmas = (
        # Enable foreign keys.
        "PRAGMA foreign_keys=ON;",
        # In ms, so 5min (way longer than should be needed).
        "PRAGMA busy_timeout = 300000;",
    )
    with closing(dbapiConnection.cursor()) as pragmaCursor:
        for statement in perConnectionPragmas:
            pragmaCursor.execute(statement)
class SqliteDatabase(Database):
    """An implementation of the `Database` interface for SQLite3.

    Parameters
    ----------
    engine : `sqlalchemy.engine.Engine`
        An existing engine, such as one returned by `makeEngine`.
    origin : `int`
        An integer ID that should be used as the default for any datasets,
        quanta, or other entities that use a (autoincrement, origin) compound
        primary key.
    namespace : `str`, optional
        The namespace (schema) this database is associated with.  If `None`,
        the default schema for the connection is used (which may be `None`).
    writeable : `bool`, optional
        If `True`, allow write operations on the database, including
        ``CREATE TABLE``.

    Raises
    ------
    RuntimeError
        Raised if ``PRAGMA database_list`` reports no databases, or none
        whose name matches ``namespace`` (``"main"`` when ``namespace`` is
        `None`).

    Notes
    -----
    The case where ``namespace is not None`` is not yet tested, and may be
    broken; we need an API for attaching to different databases in order to
    write those tests, but haven't yet worked out what is common/different
    across databases well enough to define it.
    """

    def __init__(
        self,
        *,
        engine: sqlalchemy.engine.Engine,
        origin: int,
        namespace: str | None = None,
        writeable: bool = True,
    ):
        super().__init__(origin=origin, engine=engine, namespace=namespace)
        # Get the filename from a call to 'PRAGMA database_list'.
        with engine.connect() as connection:
            with closing(connection.connection.cursor()) as cursor:
                dbList = cursor.execute("PRAGMA database_list").fetchall()
        if not dbList:
            raise RuntimeError("No database in connection.")
        if namespace is None:
            namespace = "main"
        for _, dbname, filename in dbList:
            if dbname == namespace:
                break
        else:
            raise RuntimeError(f"No '{namespace}' database in connection.")
        # An empty filename is normalized to None.
        self.filename = filename or None
        self._writeable = writeable

    @classmethod
    def makeDefaultUri(cls, root: str) -> str | None:
        # Returns a SQLAlchemy URI for a "gen3.sqlite3" file inside ``root``.
        return "sqlite:///" + os.path.join(root, "gen3.sqlite3")

    @classmethod
    def makeEngine(
        cls,
        uri: str | sqlalchemy.engine.URL | None = None,
        *,
        filename: str | None = None,
        writeable: bool = True,
    ) -> sqlalchemy.engine.Engine:
        """Create a `sqlalchemy.engine.Engine` from a SQLAlchemy URI or
        filename.

        Parameters
        ----------
        uri : `str` or `sqlalchemy.engine.URL`, optional
            A SQLAlchemy URI connection string.
        filename : `str`, optional
            Name of the SQLite database file, or `None` to use an in-memory
            database.  Ignored if ``uri is not None``.
        writeable : `bool`, optional
            If `True`, allow write operations on the database, including
            ``CREATE TABLE``.

        Returns
        -------
        engine : `sqlalchemy.engine.Engine`
            A database engine.

        Raises
        ------
        NotImplementedError
            Raised if ``uri`` carries a ``uri=true`` query parameter, or if a
            read-only in-memory database is requested.
        """
        # In order to be able to tell SQLite that we want a read-only or
        # read-write connection, we need to make the SQLite DBAPI connection
        # with a "URI"-based connection string.  SQLAlchemy claims it can do
        # this
        # (https://docs.sqlalchemy.org/en/13/dialects/sqlite.html#uri-connections),
        # but it doesn't seem to work as advertised.  To work around this, we
        # use the 'creator' argument to sqlalchemy.engine.create_engine, which
        # lets us pass a callable that creates the DBAPI connection.
        if uri is None:
            if filename is None:
                target = ":memory:"
                uri = "sqlite://"
            else:
                # The target is opened as a SQLite URI, where '?', '#' and
                # '%' are special; percent-encode the caller's filename so
                # that it is always taken literally.
                target = f"file:{urllib.parse.quote(str(filename))}"
                uri = f"sqlite:///{filename}"
        else:
            if isinstance(uri, sqlalchemy.engine.URL):
                # We have to parse strings anyway, so convert it to string.
                uri = uri.render_as_string(hide_password=False)
            parsed = urllib.parse.urlparse(uri)
            queries = parsed.query.split("&")
            if "uri=true" in queries:
                # This is a SQLAlchemy URI that is already trying to make a
                # SQLite connection via a SQLite URI, and hence there may
                # be URI components for both SQLite and SQLAlchemy.  We
                # don't need to support that, and it'd be a
                # reimplementation of all of the (broken) logic in
                # SQLAlchemy for doing this, so we just don't.
                raise NotImplementedError("SQLite connection strings with 'uri=true' are not supported.")
            # This is just a SQLAlchemy URI with a non-URI SQLite
            # connection string inside it.  Pull that out so we can use it
            # in the creator call.
            if parsed.path.startswith("/"):
                filename = parsed.path[1:]
                target = f"file:{filename}"
            else:
                filename = None
                target = ":memory:"
        if filename is None:
            if not writeable:
                raise NotImplementedError("Read-only :memory: databases are not supported.")
        else:
            if writeable:
                target += "?mode=rwc&uri=true"
            else:
                target += "?mode=ro&uri=true"

        def creator() -> sqlite3.Connection:
            return sqlite3.connect(target, check_same_thread=False, uri=True)

        engine = sqlalchemy.engine.create_engine(uri, creator=creator)

        sqlalchemy.event.listen(engine, "connect", _onSqlite3Connect)

        def _onSqlite3Begin(connection: sqlalchemy.engine.Connection) -> sqlalchemy.engine.Connection:
            assert connection.dialect.name == "sqlite"
            # Replace pysqlite's buggy transaction handling that never BEGINs
            # with our own that does, and tell SQLite to try to acquire a lock
            # as soon as we start a transaction that might involve writes (this
            # should lead to more blocking and fewer deadlocks).
            if writeable:
                connection.execute(sqlalchemy.text("BEGIN IMMEDIATE"))
            else:
                connection.execute(sqlalchemy.text("BEGIN"))
            return connection

        sqlalchemy.event.listen(engine, "begin", _onSqlite3Begin)

        return engine

    @classmethod
    def fromEngine(
        cls,
        engine: sqlalchemy.engine.Engine,
        *,
        origin: int,
        namespace: str | None = None,
        writeable: bool = True,
    ) -> Database:
        # Thin alternate constructor: forwards every argument to __init__.
        return cls(engine=engine, origin=origin, writeable=writeable, namespace=namespace)

    def isWriteable(self) -> bool:
        # Reports the ``writeable`` flag given at construction.
        return self._writeable

    def __str__(self) -> str:
        if self.filename:
            return f"SQLite3@{self.filename}"
        else:
            return "SQLite3@:memory:"

    def _lockTables(
        self, connection: sqlalchemy.engine.Connection, tables: Iterable[sqlalchemy.schema.Table] = ()
    ) -> None:
        # Docstring inherited.
        # Our SQLite database always acquires full-database locks at the
        # beginning of a transaction, so there's no need to acquire table-level
        # locks - which is good, because SQLite doesn't have table-level
        # locking.
        pass

    # MyPy claims that the return type here isn't covariant with the return
    # type of the base class method, which is formally correct but irrelevant
    # - the base class return type is _GeneratorContextManager, but only
    # because it's generated by the contextmanager decorator.
    def declareStaticTables(self, *, create: bool) -> ContextManager[StaticTablesContext]:  # type: ignore
        # If the user asked for an in-memory, writeable database, then we may
        # need to re-create schema even if create=False because schema can be
        # lost on re-connect.  This is only really relevant for tests, and it's
        # convenient there.
        if self.filename is None and self.isWriteable():
            inspector = sqlalchemy.inspect(self._engine)
            tables = inspector.get_table_names(schema=self.namespace)
            if not tables:
                create = True
        return super().declareStaticTables(create=create)

    def _convertFieldSpec(
        self, table: str, spec: ddl.FieldSpec, metadata: sqlalchemy.MetaData, **kwargs: Any
    ) -> sqlalchemy.schema.Column:
        if spec.autoincrement:
            if not spec.primaryKey:
                raise RuntimeError(
                    f"Autoincrement field {table}.{spec.name} that is not a primary key is not supported."
                )
            if spec.dtype != sqlalchemy.Integer:
                # SQLite's autoincrement is really limited; it only works if
                # the column type is exactly "INTEGER".  But it also doesn't
                # care about the distinctions between different integer types,
                # so it's safe to change it.  Work on a copy so the caller's
                # spec is not modified.
                spec = copy.copy(spec)
                spec.dtype = sqlalchemy.Integer
        return super()._convertFieldSpec(table, spec, metadata, **kwargs)

    def _makeColumnConstraints(self, table: str, spec: ddl.FieldSpec) -> list[sqlalchemy.CheckConstraint]:
        # For sqlite we force constraints on all string columns since sqlite
        # ignores everything otherwise and this leads to problems with
        # other databases.

        constraints = []
        if spec.isStringType():
            name = self.shrinkDatabaseEntityName("_".join([table, "len", spec.name]))
            # The lower bound is there because Oracle converts empty strings
            # to NULL, so empty strings are rejected here as well.
            constraints.append(
                sqlalchemy.CheckConstraint(
                    f'length("{spec.name}")<={spec.length}' f' AND length("{spec.name}")>=1',
                    name=name,
                )
            )

        constraints.extend(super()._makeColumnConstraints(table, spec))
        return constraints

    def _convertTableSpec(
        self, name: str, spec: ddl.TableSpec, metadata: sqlalchemy.MetaData, **kwargs: Any
    ) -> sqlalchemy.schema.Table:
        primaryKeyFieldNames = {field.name for field in spec.fields if field.primaryKey}
        autoincrFieldNames = {field.name for field in spec.fields if field.autoincrement}
        if len(autoincrFieldNames) > 1:
            raise RuntimeError("At most one autoincrement field per table is allowed.")
        if len(primaryKeyFieldNames) > 1 and len(autoincrFieldNames) > 0:
            # SQLite's default rowid-based autoincrement doesn't work if the
            # field is just one field in a compound primary key.  The only
            # compound form accepted here is (autoincrement, origin); anything
            # else is rejected.
            otherPrimaryKeyFieldNames = primaryKeyFieldNames - autoincrFieldNames
            if otherPrimaryKeyFieldNames != {"origin"}:
                # We need the only other field in the key to be 'origin'.
                raise NotImplementedError(
                    "Compound primary keys with an autoincrement are only supported in SQLite "
                    "if the only non-autoincrement primary key field is 'origin'."
                )
        if not spec.recycleIds:
            kwargs = dict(kwargs, sqlite_autoincrement=True)
        return super()._convertTableSpec(name, spec, metadata, **kwargs)

    def replace(self, table: sqlalchemy.schema.Table, *rows: dict) -> None:
        self.assertTableWriteable(table, f"Cannot replace into read-only table {table}.")
        if not rows:
            return
        query = sqlalchemy.dialects.sqlite.insert(table)
        excluded = query.excluded
        # On a primary-key conflict, overwrite every non-key column with the
        # value from the row that failed to insert.
        data = {
            column.name: getattr(excluded, column.name)
            for column in table.columns
            if column.name not in table.primary_key
        }
        query = query.on_conflict_do_update(index_elements=table.primary_key, set_=data)
        with self._transaction() as (_, connection):
            connection.execute(query, rows)

    def ensure(self, table: sqlalchemy.schema.Table, *rows: dict, primary_key_only: bool = False) -> int:
        self.assertTableWriteable(table, f"Cannot ensure into read-only table {table}.")
        if not rows:
            return 0
        query = sqlalchemy.dialects.sqlite.insert(table)
        if primary_key_only:
            query = query.on_conflict_do_nothing(index_elements=table.primary_key)
        else:
            query = query.on_conflict_do_nothing()
        with self._transaction() as (_, connection):
            return connection.execute(query, rows).rowcount

    def constant_rows(
        self,
        fields: NamedValueAbstractSet[ddl.FieldSpec],
        *rows: dict,
        name: str | None = None,
    ) -> sqlalchemy.sql.FromClause:
        # Docstring inherited.
        # While SQLite supports VALUES, it doesn't support assigning a name
        # to that construct or the names of its columns, and hence there's no
        # way to actually join it into a SELECT query.  It seems the only
        # alternative is something like:
        #
        #    SELECT ? AS a, ? AS b
        #    UNION ALL
        #    SELECT ? AS a, ? AS b
        #
        selects = [
            sqlalchemy.sql.select(
                *[sqlalchemy.sql.literal(row[field.name], field.dtype).label(field.name) for field in fields]
            )
            for row in rows
        ]
        return sqlalchemy.sql.union_all(*selects).alias(name)

    filename: str | None
    """Name of the file this database is connected to (`str` or `None`).

    Set to `None` for in-memory databases.
    """