Coverage for python/lsst/daf/butler/registry/databases/sqlite.py: 16%
147 statements
« prev ^ index » next coverage.py v6.5.0, created at 2023-01-07 02:05 -0800
« prev ^ index » next coverage.py v6.5.0, created at 2023-01-07 02:05 -0800
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = ["SqliteDatabase"]
25import copy
26import os
27import sqlite3
28import urllib.parse
29from contextlib import closing
30from typing import Any, ContextManager, Iterable, List, Optional
32import sqlalchemy
33import sqlalchemy.dialects.sqlite
34import sqlalchemy.ext.compiler
36from ...core import ddl
37from ...core.named import NamedValueAbstractSet
38from ..interfaces import Database, StaticTablesContext
def _onSqlite3Connect(
    dbapiConnection: sqlite3.Connection, connectionRecord: sqlalchemy.pool._ConnectionRecord
) -> None:
    """Configure a newly opened SQLite DBAPI connection.

    Intended for use as a SQLAlchemy ``"connect"`` event listener.

    Parameters
    ----------
    dbapiConnection : `sqlite3.Connection`
        The raw DBAPI connection that was just opened.
    connectionRecord : `sqlalchemy.pool._ConnectionRecord`
        Pool record associated with the connection; not used here.
    """
    assert isinstance(dbapiConnection, sqlite3.Connection)
    # With no isolation level set, pysqlite stops emitting its own BEGIN and
    # COMMIT statements.
    dbapiConnection.isolation_level = None
    setupStatements = (
        # Enable foreign keys
        "PRAGMA foreign_keys=ON;",
        # in ms, so 5min (way longer than should be needed)
        "PRAGMA busy_timeout = 300000;",
    )
    cursor = dbapiConnection.cursor()
    try:
        for statement in setupStatements:
            cursor.execute(statement)
    finally:
        cursor.close()
class SqliteDatabase(Database):
    """An implementation of the `Database` interface for SQLite3.

    Parameters
    ----------
    engine : `sqlalchemy.engine.Engine`
        An existing engine, e.g. one created by a previous call to
        `makeEngine`.
    origin : `int`
        An integer ID that should be used as the default for any datasets,
        quanta, or other entities that use a (autoincrement, origin) compound
        primary key.
    namespace : `str`, optional
        The namespace (schema) this database is associated with.  If `None`,
        the default schema for the connection is used (which may be `None`).
    writeable : `bool`, optional
        If `True`, allow write operations on the database, including
        ``CREATE TABLE``.

    Raises
    ------
    RuntimeError
        Raised if ``PRAGMA database_list`` reports no databases at all, or
        none whose name matches ``namespace`` (``"main"`` when ``namespace``
        is `None`).

    Notes
    -----
    The case where ``namespace is not None`` is not yet tested, and may be
    broken; we need an API for attaching to different databases in order to
    write those tests, but haven't yet worked out what is common/different
    across databases well enough to define it.
    """

    def __init__(
        self,
        *,
        engine: sqlalchemy.engine.Engine,
        origin: int,
        namespace: Optional[str] = None,
        writeable: bool = True,
    ):
        super().__init__(origin=origin, engine=engine, namespace=namespace)
        # Get the filename from a call to 'PRAGMA database_list'.
        with engine.connect() as connection:
            with closing(connection.connection.cursor()) as cursor:
                dbList = cursor.execute("PRAGMA database_list").fetchall()
        if not dbList:
            raise RuntimeError("No database in connection.")
        if namespace is None:
            namespace = "main"
        # Each row is (sequence number, database name, file name); pick the
        # first row whose database name matches the requested namespace.
        for _, dbname, filename in dbList:
            if dbname == namespace:
                break
        else:
            raise RuntimeError(f"No '{namespace}' database in connection.")
        # An empty file name is stored as None (see the ``filename``
        # attribute documentation: None means an in-memory database).
        self.filename = filename or None
        self._writeable = writeable

    @classmethod
    def makeDefaultUri(cls, root: str) -> Optional[str]:
        # Default location is a ``gen3.sqlite3`` file directly under ``root``.
        return "sqlite:///" + os.path.join(root, "gen3.sqlite3")

    @classmethod
    def makeEngine(
        cls, uri: Optional[str] = None, *, filename: Optional[str] = None, writeable: bool = True
    ) -> sqlalchemy.engine.Engine:
        """Create a `sqlalchemy.engine.Engine` from a SQLAlchemy URI or
        filename.

        Parameters
        ----------
        uri : `str`, optional
            A SQLAlchemy URI connection string.  If `None`, the engine is
            built from ``filename`` instead.
        filename : `str`, optional
            Name of the SQLite database file, or `None` to use an in-memory
            database.  Ignored if ``uri is not None``.
        writeable : `bool`, optional
            If `True`, allow write operations on the database, including
            ``CREATE TABLE``.

        Returns
        -------
        engine : `sqlalchemy.engine.Engine`
            A database engine.

        Raises
        ------
        NotImplementedError
            Raised if ``uri`` contains a ``uri=true`` query parameter, or if
            a read-only in-memory database is requested.
        """
        # In order to be able to tell SQLite that we want a read-only or
        # read-write connection, we need to make the SQLite DBAPI connection
        # with a "URI"-based connection string.  SQLAlchemy claims it can do
        # this
        # (https://docs.sqlalchemy.org/en/13/dialects/sqlite.html#uri-connections),
        # but it doesn't seem to work as advertised.  To work around this, we
        # use the 'creator' argument to sqlalchemy.engine.create_engine, which
        # lets us pass a callable that creates the DBAPI connection.
        #
        # NOTE(review): ``filename`` is interpolated into the SQLite "file:"
        # URI below without percent-encoding, so names containing '?', '#' or
        # '%' would presumably be misread by SQLite - confirm before relying
        # on such paths.
        if uri is None:
            if filename is None:
                target = ":memory:"
                uri = "sqlite://"
            else:
                target = f"file:{filename}"
                uri = f"sqlite:///{filename}"
        else:
            parsed = urllib.parse.urlparse(uri)
            queries = parsed.query.split("&")
            if "uri=true" in queries:
                # This is a SQLAlchemy URI that is already trying to make a
                # SQLite connection via a SQLite URI, and hence there may
                # be URI components for both SQLite and SQLAlchemy.  We
                # don't need to support that, and it'd be a
                # reimplementation of all of the (broken) logic in
                # SQLAlchemy for doing this, so we just don't.
                raise NotImplementedError("SQLite connection strings with 'uri=true' are not supported.")
            # This is just a SQLAlchemy URI with a non-URI SQLite
            # connection string inside it.  Pull that out so we can use it
            # in the creator call.
            if parsed.path.startswith("/"):
                filename = parsed.path[1:]
                target = f"file:{filename}"
            else:
                filename = None
                target = ":memory:"
        if filename is None:
            if not writeable:
                raise NotImplementedError("Read-only :memory: databases are not supported.")
        else:
            # File-backed database: select read-write-create or read-only
            # access through the SQLite URI query string.
            mode = "rwc" if writeable else "ro"
            target += f"?mode={mode}&uri=true"

        def creator() -> sqlite3.Connection:
            return sqlite3.connect(target, check_same_thread=False, uri=True)

        engine = sqlalchemy.engine.create_engine(uri, creator=creator)

        sqlalchemy.event.listen(engine, "connect", _onSqlite3Connect)

        def _onSqlite3Begin(connection: sqlalchemy.engine.Connection) -> sqlalchemy.engine.Connection:
            assert connection.dialect.name == "sqlite"
            # Replace pysqlite's buggy transaction handling that never BEGINs
            # with our own that does, and tell SQLite to try to acquire a lock
            # as soon as we start a transaction that might involve writes (this
            # should lead to more blocking and fewer deadlocks).
            if writeable:
                connection.execute(sqlalchemy.text("BEGIN IMMEDIATE"))
            else:
                connection.execute(sqlalchemy.text("BEGIN"))
            return connection

        sqlalchemy.event.listen(engine, "begin", _onSqlite3Begin)

        return engine

    @classmethod
    def fromEngine(
        cls,
        engine: sqlalchemy.engine.Engine,
        *,
        origin: int,
        namespace: Optional[str] = None,
        writeable: bool = True,
    ) -> Database:
        # Thin alternate constructor: forwards everything to ``__init__``.
        return cls(engine=engine, origin=origin, writeable=writeable, namespace=namespace)

    def isWriteable(self) -> bool:
        # Reports the ``writeable`` flag given at construction.
        return self._writeable

    def __str__(self) -> str:
        if self.filename:
            return f"SQLite3@{self.filename}"
        return "SQLite3@:memory:"

    def _lockTables(
        self, connection: sqlalchemy.engine.Connection, tables: Iterable[sqlalchemy.schema.Table] = ()
    ) -> None:
        # Docstring inherited.
        # Our SQLite database always acquires full-database locks at the
        # beginning of a transaction, so there's no need to acquire table-level
        # locks - which is good, because SQLite doesn't have table-level
        # locking.
        pass

    # MyPy claims that the return type here isn't covariant with the return
    # type of the base class method, which is formally correct but irrelevant
    # - the base class return type is _GeneratorContextManager, but only
    # because it's generated by the contextmanager decorator.
    def declareStaticTables(self, *, create: bool) -> ContextManager[StaticTablesContext]:  # type: ignore
        # If the user asked for an in-memory, writeable database, then we may
        # need to re-create schema even if create=False because schema can be
        # lost on re-connect.  This is only really relevant for tests, and it's
        # convenient there.
        if self.filename is None and self.isWriteable():
            inspector = sqlalchemy.inspect(self._engine)
            tables = inspector.get_table_names(schema=self.namespace)
            if not tables:
                create = True
        return super().declareStaticTables(create=create)

    def _convertFieldSpec(
        self, table: str, spec: ddl.FieldSpec, metadata: sqlalchemy.MetaData, **kwargs: Any
    ) -> sqlalchemy.schema.Column:
        if spec.autoincrement:
            if not spec.primaryKey:
                raise RuntimeError(
                    f"Autoincrement field {table}.{spec.name} that is not a primary key is not supported."
                )
            if spec.dtype != sqlalchemy.Integer:
                # SQLite's autoincrement is really limited; it only works if
                # the column type is exactly "INTEGER".  But it also doesn't
                # care about the distinctions between different integer types,
                # so it's safe to change it.  Work on a copy so the caller's
                # spec is left untouched.
                spec = copy.copy(spec)
                spec.dtype = sqlalchemy.Integer
        return super()._convertFieldSpec(table, spec, metadata, **kwargs)

    def _makeColumnConstraints(self, table: str, spec: ddl.FieldSpec) -> List[sqlalchemy.CheckConstraint]:
        # For sqlite we force constraints on all string columns since sqlite
        # ignores everything otherwise and this leads to problems with
        # other databases.

        constraints = []
        if spec.isStringType():
            name = self.shrinkDatabaseEntityName("_".join([table, "len", spec.name]))
            constraints.append(
                sqlalchemy.CheckConstraint(
                    f'length("{spec.name}")<={spec.length}'
                    # Oracle converts empty strings to NULL so check
                    f' AND length("{spec.name}")>=1',
                    name=name,
                )
            )

        constraints.extend(super()._makeColumnConstraints(table, spec))
        return constraints

    def _convertTableSpec(
        self, name: str, spec: ddl.TableSpec, metadata: sqlalchemy.MetaData, **kwargs: Any
    ) -> sqlalchemy.schema.Table:
        primaryKeyFieldNames = {field.name for field in spec.fields if field.primaryKey}
        autoincrFieldNames = {field.name for field in spec.fields if field.autoincrement}
        if len(autoincrFieldNames) > 1:
            raise RuntimeError("At most one autoincrement field per table is allowed.")
        if len(primaryKeyFieldNames) > 1 and autoincrFieldNames:
            # SQLite's default rowid-based autoincrement doesn't work if the
            # field is just one field in a compound primary key.  As a
            # workaround, we create an extra table with just one column that
            # we'll insert into to generate those IDs.  That's only safe if
            # that single-column table's records are already unique with just
            # the autoincrement field, not the rest of the primary key.  In
            # practice, that means the single-column table's records are those
            # for which origin == self.origin.
            otherPrimaryKeyFieldNames = primaryKeyFieldNames - autoincrFieldNames
            if otherPrimaryKeyFieldNames != {"origin"}:
                # We need the only other field in the key to be 'origin'.
                raise NotImplementedError(
                    "Compound primary keys with an autoincrement are only supported in SQLite "
                    "if the only non-autoincrement primary key field is 'origin'."
                )
        if not spec.recycleIds:
            # Build a new dict rather than mutating the caller's kwargs.
            kwargs = dict(kwargs, sqlite_autoincrement=True)
        return super()._convertTableSpec(name, spec, metadata, **kwargs)

    def replace(self, table: sqlalchemy.schema.Table, *rows: dict) -> None:
        self.assertTableWriteable(table, f"Cannot replace into read-only table {table}.")
        if not rows:
            return
        query = sqlalchemy.dialects.sqlite.insert(table)
        # On a primary-key conflict, overwrite every non-key column with the
        # value from the row that failed to insert ("excluded").
        excluded = query.excluded
        data = {
            column.name: getattr(excluded, column.name)
            for column in table.columns
            if column.name not in table.primary_key
        }
        query = query.on_conflict_do_update(index_elements=table.primary_key, set_=data)
        with self._transaction() as (_, connection):
            connection.execute(query, rows)

    def ensure(self, table: sqlalchemy.schema.Table, *rows: dict, primary_key_only: bool = False) -> int:
        self.assertTableWriteable(table, f"Cannot ensure into read-only table {table}.")
        if not rows:
            return 0
        query = sqlalchemy.dialects.sqlite.insert(table)
        if primary_key_only:
            # Only conflicts on the primary key are skipped.
            query = query.on_conflict_do_nothing(index_elements=table.primary_key)
        else:
            # No conflict target given to ON CONFLICT DO NOTHING.
            query = query.on_conflict_do_nothing()
        with self._transaction() as (_, connection):
            return connection.execute(query, rows).rowcount

    def constant_rows(
        self,
        fields: NamedValueAbstractSet[ddl.FieldSpec],
        *rows: dict,
        name: Optional[str] = None,
    ) -> sqlalchemy.sql.FromClause:
        # Docstring inherited.
        # While SQLite supports VALUES, it doesn't support assigning a name
        # to that construct or the names of its columns, and hence there's no
        # way to actually join it into a SELECT query.  It seems the only
        # alternative is something like:
        #
        #    SELECT ? AS a, ? AS b
        #    UNION ALL
        #    SELECT ? AS a, ? AS b
        #
        selects = [
            sqlalchemy.sql.select(
                *[sqlalchemy.sql.literal(row[field.name], field.dtype).label(field.name) for field in fields]
            )
            for row in rows
        ]
        return sqlalchemy.sql.union_all(*selects).alias(name)

    filename: Optional[str]
    """Name of the file this database is connected to (`str` or `None`).

    Set to `None` for in-memory databases.
    """