Coverage for python/lsst/daf/butler/registry/databases/sqlite.py: 16%
146 statements
« prev ^ index » next coverage.py v6.5.0, created at 2023-06-06 09:38 +0000
« prev ^ index » next coverage.py v6.5.0, created at 2023-06-06 09:38 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = ["SqliteDatabase"]
25import copy
26import os
27import sqlite3
28import urllib.parse
29from contextlib import closing
30from typing import Any, ContextManager, Iterable, List, Optional
32import sqlalchemy
33import sqlalchemy.dialects.sqlite
34import sqlalchemy.ext.compiler
36from ...core import ddl
37from ...core.named import NamedValueAbstractSet
38from ..interfaces import Database, StaticTablesContext
def _onSqlite3Connect(
    dbapiConnection: sqlite3.Connection, connectionRecord: sqlalchemy.pool._ConnectionRecord
) -> None:
    """Configure a freshly-opened SQLite DBAPI connection.

    Intended for use as a SQLAlchemy "connect" event listener.

    Parameters
    ----------
    dbapiConnection : `sqlite3.Connection`
        The raw DBAPI connection that was just opened.
    connectionRecord : `sqlalchemy.pool._ConnectionRecord`
        Pool record associated with the connection; not used here.
    """
    assert isinstance(dbapiConnection, sqlite3.Connection)
    # Prevent pysqlite from emitting BEGIN and COMMIT statements.
    dbapiConnection.isolation_level = None
    with closing(dbapiConnection.cursor()) as cursor:
        # Enable foreign keys.
        cursor.execute("PRAGMA foreign_keys=ON;")
        # In ms, so 5min (way longer than should be needed).
        cursor.execute("PRAGMA busy_timeout = 300000;")
class SqliteDatabase(Database):
    """An implementation of the `Database` interface for SQLite3.

    Parameters
    ----------
    engine : `sqlalchemy.engine.Engine`
        Engine used to connect to the database, e.g. one returned by
        `makeEngine`.
    origin : `int`
        An integer ID that should be used as the default for any datasets,
        quanta, or other entities that use a (autoincrement, origin) compound
        primary key.
    namespace : `str`, optional
        The namespace (schema) this database is associated with. If `None`,
        the default schema for the connection is used (which may be `None`).
    writeable : `bool`, optional
        If `True`, allow write operations on the database, including
        ``CREATE TABLE``.

    Notes
    -----
    The case where ``namespace is not None`` is not yet tested, and may be
    broken; we need an API for attaching to different databases in order to
    write those tests, but haven't yet worked out what is common/different
    across databases well enough to define it.
    """

    def __init__(
        self,
        *,
        engine: sqlalchemy.engine.Engine,
        origin: int,
        namespace: Optional[str] = None,
        writeable: bool = True,
    ):
        super().__init__(origin=origin, engine=engine, namespace=namespace)
        # Get the filename from a call to 'PRAGMA database_list'.
        with engine.connect() as connection:
            with closing(connection.connection.cursor()) as cursor:
                dbList = cursor.execute("PRAGMA database_list").fetchall()
        if not dbList:
            raise RuntimeError("No database in connection.")
        if namespace is None:
            namespace = "main"
        # Each row is (seq, name, file); pick the row for our namespace.
        for _, dbname, filename in dbList:
            if dbname == namespace:
                break
        else:
            raise RuntimeError(f"No '{namespace}' database in connection.")
        # An empty filename is normalized to None (see the ``filename``
        # attribute documentation below).
        self.filename = filename or None
        self._writeable = writeable

    @classmethod
    def makeDefaultUri(cls, root: str) -> Optional[str]:
        # The default database is a file named "gen3.sqlite3" directly under
        # ``root``.
        return "sqlite:///" + os.path.join(root, "gen3.sqlite3")

    @classmethod
    def makeEngine(
        cls,
        uri: str | sqlalchemy.engine.URL | None = None,
        *,
        filename: Optional[str] = None,
        writeable: bool = True,
    ) -> sqlalchemy.engine.Engine:
        """Create a `sqlalchemy.engine.Engine` from a SQLAlchemy URI or
        filename.

        Parameters
        ----------
        uri : `str` or `sqlalchemy.engine.URL`, optional
            A SQLAlchemy URI connection string.
        filename : `str`
            Name of the SQLite database file, or `None` to use an in-memory
            database. Ignored if ``uri is not None``.
        writeable : `bool`, optional
            If `True`, allow write operations on the database, including
            ``CREATE TABLE``.

        Returns
        -------
        engine : `sqlalchemy.engine.Engine`
            A database engine.

        Raises
        ------
        NotImplementedError
            Raised if ``uri`` contains a ``uri=true`` query parameter, or if
            a read-only in-memory database is requested.
        """
        # In order to be able to tell SQLite that we want a read-only or
        # read-write connection, we need to make the SQLite DBAPI connection
        # with a "URI"-based connection string. SQLAlchemy claims it can do
        # this
        # (https://docs.sqlalchemy.org/en/13/dialects/sqlite.html#uri-connections),
        # but it doesn't seem to work as advertised. To work around this, we
        # use the 'creator' argument to sqlalchemy.engine.create_engine, which
        # lets us pass a callable that creates the DBAPI connection.
        if uri is None:
            if filename is None:
                target = ":memory:"
                uri = "sqlite://"
            else:
                target = f"file:{filename}"
                uri = f"sqlite:///{filename}"
        else:
            if isinstance(uri, sqlalchemy.engine.URL):
                # We have to parse strings anyway, so convert it to string.
                uri = uri.render_as_string(hide_password=False)
            parsed = urllib.parse.urlparse(uri)
            if "uri=true" in parsed.query.split("&"):
                # This is a SQLAlchemy URI that is already trying to make a
                # SQLite connection via a SQLite URI, and hence there may
                # be URI components for both SQLite and SQLAlchemy. We
                # don't need to support that, and it'd be a
                # reimplementation of all of the (broken) logic in
                # SQLAlchemy for doing this, so we just don't.
                raise NotImplementedError("SQLite connection strings with 'uri=true' are not supported.")
            # This is just a SQLAlchemy URI with a non-URI SQLite
            # connection string inside it. Pull that out so we can use it
            # in the creator call.
            if parsed.path.startswith("/"):
                filename = parsed.path[1:]
                target = f"file:{filename}"
            else:
                filename = None
                target = ":memory:"
        if filename is None:
            if not writeable:
                raise NotImplementedError("Read-only :memory: databases are not supported.")
        else:
            # File-backed database: select the SQLite open mode explicitly.
            target += "?mode=rwc&uri=true" if writeable else "?mode=ro&uri=true"

        def creator() -> sqlite3.Connection:
            return sqlite3.connect(target, check_same_thread=False, uri=True)

        engine = sqlalchemy.engine.create_engine(uri, creator=creator)

        sqlalchemy.event.listen(engine, "connect", _onSqlite3Connect)

        def _onSqlite3Begin(connection: sqlalchemy.engine.Connection) -> sqlalchemy.engine.Connection:
            assert connection.dialect.name == "sqlite"
            # Replace pysqlite's buggy transaction handling that never BEGINs
            # with our own that does, and tell SQLite to try to acquire a lock
            # as soon as we start a transaction that might involve writes (this
            # should lead to more blocking and fewer deadlocks).
            statement = "BEGIN IMMEDIATE" if writeable else "BEGIN"
            connection.execute(sqlalchemy.text(statement))
            return connection

        sqlalchemy.event.listen(engine, "begin", _onSqlite3Begin)

        return engine

    @classmethod
    def fromEngine(
        cls,
        engine: sqlalchemy.engine.Engine,
        *,
        origin: int,
        namespace: Optional[str] = None,
        writeable: bool = True,
    ) -> Database:
        # Thin alternate constructor: forwards everything to __init__.
        return cls(engine=engine, origin=origin, writeable=writeable, namespace=namespace)

    def isWriteable(self) -> bool:
        # Reports the ``writeable`` flag given at construction.
        return self._writeable

    def __str__(self) -> str:
        if self.filename:
            return f"SQLite3@{self.filename}"
        return "SQLite3@:memory:"

    def _lockTables(
        self, connection: sqlalchemy.engine.Connection, tables: Iterable[sqlalchemy.schema.Table] = ()
    ) -> None:
        # Docstring inherited.
        # Our SQLite database always acquires full-database locks at the
        # beginning of a transaction, so there's no need to acquire table-level
        # locks - which is good, because SQLite doesn't have table-level
        # locking.
        pass

    # MyPy claims that the return type here isn't covariant with the return
    # type of the base class method, which is formally correct but irrelevant
    # - the base class return type is _GeneratorContextManager, but only
    # because it's generated by the contextmanager decorator.
    def declareStaticTables(self, *, create: bool) -> ContextManager[StaticTablesContext]:  # type: ignore
        # If the user asked for an in-memory, writeable database, then we may
        # need to re-create schema even if create=False because schema can be
        # lost on re-connect. This is only really relevant for tests, and it's
        # convenient there.
        if self.filename is None and self.isWriteable():
            inspector = sqlalchemy.inspect(self._engine)
            if not inspector.get_table_names(schema=self.namespace):
                create = True
        return super().declareStaticTables(create=create)

    def _convertFieldSpec(
        self, table: str, spec: ddl.FieldSpec, metadata: sqlalchemy.MetaData, **kwargs: Any
    ) -> sqlalchemy.schema.Column:
        if spec.autoincrement:
            if not spec.primaryKey:
                raise RuntimeError(
                    f"Autoincrement field {table}.{spec.name} that is not a primary key is not supported."
                )
            if spec.dtype != sqlalchemy.Integer:
                # SQLite's autoincrement is really limited; it only works if
                # the column type is exactly "INTEGER". But it also doesn't
                # care about the distinctions between different integer types,
                # so it's safe to change it. Work on a shallow copy so the
                # caller's spec object is not modified.
                spec = copy.copy(spec)
                spec.dtype = sqlalchemy.Integer
        return super()._convertFieldSpec(table, spec, metadata, **kwargs)

    def _makeColumnConstraints(self, table: str, spec: ddl.FieldSpec) -> List[sqlalchemy.CheckConstraint]:
        # For sqlite we force constraints on all string columns since sqlite
        # ignores everything otherwise and this leads to problems with
        # other databases.
        constraints = []
        if spec.isStringType():
            name = self.shrinkDatabaseEntityName("_".join([table, "len", spec.name]))
            # The lower bound is there because Oracle converts empty strings
            # to NULL, so we reject them here as well.
            constraints.append(
                sqlalchemy.CheckConstraint(
                    f'length("{spec.name}")<={spec.length}' f' AND length("{spec.name}")>=1',
                    name=name,
                )
            )

        constraints.extend(super()._makeColumnConstraints(table, spec))
        return constraints

    def _convertTableSpec(
        self, name: str, spec: ddl.TableSpec, metadata: sqlalchemy.MetaData, **kwargs: Any
    ) -> sqlalchemy.schema.Table:
        primaryKeyFieldNames = {field.name for field in spec.fields if field.primaryKey}
        autoincrFieldNames = {field.name for field in spec.fields if field.autoincrement}
        if len(autoincrFieldNames) > 1:
            raise RuntimeError("At most one autoincrement field per table is allowed.")
        if len(primaryKeyFieldNames) > 1 and autoincrFieldNames:
            # SQLite's default rowid-based autoincrement doesn't work if the
            # field is just one field in a compound primary key. As a
            # workaround, we create an extra table with just one column that
            # we'll insert into to generate those IDs. That's only safe if
            # that single-column table's records are already unique with just
            # the autoincrement field, not the rest of the primary key. In
            # practice, that means the single-column table's records are those
            # for which origin == self.origin.
            otherPrimaryKeyFieldNames = primaryKeyFieldNames - autoincrFieldNames
            if otherPrimaryKeyFieldNames != {"origin"}:
                # We need the only other field in the key to be 'origin'.
                raise NotImplementedError(
                    "Compound primary keys with an autoincrement are only supported in SQLite "
                    "if the only non-autoincrement primary key field is 'origin'."
                )
        if not spec.recycleIds:
            kwargs = dict(kwargs, sqlite_autoincrement=True)
        return super()._convertTableSpec(name, spec, metadata, **kwargs)

    def replace(self, table: sqlalchemy.schema.Table, *rows: dict) -> None:
        self.assertTableWriteable(table, f"Cannot replace into read-only table {table}.")
        if not rows:
            return
        query = sqlalchemy.dialects.sqlite.insert(table)
        # On a primary-key conflict, overwrite every non-primary-key column
        # with the value from the row that failed to insert.
        excluded = query.excluded
        data = {
            column.name: getattr(excluded, column.name)
            for column in table.columns
            if column.name not in table.primary_key
        }
        query = query.on_conflict_do_update(index_elements=table.primary_key, set_=data)
        with self._transaction() as (_, connection):
            connection.execute(query, rows)

    def ensure(self, table: sqlalchemy.schema.Table, *rows: dict, primary_key_only: bool = False) -> int:
        self.assertTableWriteable(table, f"Cannot ensure into read-only table {table}.")
        if not rows:
            return 0
        query = sqlalchemy.dialects.sqlite.insert(table)
        if primary_key_only:
            # Only conflicts on the primary key are silently skipped.
            query = query.on_conflict_do_nothing(index_elements=table.primary_key)
        else:
            query = query.on_conflict_do_nothing()
        with self._transaction() as (_, connection):
            return connection.execute(query, rows).rowcount

    def constant_rows(
        self,
        fields: NamedValueAbstractSet[ddl.FieldSpec],
        *rows: dict,
        name: Optional[str] = None,
    ) -> sqlalchemy.sql.FromClause:
        # Docstring inherited.
        # While SQLite supports VALUES, it doesn't support assigning a name
        # to that construct or the names of its columns, and hence there's no
        # way to actually join it into a SELECT query. It seems the only
        # alternative is something like:
        #
        #    SELECT ? AS a, ? AS b
        #    UNION ALL
        #    SELECT ? AS a, ? AS b
        #
        selects = [
            sqlalchemy.sql.select(
                *[sqlalchemy.sql.literal(row[field.name], field.dtype).label(field.name) for field in fields]
            )
            for row in rows
        ]
        return sqlalchemy.sql.union_all(*selects).alias(name)

    filename: Optional[str]
    """Name of the file this database is connected to (`str` or `None`).

    Set to `None` for in-memory databases.
    """