Coverage for python/lsst/daf/butler/registry/databases/sqlite.py: 18%
145 statements
« prev ^ index » next coverage.py v6.4.4, created at 2022-09-27 02:00 -0700
« prev ^ index » next coverage.py v6.4.4, created at 2022-09-27 02:00 -0700
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = ["SqliteDatabase"]
25import copy
26import os
27import sqlite3
28import urllib.parse
29from contextlib import closing
30from typing import Any, ContextManager, Iterable, List, Optional
32import sqlalchemy
33import sqlalchemy.dialects.sqlite
34import sqlalchemy.ext.compiler
36from ...core import ddl
37from ...core.named import NamedValueAbstractSet
38from ..interfaces import Database, StaticTablesContext
def _onSqlite3Connect(
    dbapiConnection: sqlite3.Connection, connectionRecord: sqlalchemy.pool._ConnectionRecord
) -> None:
    """Configure a newly opened SQLite DBAPI connection.

    Registered as a SQLAlchemy ``"connect"`` event listener.

    Parameters
    ----------
    dbapiConnection : `sqlite3.Connection`
        The raw DBAPI connection that was just opened.
    connectionRecord : `sqlalchemy.pool._ConnectionRecord`
        Pool record for the connection; not used here.
    """
    assert isinstance(dbapiConnection, sqlite3.Connection)
    # Setting isolation_level to None stops pysqlite from emitting BEGIN and
    # COMMIT statements on its own.
    dbapiConnection.isolation_level = None
    setupStatements = (
        # Enable foreign keys.
        "PRAGMA foreign_keys=ON;",
        # Timeout is in ms, so 5min (way longer than should be needed).
        "PRAGMA busy_timeout = 300000;",
    )
    with closing(dbapiConnection.cursor()) as setupCursor:
        for statement in setupStatements:
            setupCursor.execute(statement)
def _onSqlite3Begin(connection: sqlalchemy.engine.Connection) -> sqlalchemy.engine.Connection:
    """Start a transaction explicitly on a SQLite connection.

    Registered as a SQLAlchemy ``"begin"`` event listener.

    Parameters
    ----------
    connection : `sqlalchemy.engine.Connection`
        Connection on which a transaction is beginning; its dialect must be
        SQLite.

    Returns
    -------
    connection : `sqlalchemy.engine.Connection`
        The same connection that was passed in.
    """
    assert connection.dialect.name == "sqlite"
    # pysqlite's own transaction handling is buggy and never BEGINs, so we
    # emit the BEGIN ourselves.  Using IMMEDIATE asks SQLite to try to
    # acquire a lock as soon as the transaction starts (this should lead to
    # more blocking and fewer deadlocks).
    beginStatement = sqlalchemy.text("BEGIN IMMEDIATE")
    connection.execute(beginStatement)
    return connection
class SqliteDatabase(Database):
    """An implementation of the `Database` interface for SQLite3.

    Parameters
    ----------
    engine : `sqlalchemy.engine.Engine`
        An existing engine, e.g. one created by a previous call to
        `makeEngine`.
    origin : `int`
        An integer ID that should be used as the default for any datasets,
        quanta, or other entities that use a (autoincrement, origin) compound
        primary key.
    namespace : `str`, optional
        The namespace (schema) this database is associated with.  If `None`,
        the default schema for the connection is used (which may be `None`).
    writeable : `bool`, optional
        If `True`, allow write operations on the database, including
        ``CREATE TABLE``.

    Raises
    ------
    RuntimeError
        Raised if ``PRAGMA database_list`` reports no databases, or none
        whose name matches ``namespace`` (``"main"`` when ``namespace`` is
        `None`).

    Notes
    -----
    The case where ``namespace is not None`` is not yet tested, and may be
    broken; we need an API for attaching to different databases in order to
    write those tests, but haven't yet worked out what is common/different
    across databases well enough to define it.
    """

    def __init__(
        self,
        *,
        engine: sqlalchemy.engine.Engine,
        origin: int,
        namespace: Optional[str] = None,
        writeable: bool = True,
    ):
        super().__init__(origin=origin, engine=engine, namespace=namespace)
        # Get the filename from a call to 'PRAGMA database_list'.
        with engine.connect() as connection:
            with closing(connection.connection.cursor()) as cursor:
                dbList = cursor.execute("PRAGMA database_list").fetchall()
        if not dbList:
            raise RuntimeError("No database in connection.")
        if namespace is None:
            namespace = "main"
        # Each row is unpacked as (<ignored>, database name, file name); stop
        # at the first row whose name matches the requested namespace.
        for _, dbname, filename in dbList:
            if dbname == namespace:
                break
        else:
            raise RuntimeError(f"No '{namespace}' database in connection.")
        # A falsy file name is normalized to None, which the rest of the
        # class treats as "in-memory database".
        self.filename = filename or None
        self._writeable = writeable

    @classmethod
    def makeDefaultUri(cls, root: str) -> Optional[str]:
        # Default location is a ``gen3.sqlite3`` file directly under ``root``.
        return "sqlite:///" + os.path.join(root, "gen3.sqlite3")

    @classmethod
    def makeEngine(
        cls, uri: Optional[str] = None, *, filename: Optional[str] = None, writeable: bool = True
    ) -> sqlalchemy.engine.Engine:
        """Create a `sqlalchemy.engine.Engine` from a SQLAlchemy URI or
        filename.

        Parameters
        ----------
        uri : `str`, optional
            A SQLAlchemy URI connection string.
        filename : `str`, optional
            Name of the SQLite database file, or `None` to use an in-memory
            database.  Ignored if ``uri is not None``.
        writeable : `bool`, optional
            If `True`, allow write operations on the database, including
            ``CREATE TABLE``.

        Returns
        -------
        engine : `sqlalchemy.engine.Engine`
            A database engine.

        Raises
        ------
        NotImplementedError
            Raised if ``uri`` contains a ``uri=true`` query parameter, or if
            a read-only in-memory database is requested.
        """
        # In order to be able to tell SQLite that we want a read-only or
        # read-write connection, we need to make the SQLite DBAPI connection
        # with a "URI"-based connection string.  SQLAlchemy claims it can do
        # this
        # (https://docs.sqlalchemy.org/en/13/dialects/sqlite.html#uri-connections),
        # but it doesn't seem to work as advertised.  To work around this, we
        # use the 'creator' argument to sqlalchemy.engine.create_engine, which
        # lets us pass a callable that creates the DBAPI connection.
        if uri is None:
            if filename is None:
                target = ":memory:"
                uri = "sqlite://"
            else:
                target = f"file:{filename}"
                uri = f"sqlite:///{filename}"
        else:
            parsed = urllib.parse.urlparse(uri)
            queries = parsed.query.split("&")
            if "uri=true" in queries:
                # This is a SQLAlchemy URI that is already trying to make a
                # SQLite connection via a SQLite URI, and hence there may
                # be URI components for both SQLite and SQLAlchemy.  We
                # don't need to support that, and it'd be a
                # reimplementation of all of the (broken) logic in
                # SQLAlchemy for doing this, so we just don't.
                raise NotImplementedError("SQLite connection strings with 'uri=true' are not supported.")
            # This is just a SQLAlchemy URI with a non-URI SQLite
            # connection string inside it.  Pull that out so we can use it
            # in the creator call.
            if parsed.path.startswith("/"):
                filename = parsed.path[1:]
                target = f"file:{filename}"
            else:
                filename = None
                target = ":memory:"
        if filename is None:
            if not writeable:
                raise NotImplementedError("Read-only :memory: databases are not supported.")
        elif writeable:
            # Read-write, creating the file if it does not exist.
            target += "?mode=rwc&uri=true"
        else:
            target += "?mode=ro&uri=true"

        def creator() -> sqlite3.Connection:
            return sqlite3.connect(target, check_same_thread=False, uri=True)

        engine = sqlalchemy.engine.create_engine(uri, creator=creator)

        sqlalchemy.event.listen(engine, "connect", _onSqlite3Connect)
        sqlalchemy.event.listen(engine, "begin", _onSqlite3Begin)

        return engine

    @classmethod
    def fromEngine(
        cls,
        engine: sqlalchemy.engine.Engine,
        *,
        origin: int,
        namespace: Optional[str] = None,
        writeable: bool = True,
    ) -> Database:
        # Thin alternate constructor that forwards everything to __init__.
        return cls(engine=engine, origin=origin, writeable=writeable, namespace=namespace)

    def isWriteable(self) -> bool:
        return self._writeable

    def __str__(self) -> str:
        if self.filename:
            return f"SQLite3@{self.filename}"
        return "SQLite3@:memory:"

    def _lockTables(
        self, connection: sqlalchemy.engine.Connection, tables: Iterable[sqlalchemy.schema.Table] = ()
    ) -> None:
        # Docstring inherited.
        # Our SQLite database always acquires full-database locks at the
        # beginning of a transaction, so there's no need to acquire table-level
        # locks - which is good, because SQLite doesn't have table-level
        # locking.
        pass

    # MyPy claims that the return type here isn't covariant with the return
    # type of the base class method, which is formally correct but irrelevant
    # - the base class return type is _GeneratorContextManager, but only
    # because it's generated by the contextmanager decorator.
    def declareStaticTables(self, *, create: bool) -> ContextManager[StaticTablesContext]:  # type: ignore
        # If the user asked for an in-memory, writeable database, then we may
        # need to re-create schema even if create=False because schema can be
        # lost on re-connect.  This is only really relevant for tests, and it's
        # convenient there.
        if self.filename is None and self.isWriteable():
            inspector = sqlalchemy.inspect(self._engine)
            tables = inspector.get_table_names(schema=self.namespace)
            if not tables:
                create = True
        return super().declareStaticTables(create=create)

    def _convertFieldSpec(
        self, table: str, spec: ddl.FieldSpec, metadata: sqlalchemy.MetaData, **kwargs: Any
    ) -> sqlalchemy.schema.Column:
        if spec.autoincrement:
            if not spec.primaryKey:
                raise RuntimeError(
                    f"Autoincrement field {table}.{spec.name} that is not a primary key is not supported."
                )
            if spec.dtype != sqlalchemy.Integer:
                # SQLite's autoincrement is really limited; it only works if
                # the column type is exactly "INTEGER".  But it also doesn't
                # care about the distinctions between different integer types,
                # so it's safe to change it.  Work on a shallow copy so the
                # caller's spec is left untouched.
                spec = copy.copy(spec)
                spec.dtype = sqlalchemy.Integer
        return super()._convertFieldSpec(table, spec, metadata, **kwargs)

    def _makeColumnConstraints(self, table: str, spec: ddl.FieldSpec) -> List[sqlalchemy.CheckConstraint]:
        # For sqlite we force constraints on all string columns since sqlite
        # ignores everything otherwise and this leads to problems with
        # other databases.

        constraints = []
        if spec.isStringType():
            name = self.shrinkDatabaseEntityName("_".join([table, "len", spec.name]))
            # The lower bound of 1 is there because Oracle converts empty
            # strings to NULL.
            constraints.append(
                sqlalchemy.CheckConstraint(
                    f'length("{spec.name}")<={spec.length}' f' AND length("{spec.name}")>=1',
                    name=name,
                )
            )

        constraints.extend(super()._makeColumnConstraints(table, spec))
        return constraints

    def _convertTableSpec(
        self, name: str, spec: ddl.TableSpec, metadata: sqlalchemy.MetaData, **kwargs: Any
    ) -> sqlalchemy.schema.Table:
        primaryKeyFieldNames = {field.name for field in spec.fields if field.primaryKey}
        autoincrFieldNames = {field.name for field in spec.fields if field.autoincrement}
        if len(autoincrFieldNames) > 1:
            raise RuntimeError("At most one autoincrement field per table is allowed.")
        if len(primaryKeyFieldNames) > 1 and autoincrFieldNames:
            # SQLite's default rowid-based autoincrement doesn't work if the
            # field is just one field in a compound primary key.  As a
            # workaround, we create an extra table with just one column that
            # we'll insert into to generate those IDs.  That's only safe if
            # that single-column table's records are already unique with just
            # the autoincrement field, not the rest of the primary key.  In
            # practice, that means the single-column table's records are those
            # for which origin == self.origin.
            otherPrimaryKeyFieldNames = primaryKeyFieldNames - autoincrFieldNames
            if otherPrimaryKeyFieldNames != {"origin"}:
                # We need the only other field in the key to be 'origin'.
                raise NotImplementedError(
                    "Compound primary keys with an autoincrement are only supported in SQLite "
                    "if the only non-autoincrement primary key field is 'origin'."
                )
        if not spec.recycleIds:
            kwargs = dict(kwargs, sqlite_autoincrement=True)
        return super()._convertTableSpec(name, spec, metadata, **kwargs)

    def replace(self, table: sqlalchemy.schema.Table, *rows: dict) -> None:
        self.assertTableWriteable(table, f"Cannot replace into read-only table {table}.")
        if not rows:
            return
        query = sqlalchemy.dialects.sqlite.insert(table)
        excluded = query.excluded
        # On a primary-key conflict, overwrite every non-key column with the
        # value from the row that failed to insert.
        data = {
            column.name: getattr(excluded, column.name)
            for column in table.columns
            if column.name not in table.primary_key
        }
        query = query.on_conflict_do_update(index_elements=table.primary_key, set_=data)
        with self._connection() as connection:
            connection.execute(query, rows)

    def ensure(self, table: sqlalchemy.schema.Table, *rows: dict, primary_key_only: bool = False) -> int:
        self.assertTableWriteable(table, f"Cannot ensure into read-only table {table}.")
        if not rows:
            return 0
        query = sqlalchemy.dialects.sqlite.insert(table)
        if primary_key_only:
            # Only conflicts on the primary key are silently skipped.
            query = query.on_conflict_do_nothing(index_elements=table.primary_key)
        else:
            query = query.on_conflict_do_nothing()
        with self._connection() as connection:
            return connection.execute(query, rows).rowcount

    def constant_rows(
        self,
        fields: NamedValueAbstractSet[ddl.FieldSpec],
        *rows: dict,
        name: Optional[str] = None,
    ) -> sqlalchemy.sql.FromClause:
        # Docstring inherited.
        # While SQLite supports VALUES, it doesn't support assigning a name
        # to that construct or the names of its columns, and hence there's no
        # way to actually join it into a SELECT query.  It seems the only
        # alternative is something like:
        #
        #    SELECT ? AS a, ? AS b
        #    UNION ALL
        #    SELECT ? AS a, ? AS b
        #
        selects = [
            sqlalchemy.sql.select(
                *[sqlalchemy.sql.literal(row[field.name], field.dtype).label(field.name) for field in fields]
            )
            for row in rows
        ]
        return sqlalchemy.sql.union_all(*selects).alias(name)

    filename: Optional[str]
    """Name of the file this database is connected to (`str` or `None`).

    Set to `None` for in-memory databases.
    """