Coverage for python/lsst/daf/butler/registry/databases/sqlite.py: 18%
141 statements
« prev ^ index » next coverage.py v6.4, created at 2022-05-24 02:27 -0700
« prev ^ index » next coverage.py v6.4, created at 2022-05-24 02:27 -0700
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = ["SqliteDatabase"]
25import copy
26import os
27import sqlite3
28import urllib.parse
29from contextlib import closing
30from typing import Any, ContextManager, Iterable, List, Optional
32import sqlalchemy
33import sqlalchemy.dialects.sqlite
34import sqlalchemy.ext.compiler
36from ...core import ddl
37from ..interfaces import Database, StaticTablesContext
def _onSqlite3Connect(
    dbapiConnection: sqlite3.Connection, connectionRecord: sqlalchemy.pool._ConnectionRecord
) -> None:
    """Configure a freshly opened SQLite DBAPI connection.

    Parameters
    ----------
    dbapiConnection : `sqlite3.Connection`
        The raw DBAPI connection that was just created.
    connectionRecord : `sqlalchemy.pool._ConnectionRecord`
        Pool record for the connection; not used here.
    """
    assert isinstance(dbapiConnection, sqlite3.Connection)
    # Stop pysqlite from emitting its own BEGIN and COMMIT statements.
    dbapiConnection.isolation_level = None
    setupStatements = (
        # Foreign keys have to be switched on explicitly.
        "PRAGMA foreign_keys=ON;",
        # Timeout is in ms, so 5min (way longer than should be needed).
        "PRAGMA busy_timeout = 300000;",
    )
    with closing(dbapiConnection.cursor()) as pragmaCursor:
        for statement in setupStatements:
            pragmaCursor.execute(statement)
def _onSqlite3Begin(connection: sqlalchemy.engine.Connection) -> sqlalchemy.engine.Connection:
    """Start a transaction explicitly on a SQLite connection.

    Parameters
    ----------
    connection : `sqlalchemy.engine.Connection`
        Connection on which a transaction is beginning; its dialect must be
        ``"sqlite"``.

    Returns
    -------
    connection : `sqlalchemy.engine.Connection`
        The same connection object that was passed in.
    """
    assert connection.dialect.name == "sqlite"
    # pysqlite's transaction handling is buggy and never BEGINs, so we issue
    # the BEGIN ourselves. Using IMMEDIATE asks SQLite to try to acquire a
    # lock as soon as the transaction starts, which should lead to more
    # blocking and fewer deadlocks.
    beginStatement = sqlalchemy.text("BEGIN IMMEDIATE")
    connection.execute(beginStatement)
    return connection
class SqliteDatabase(Database):
    """An implementation of the `Database` interface for SQLite3.

    Parameters
    ----------
    engine : `sqlalchemy.engine.Engine`
        Engine used to connect to the database, e.g. one returned by
        `makeEngine`.
    origin : `int`
        An integer ID that should be used as the default for any datasets,
        quanta, or other entities that use a (autoincrement, origin) compound
        primary key.
    namespace : `str`, optional
        The namespace (schema) this database is associated with.  If `None`,
        the default schema for the connection is used (which may be `None`);
        the ``"main"`` database is then used to look up the filename.
    writeable : `bool`, optional
        If `True`, allow write operations on the database, including
        ``CREATE TABLE``.

    Raises
    ------
    RuntimeError
        Raised if the connection reports no databases, or none whose name
        matches ``namespace``.

    Notes
    -----
    The case where ``namespace is not None`` is not yet tested, and may be
    broken; we need an API for attaching to different databases in order to
    write those tests, but haven't yet worked out what is common/different
    across databases well enough to define it.
    """

    def __init__(
        self,
        *,
        engine: sqlalchemy.engine.Engine,
        origin: int,
        namespace: Optional[str] = None,
        writeable: bool = True,
    ):
        super().__init__(origin=origin, engine=engine, namespace=namespace)
        # Get the filename from a call to 'PRAGMA database_list'.
        with engine.connect() as connection:
            with closing(connection.connection.cursor()) as cursor:
                dbList = cursor.execute("PRAGMA database_list").fetchall()
        if not dbList:
            raise RuntimeError("No database in connection.")
        if namespace is None:
            namespace = "main"
        for _, dbname, filename in dbList:
            if dbname == namespace:
                break
        else:
            raise RuntimeError(f"No '{namespace}' database in connection.")
        # An empty filename is normalized to None, which is how the
        # ``filename`` attribute represents in-memory databases.
        self.filename = filename or None
        self._writeable = writeable

    @classmethod
    def makeDefaultUri(cls, root: str) -> Optional[str]:
        """Return the SQLAlchemy URI of a ``gen3.sqlite3`` file under
        ``root``.
        """
        return "sqlite:///" + os.path.join(root, "gen3.sqlite3")

    @classmethod
    def makeEngine(
        cls, uri: Optional[str] = None, *, filename: Optional[str] = None, writeable: bool = True
    ) -> sqlalchemy.engine.Engine:
        """Create a `sqlalchemy.engine.Engine` from a SQLAlchemy URI or
        filename.

        Parameters
        ----------
        uri : `str`, optional
            A SQLAlchemy URI connection string.
        filename : `str`, optional
            Name of the SQLite database file, or `None` to use an in-memory
            database.  Ignored if ``uri is not None``.
        writeable : `bool`, optional
            If `True`, allow write operations on the database, including
            ``CREATE TABLE``.

        Returns
        -------
        engine : `sqlalchemy.engine.Engine`
            A database engine.

        Raises
        ------
        NotImplementedError
            Raised if ``uri`` contains a ``uri=true`` query parameter, or if
            a read-only in-memory database is requested.
        """
        # In order to be able to tell SQLite that we want a read-only or
        # read-write connection, we need to make the SQLite DBAPI connection
        # with a "URI"-based connection string.  SQLAlchemy claims it can do
        # this
        # (https://docs.sqlalchemy.org/en/13/dialects/sqlite.html#uri-connections),
        # but it doesn't seem to work as advertised.  To work around this, we
        # use the 'creator' argument to sqlalchemy.engine.create_engine, which
        # lets us pass a callable that creates the DBAPI connection.
        if uri is None:
            if filename is None:
                target = ":memory:"
                uri = "sqlite://"
            else:
                target = f"file:{filename}"
                uri = f"sqlite:///{filename}"
        else:
            parsed = urllib.parse.urlparse(uri)
            queries = parsed.query.split("&")
            if "uri=true" in queries:
                # This is a SQLAlchemy URI that is already trying to make a
                # SQLite connection via a SQLite URI, and hence there may
                # be URI components for both SQLite and SQLAlchemy.  We
                # don't need to support that, and it'd be a
                # reimplementation of all of the (broken) logic in
                # SQLAlchemy for doing this, so we just don't.
                raise NotImplementedError("SQLite connection strings with 'uri=true' are not supported.")
            # This is just a SQLAlchemy URI with a non-URI SQLite
            # connection string inside it.  Pull that out so we can use it
            # in the creator call.
            if parsed.path.startswith("/"):
                filename = parsed.path[1:]
                target = f"file:{filename}"
            else:
                filename = None
                target = ":memory:"
        if filename is None:
            if not writeable:
                raise NotImplementedError("Read-only :memory: databases are not supported.")
        elif writeable:
            target += "?mode=rwc&uri=true"
        else:
            target += "?mode=ro&uri=true"

        def creator() -> sqlite3.Connection:
            # Open the DBAPI connection ourselves so the mode in ``target``
            # is honored.
            return sqlite3.connect(target, check_same_thread=False, uri=True)

        engine = sqlalchemy.engine.create_engine(uri, creator=creator)

        sqlalchemy.event.listen(engine, "connect", _onSqlite3Connect)
        sqlalchemy.event.listen(engine, "begin", _onSqlite3Begin)

        return engine

    @classmethod
    def fromEngine(
        cls,
        engine: sqlalchemy.engine.Engine,
        *,
        origin: int,
        namespace: Optional[str] = None,
        writeable: bool = True,
    ) -> Database:
        """Construct an instance of this class from an existing engine.

        All arguments are forwarded unchanged to the constructor.
        """
        return cls(engine=engine, origin=origin, writeable=writeable, namespace=namespace)

    def isWriteable(self) -> bool:
        """Return the ``writeable`` flag this instance was constructed with."""
        return self._writeable

    def __str__(self) -> str:
        if self.filename:
            return f"SQLite3@{self.filename}"
        return "SQLite3@:memory:"

    def _lockTables(
        self, connection: sqlalchemy.engine.Connection, tables: Iterable[sqlalchemy.schema.Table] = ()
    ) -> None:
        # Docstring inherited.
        # Our SQLite database always acquires full-database locks at the
        # beginning of a transaction, so there's no need to acquire table-level
        # locks - which is good, because SQLite doesn't have table-level
        # locking.
        pass

    # MyPy claims that the return type here isn't covariant with the return
    # type of the base class method, which is formally correct but irrelevant
    # - the base class return type is _GeneratorContextManager, but only
    # because it's generated by the contextmanager decorator.
    def declareStaticTables(self, *, create: bool) -> ContextManager[StaticTablesContext]:  # type: ignore
        # If the user asked for an in-memory, writeable database, then we may
        # need to re-create schema even if create=False because schema can be
        # lost on re-connect.  This is only really relevant for tests, and it's
        # convenient there.
        if self.filename is None and self.isWriteable():
            inspector = sqlalchemy.inspect(self._engine)
            tables = inspector.get_table_names(schema=self.namespace)
            if not tables:
                create = True
        return super().declareStaticTables(create=create)

    def _convertFieldSpec(
        self, table: str, spec: ddl.FieldSpec, metadata: sqlalchemy.MetaData, **kwargs: Any
    ) -> sqlalchemy.schema.Column:
        if spec.autoincrement:
            if not spec.primaryKey:
                raise RuntimeError(
                    f"Autoincrement field {table}.{spec.name} that is not a primary key is not supported."
                )
            if spec.dtype != sqlalchemy.Integer:
                # SQLite's autoincrement is really limited; it only works if
                # the column type is exactly "INTEGER".  But it also doesn't
                # care about the distinctions between different integer types,
                # so it's safe to change it.  Work on a copy so the caller's
                # spec is not modified.
                spec = copy.copy(spec)
                spec.dtype = sqlalchemy.Integer
        return super()._convertFieldSpec(table, spec, metadata, **kwargs)

    def _makeColumnConstraints(self, table: str, spec: ddl.FieldSpec) -> List[sqlalchemy.CheckConstraint]:
        # For sqlite we force constraints on all string columns since sqlite
        # ignores everything otherwise and this leads to problems with
        # other databases.

        constraints = []
        if spec.isStringType():
            name = self.shrinkDatabaseEntityName("_".join([table, "len", spec.name]))
            constraints.append(
                sqlalchemy.CheckConstraint(
                    # The column name is double-quoted so that names that are
                    # not valid bare identifiers (e.g. SQL keywords) still
                    # yield valid SQL.  The lower bound is there because
                    # Oracle converts empty strings to NULL.
                    f'length("{spec.name}")<={spec.length} AND length("{spec.name}")>=1',
                    name=name,
                )
            )

        constraints.extend(super()._makeColumnConstraints(table, spec))
        return constraints

    def _convertTableSpec(
        self, name: str, spec: ddl.TableSpec, metadata: sqlalchemy.MetaData, **kwargs: Any
    ) -> sqlalchemy.schema.Table:
        primaryKeyFieldNames = {field.name for field in spec.fields if field.primaryKey}
        autoincrFieldNames = {field.name for field in spec.fields if field.autoincrement}
        if len(autoincrFieldNames) > 1:
            raise RuntimeError("At most one autoincrement field per table is allowed.")
        if len(primaryKeyFieldNames) > 1 and autoincrFieldNames:
            # SQLite's default rowid-based autoincrement doesn't work if the
            # field is just one field in a compound primary key.  The only
            # compound form accepted here is (autoincrement, origin); any
            # other combination is rejected.
            otherPrimaryKeyFieldNames = primaryKeyFieldNames - autoincrFieldNames
            if otherPrimaryKeyFieldNames != {"origin"}:
                # We need the only other field in the key to be 'origin'.
                raise NotImplementedError(
                    "Compound primary keys with an autoincrement are only supported in SQLite "
                    "if the only non-autoincrement primary key field is 'origin'."
                )
        if not spec.recycleIds:
            # Add sqlite_autoincrement without mutating the caller's kwargs.
            kwargs = dict(kwargs, sqlite_autoincrement=True)
        return super()._convertTableSpec(name, spec, metadata, **kwargs)

    def replace(self, table: sqlalchemy.schema.Table, *rows: dict) -> None:
        self.assertTableWriteable(table, f"Cannot replace into read-only table {table}.")
        if not rows:
            return
        query = sqlalchemy.dialects.sqlite.insert(table)
        # On a primary-key conflict, overwrite every non-key column with the
        # value from the row that was being inserted.
        excluded = query.excluded
        data = {
            column.name: getattr(excluded, column.name)
            for column in table.columns
            if column.name not in table.primary_key
        }
        query = query.on_conflict_do_update(index_elements=table.primary_key, set_=data)
        with self._connection() as connection:
            connection.execute(query, rows)

    def ensure(self, table: sqlalchemy.schema.Table, *rows: dict, primary_key_only: bool = False) -> int:
        self.assertTableWriteable(table, f"Cannot ensure into read-only table {table}.")
        if not rows:
            return 0
        query = sqlalchemy.dialects.sqlite.insert(table)
        if primary_key_only:
            # Only skip rows that conflict on the primary key.
            query = query.on_conflict_do_nothing(index_elements=table.primary_key)
        else:
            query = query.on_conflict_do_nothing()
        with self._connection() as connection:
            return connection.execute(query, rows).rowcount

    filename: Optional[str]
    """Name of the file this database is connected to (`str` or `None`).

    Set to `None` for in-memory databases.
    """