Coverage for python/lsst/daf/butler/registry/databases/sqlite.py: 19%

146 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-10-25 15:14 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = ["SqliteDatabase"] 

24 

25import copy 

26import os 

27import sqlite3 

28import urllib.parse 

29from collections.abc import Iterable 

30from contextlib import AbstractContextManager, closing 

31from typing import Any 

32 

33import sqlalchemy 

34import sqlalchemy.dialects.sqlite 

35import sqlalchemy.ext.compiler 

36 

37from ...core import ddl 

38from ...core.named import NamedValueAbstractSet 

39from ..interfaces import Database, StaticTablesContext 

40 

41 

def _onSqlite3Connect(
    dbapiConnection: sqlite3.Connection, connectionRecord: sqlalchemy.pool._ConnectionRecord
) -> None:
    """Configure a freshly-opened SQLite DBAPI connection.

    Parameters
    ----------
    dbapiConnection : `sqlite3.Connection`
        The raw DBAPI connection to configure.
    connectionRecord : `sqlalchemy.pool._ConnectionRecord`
        Pool record for the connection; not used by this function.
    """
    assert isinstance(dbapiConnection, sqlite3.Connection)
    # With isolation_level set to None, pysqlite stops emitting BEGIN and
    # COMMIT statements on its own.
    dbapiConnection.isolation_level = None
    pragmas = (
        # Enable foreign keys.
        "PRAGMA foreign_keys=ON;",
        # Timeout is in ms, so 5min (way longer than should be needed).
        "PRAGMA busy_timeout = 300000;",
    )
    with closing(dbapiConnection.cursor()) as pragmaCursor:
        for statement in pragmas:
            pragmaCursor.execute(statement)

52 

53 

class SqliteDatabase(Database):
    """An implementation of the `Database` interface for SQLite3.

    Parameters
    ----------
    engine : `sqlalchemy.engine.Engine`
        A database engine, such as one returned by `makeEngine`.
    origin : `int`
        An integer ID that should be used as the default for any datasets,
        quanta, or other entities that use a (autoincrement, origin) compound
        primary key.
    namespace : `str`, optional
        The namespace (schema) this database is associated with.  If `None`,
        the default schema for the connection is used (which may be `None`).
    writeable : `bool`, optional
        If `True`, allow write operations on the database, including
        ``CREATE TABLE``.

    Raises
    ------
    RuntimeError
        Raised if ``PRAGMA database_list`` reports no databases at all, or
        none whose name matches ``namespace`` (``"main"`` when ``namespace``
        is `None`).

    Notes
    -----
    The case where ``namespace is not None`` is not yet tested, and may be
    broken; we need an API for attaching to different databases in order to
    write those tests, but haven't yet worked out what is common/different
    across databases well enough to define it.
    """

    def __init__(
        self,
        *,
        engine: sqlalchemy.engine.Engine,
        origin: int,
        namespace: str | None = None,
        writeable: bool = True,
    ):
        super().__init__(origin=origin, engine=engine, namespace=namespace)
        # Get the filename from a call to 'PRAGMA database_list'.
        with engine.connect() as connection, closing(connection.connection.cursor()) as cursor:
            dbList = cursor.execute("PRAGMA database_list").fetchall()
        if not dbList:
            raise RuntimeError("No database in connection.")
        if namespace is None:
            namespace = "main"
        # Look for the filename associated with this namespace.
        for _, dbname, filename in dbList:  # B007
            if dbname == namespace:
                break
        else:
            raise RuntimeError(f"No '{namespace}' database in connection.")
        # An empty filename is normalized to None (see the ``filename``
        # attribute: None means an in-memory database).
        self.filename = filename or None
        self._writeable = writeable

    @classmethod
    def makeDefaultUri(cls, root: str) -> str | None:
        # The default database is a file named "gen3.sqlite3" inside ``root``.
        return "sqlite:///" + os.path.join(root, "gen3.sqlite3")

    @classmethod
    def makeEngine(
        cls,
        uri: str | sqlalchemy.engine.URL | None = None,
        *,
        filename: str | None = None,
        writeable: bool = True,
    ) -> sqlalchemy.engine.Engine:
        """Create a `sqlalchemy.engine.Engine` from a SQLAlchemy URI or
        filename.

        Parameters
        ----------
        uri : `str` or `sqlalchemy.engine.URL`, optional
            A SQLAlchemy URI connection string.
        filename : `str`, optional
            Name of the SQLite database file, or `None` to use an in-memory
            database.  Ignored if ``uri is not None``.
        writeable : `bool`, optional
            If `True`, allow write operations on the database, including
            ``CREATE TABLE``.  File-backed databases are opened with
            ``mode=rwc`` when `True` and ``mode=ro`` when `False`.

        Returns
        -------
        engine : `sqlalchemy.engine.Engine`
            A database engine.

        Raises
        ------
        NotImplementedError
            Raised if ``uri`` has a ``uri=true`` query parameter, or if a
            read-only in-memory database is requested.
        """
        # In order to be able to tell SQLite that we want a read-only or
        # read-write connection, we need to make the SQLite DBAPI connection
        # with a "URI"-based connection string.  SQLAlchemy claims it can do
        # this
        # (https://docs.sqlalchemy.org/en/13/dialects/sqlite.html#uri-connections),
        # but it doesn't seem to work as advertised.  To work around this, we
        # use the 'creator' argument to sqlalchemy.engine.create_engine, which
        # lets us pass a callable that creates the DBAPI connection.
        if uri is None:
            if filename is None:
                target = ":memory:"
                uri = "sqlite://"
            else:
                target = f"file:{filename}"
                uri = f"sqlite:///{filename}"
        else:
            if isinstance(uri, sqlalchemy.engine.URL):
                # We have to parse strings anyway, so convert it to string.
                uri = uri.render_as_string(hide_password=False)
            parsed = urllib.parse.urlparse(uri)
            queries = parsed.query.split("&")
            if "uri=true" in queries:
                # This is a SQLAlchemy URI that is already trying to make a
                # SQLite connection via a SQLite URI, and hence there may
                # be URI components for both SQLite and SQLAlchemy.  We
                # don't need to support that, and it'd be a
                # reimplementation of all of the (broken) logic in
                # SQLAlchemy for doing this, so we just don't.
                raise NotImplementedError("SQLite connection strings with 'uri=true' are not supported.")
            # This is just a SQLAlchemy URI with a non-URI SQLite
            # connection string inside it.  Pull that out so we can use it
            # in the creator call.
            if parsed.path.startswith("/"):
                filename = parsed.path[1:]
                target = f"file:{filename}"
            else:
                filename = None
                target = ":memory:"
        if filename is None:
            if not writeable:
                raise NotImplementedError("Read-only :memory: databases are not supported.")
        elif writeable:
            target += "?mode=rwc&uri=true"
        else:
            target += "?mode=ro&uri=true"

        def creator() -> sqlite3.Connection:
            return sqlite3.connect(target, check_same_thread=False, uri=True)

        engine = sqlalchemy.engine.create_engine(uri, creator=creator)

        sqlalchemy.event.listen(engine, "connect", _onSqlite3Connect)

        def _onSqlite3Begin(connection: sqlalchemy.engine.Connection) -> sqlalchemy.engine.Connection:
            assert connection.dialect.name == "sqlite"
            # Replace pysqlite's buggy transaction handling that never BEGINs
            # with our own that does, and tell SQLite to try to acquire a lock
            # as soon as we start a transaction that might involve writes (this
            # should lead to more blocking and fewer deadlocks).
            if writeable:
                connection.execute(sqlalchemy.text("BEGIN IMMEDIATE"))
            else:
                connection.execute(sqlalchemy.text("BEGIN"))
            return connection

        sqlalchemy.event.listen(engine, "begin", _onSqlite3Begin)

        return engine

    @classmethod
    def fromEngine(
        cls,
        engine: sqlalchemy.engine.Engine,
        *,
        origin: int,
        namespace: str | None = None,
        writeable: bool = True,
    ) -> Database:
        # All arguments are forwarded unchanged to the constructor.
        return cls(engine=engine, origin=origin, writeable=writeable, namespace=namespace)

    def isWriteable(self) -> bool:
        # Reports the ``writeable`` flag given at construction.
        return self._writeable

    def __str__(self) -> str:
        if self.filename:
            return f"SQLite3@{self.filename}"
        else:
            return "SQLite3@:memory:"

    def _lockTables(
        self, connection: sqlalchemy.engine.Connection, tables: Iterable[sqlalchemy.schema.Table] = ()
    ) -> None:
        # Docstring inherited.
        # Our SQLite database always acquires full-database locks at the
        # beginning of a transaction, so there's no need to acquire table-level
        # locks - which is good, because SQLite doesn't have table-level
        # locking.
        pass

    # MyPy claims that the return type here isn't covariant with the return
    # type of the base class method, which is formally correct but irrelevant
    # - the base class return type is _GeneratorContextManager, but only
    # because it's generated by the contextmanager decorator.
    def declareStaticTables(  # type: ignore
        self, *, create: bool
    ) -> AbstractContextManager[StaticTablesContext]:
        # If the user asked for an in-memory, writeable database, then we may
        # need to re-create schema even if create=False because schema can be
        # lost on re-connect.  This is only really relevant for tests, and it's
        # convenient there.
        if self.filename is None and self.isWriteable():
            inspector = sqlalchemy.inspect(self._engine)
            tables = inspector.get_table_names(schema=self.namespace)
            if not tables:
                create = True
        return super().declareStaticTables(create=create)

    def _convertFieldSpec(
        self, table: str, spec: ddl.FieldSpec, metadata: sqlalchemy.MetaData, **kwargs: Any
    ) -> sqlalchemy.schema.Column:
        if spec.autoincrement:
            if not spec.primaryKey:
                raise RuntimeError(
                    f"Autoincrement field {table}.{spec.name} that is not a primary key is not supported."
                )
            if spec.dtype != sqlalchemy.Integer:
                # SQLite's autoincrement is really limited; it only works if
                # the column type is exactly "INTEGER".  But it also doesn't
                # care about the distinctions between different integer types,
                # so it's safe to change it.  Work on a copy so the caller's
                # spec is left untouched.
                spec = copy.copy(spec)
                spec.dtype = sqlalchemy.Integer
        return super()._convertFieldSpec(table, spec, metadata, **kwargs)

    def _makeColumnConstraints(self, table: str, spec: ddl.FieldSpec) -> list[sqlalchemy.CheckConstraint]:
        # For sqlite we force constraints on all string columns since sqlite
        # ignores everything otherwise and this leads to problems with
        # other databases.

        constraints = []
        if spec.isStringType():
            name = self.shrinkDatabaseEntityName("_".join([table, "len", spec.name]))
            # Besides the upper bound on length, also require at least one
            # character: Oracle converts empty strings to NULL, so check.
            constraints.append(
                sqlalchemy.CheckConstraint(
                    f'length("{spec.name}")<={spec.length}' f' AND length("{spec.name}")>=1',
                    name=name,
                )
            )

        constraints.extend(super()._makeColumnConstraints(table, spec))
        return constraints

    def _convertTableSpec(
        self, name: str, spec: ddl.TableSpec, metadata: sqlalchemy.MetaData, **kwargs: Any
    ) -> sqlalchemy.schema.Table:
        primaryKeyFieldNames = {field.name for field in spec.fields if field.primaryKey}
        autoincrFieldNames = {field.name for field in spec.fields if field.autoincrement}
        if len(autoincrFieldNames) > 1:
            raise RuntimeError("At most one autoincrement field per table is allowed.")
        if len(primaryKeyFieldNames) > 1 and autoincrFieldNames:
            # SQLite's default rowid-based autoincrement doesn't work if the
            # field is just one field in a compound primary key.  As a
            # workaround, we create an extra table with just one column that
            # we'll insert into to generate those IDs.  That's only safe if
            # that single-column table's records are already unique with just
            # the autoincrement field, not the rest of the primary key.  In
            # practice, that means the single-column table's records are those
            # for which origin == self.origin.
            # NOTE(review): no extra table is created in this method; confirm
            # whether the workaround described above still exists elsewhere.
            otherPrimaryKeyFieldNames = primaryKeyFieldNames - autoincrFieldNames
            if otherPrimaryKeyFieldNames != {"origin"}:
                # We need the only other field in the key to be 'origin'.
                raise NotImplementedError(
                    "Compound primary keys with an autoincrement are only supported in SQLite "
                    "if the only non-autoincrement primary key field is 'origin'."
                )
        if not spec.recycleIds:
            kwargs = dict(kwargs, sqlite_autoincrement=True)
        return super()._convertTableSpec(name, spec, metadata, **kwargs)

    def replace(self, table: sqlalchemy.schema.Table, *rows: dict) -> None:
        self.assertTableWriteable(table, f"Cannot replace into read-only table {table}.")
        if not rows:
            return
        query = sqlalchemy.dialects.sqlite.insert(table)
        excluded = query.excluded
        # On a primary-key conflict, overwrite every non-key column with the
        # value from the row that was being inserted.
        data = {
            column.name: getattr(excluded, column.name)
            for column in table.columns
            if column.name not in table.primary_key
        }
        query = query.on_conflict_do_update(index_elements=table.primary_key, set_=data)
        with self._transaction() as (_, connection):
            connection.execute(query, rows)

    def ensure(self, table: sqlalchemy.schema.Table, *rows: dict, primary_key_only: bool = False) -> int:
        self.assertTableWriteable(table, f"Cannot ensure into read-only table {table}.")
        if not rows:
            return 0
        query = sqlalchemy.dialects.sqlite.insert(table)
        if primary_key_only:
            # Only conflicts on the primary key are silently skipped.
            query = query.on_conflict_do_nothing(index_elements=table.primary_key)
        else:
            query = query.on_conflict_do_nothing()
        with self._transaction() as (_, connection):
            return connection.execute(query, rows).rowcount

    def constant_rows(
        self,
        fields: NamedValueAbstractSet[ddl.FieldSpec],
        *rows: dict,
        name: str | None = None,
    ) -> sqlalchemy.sql.FromClause:
        # Docstring inherited.
        # While SQLite supports VALUES, it doesn't support assigning a name
        # to that construct or the names of its columns, and hence there's no
        # way to actually join it into a SELECT query.  It seems the only
        # alternative is something like:
        #
        #    SELECT ? AS a, ? AS b
        #    UNION ALL
        #    SELECT ? AS a, ? AS b
        #
        selects = [
            sqlalchemy.sql.select(
                *[sqlalchemy.sql.literal(row[field.name], field.dtype).label(field.name) for field in fields]
            )
            for row in rows
        ]
        return sqlalchemy.sql.union_all(*selects).alias(name)

    filename: str | None
    """Name of the file this database is connected to (`str` or `None`).

    Set to `None` for in-memory databases.
    """