Coverage for python/lsst/daf/butler/registry/databases/sqlite.py: 19%

146 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-12-05 11:07 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27from __future__ import annotations 

28 

29__all__ = ["SqliteDatabase"] 

30 

31import copy 

32import os 

33import sqlite3 

34import urllib.parse 

35from collections.abc import Iterable 

36from contextlib import AbstractContextManager, closing 

37from typing import Any 

38 

39import sqlalchemy 

40import sqlalchemy.dialects.sqlite 

41import sqlalchemy.ext.compiler 

42 

43from ... import ddl 

44from ..._named import NamedValueAbstractSet 

45from ..interfaces import Database, StaticTablesContext 

46 

47 

48def _onSqlite3Connect( 

49 dbapiConnection: sqlite3.Connection, connectionRecord: sqlalchemy.pool._ConnectionRecord 

50) -> None: 

51 assert isinstance(dbapiConnection, sqlite3.Connection) 

52 # Prevent pysqlite from emitting BEGIN and COMMIT statements. 

53 dbapiConnection.isolation_level = None 

54 # Enable foreign keys 

55 with closing(dbapiConnection.cursor()) as cursor: 

56 cursor.execute("PRAGMA foreign_keys=ON;") 

57 cursor.execute("PRAGMA busy_timeout = 300000;") # in ms, so 5min (way longer than should be needed) 

58 

59 

class SqliteDatabase(Database):
    """An implementation of the `Database` interface for SQLite3.

    Parameters
    ----------
    engine : `sqlalchemy.engine.Engine`
        An existing engine, e.g. one created by a previous call to
        `makeEngine`.
    origin : `int`
        An integer ID that should be used as the default for any datasets,
        quanta, or other entities that use a (autoincrement, origin) compound
        primary key.
    namespace : `str`, optional
        The namespace (schema) this database is associated with.  If `None`,
        the default schema for the connection is used (which may be `None`).
    writeable : `bool`, optional
        If `True`, allow write operations on the database, including
        ``CREATE TABLE``.

    Notes
    -----
    The case where ``namespace is not None`` is not yet tested, and may be
    broken; we need an API for attaching to different databases in order to
    write those tests, but haven't yet worked out what is common/different
    across databases well enough to define it.
    """

    def __init__(
        self,
        *,
        engine: sqlalchemy.engine.Engine,
        origin: int,
        namespace: str | None = None,
        writeable: bool = True,
    ):
        super().__init__(origin=origin, engine=engine, namespace=namespace)
        # Get the filename from a call to 'PRAGMA database_list'.
        with engine.connect() as connection, closing(connection.connection.cursor()) as cursor:
            dbList = cursor.execute("PRAGMA database_list").fetchall()
        if not dbList:
            raise RuntimeError("No database in connection.")
        if namespace is None:
            namespace = "main"
        # Look for the filename associated with this namespace; each row is
        # unpacked as (<ignored>, database name, filename).
        for _, dbname, filename in dbList:  # B007
            if dbname == namespace:
                break
        else:
            raise RuntimeError(f"No '{namespace}' database in connection.")
        # An empty filename is normalized to None (see the `filename`
        # attribute documentation: None means an in-memory database).
        self.filename = filename or None
        self._writeable = writeable

    @classmethod
    def makeDefaultUri(cls, root: str) -> str | None:
        # Build a SQLAlchemy URI for a "gen3.sqlite3" file inside ``root``.
        return "sqlite:///" + os.path.join(root, "gen3.sqlite3")

    @classmethod
    def makeEngine(
        cls,
        uri: str | sqlalchemy.engine.URL | None = None,
        *,
        filename: str | None = None,
        writeable: bool = True,
    ) -> sqlalchemy.engine.Engine:
        """Create a `sqlalchemy.engine.Engine` from a SQLAlchemy URI or
        filename.

        Parameters
        ----------
        uri : `str` or `sqlalchemy.engine.URL`, optional
            A SQLAlchemy URI connection string.
        filename : `str`, optional
            Name of the SQLite database file, or `None` to use an in-memory
            database.  Ignored if ``uri is not None``.
        writeable : `bool`, optional
            If `True`, allow write operations on the database, including
            ``CREATE TABLE``.

        Returns
        -------
        engine : `sqlalchemy.engine.Engine`
            A database engine.

        Raises
        ------
        NotImplementedError
            Raised if ``uri`` contains a ``uri=true`` query parameter, or if
            a read-only in-memory database is requested.
        """
        # In order to be able to tell SQLite that we want a read-only or
        # read-write connection, we need to make the SQLite DBAPI connection
        # with a "URI"-based connection string.  SQLAlchemy claims it can do
        # this
        # (https://docs.sqlalchemy.org/en/13/dialects/sqlite.html#uri-connections),
        # but it doesn't seem to work as advertised.  To work around this, we
        # use the 'creator' argument to sqlalchemy.engine.create_engine, which
        # lets us pass a callable that creates the DBAPI connection.
        #
        # NOTE(review): ``filename`` is interpolated verbatim into the SQLite
        # "file:" URI below; characters such as '?' or '#' in a path would be
        # read as URI delimiters.  Confirm whether callers can pass such
        # paths before changing this.
        if uri is None:
            if filename is None:
                target = ":memory:"
                uri = "sqlite://"
            else:
                target = f"file:{filename}"
                uri = f"sqlite:///{filename}"
        else:
            if isinstance(uri, sqlalchemy.engine.URL):
                # We have to parse strings anyway, so convert it to string.
                uri = uri.render_as_string(hide_password=False)
            parsed = urllib.parse.urlparse(uri)
            queries = parsed.query.split("&")
            if "uri=true" in queries:
                # This is a SQLAlchemy URI that is already trying to make a
                # SQLite connection via a SQLite URI, and hence there may
                # be URI components for both SQLite and SQLAlchemy.  We
                # don't need to support that, and it'd be a
                # reimplementation of all of the (broken) logic in
                # SQLAlchemy for doing this, so we just don't.
                raise NotImplementedError("SQLite connection strings with 'uri=true' are not supported.")
            # This is just a SQLAlchemy URI with a non-URI SQLite
            # connection string inside it.  Pull that out so we can use it
            # in the creator call.  Any ``filename`` argument is overridden
            # here, as documented.
            if parsed.path.startswith("/"):
                filename = parsed.path[1:]
                target = f"file:{filename}"
            else:
                filename = None
                target = ":memory:"
        if filename is None:
            if not writeable:
                raise NotImplementedError("Read-only :memory: databases are not supported.")
        elif writeable:
            target += "?mode=rwc&uri=true"
        else:
            target += "?mode=ro&uri=true"

        def creator() -> sqlite3.Connection:
            return sqlite3.connect(target, check_same_thread=False, uri=True)

        engine = sqlalchemy.engine.create_engine(uri, creator=creator)

        sqlalchemy.event.listen(engine, "connect", _onSqlite3Connect)

        def _onSqlite3Begin(connection: sqlalchemy.engine.Connection) -> sqlalchemy.engine.Connection:
            assert connection.dialect.name == "sqlite"
            # Replace pysqlite's buggy transaction handling that never BEGINs
            # with our own that does, and tell SQLite to try to acquire a lock
            # as soon as we start a transaction that might involve writes (this
            # should lead to more blocking and fewer deadlocks).
            if writeable:
                connection.execute(sqlalchemy.text("BEGIN IMMEDIATE"))
            else:
                connection.execute(sqlalchemy.text("BEGIN"))
            return connection

        sqlalchemy.event.listen(engine, "begin", _onSqlite3Begin)

        return engine

    @classmethod
    def fromEngine(
        cls,
        engine: sqlalchemy.engine.Engine,
        *,
        origin: int,
        namespace: str | None = None,
        writeable: bool = True,
    ) -> Database:
        # Thin wrapper that forwards all arguments to the constructor.
        return cls(engine=engine, origin=origin, writeable=writeable, namespace=namespace)

    def isWriteable(self) -> bool:
        # Report the ``writeable`` flag given at construction.
        return self._writeable

    def __str__(self) -> str:
        if self.filename:
            return f"SQLite3@{self.filename}"
        return "SQLite3@:memory:"

    def _lockTables(
        self, connection: sqlalchemy.engine.Connection, tables: Iterable[sqlalchemy.schema.Table] = ()
    ) -> None:
        # Docstring inherited.
        # Our SQLite database always acquires full-database locks at the
        # beginning of a transaction, so there's no need to acquire table-level
        # locks - which is good, because SQLite doesn't have table-level
        # locking.
        pass

    # MyPy claims that the return type here isn't covariant with the return
    # type of the base class method, which is formally correct but irrelevant
    # - the base class return type is _GeneratorContextManager, but only
    # because it's generated by the contextmanager decorator.
    def declareStaticTables(  # type: ignore
        self, *, create: bool
    ) -> AbstractContextManager[StaticTablesContext]:
        # If the user asked for an in-memory, writeable database, then we may
        # need to re-create schema even if create=False because schema can be
        # lost on re-connect.  This is only really relevant for tests, and it's
        # convenient there.
        if self.filename is None and self.isWriteable():
            inspector = sqlalchemy.inspect(self._engine)
            if not inspector.get_table_names(schema=self.namespace):
                create = True
        return super().declareStaticTables(create=create)

    def _convertFieldSpec(
        self, table: str, spec: ddl.FieldSpec, metadata: sqlalchemy.MetaData, **kwargs: Any
    ) -> sqlalchemy.schema.Column:
        if spec.autoincrement:
            if not spec.primaryKey:
                raise RuntimeError(
                    f"Autoincrement field {table}.{spec.name} that is not a primary key is not supported."
                )
            if spec.dtype != sqlalchemy.Integer:
                # SQLite's autoincrement is really limited; it only works if
                # the column type is exactly "INTEGER".  But it also doesn't
                # care about the distinctions between different integer types,
                # so it's safe to change it.  Work on a shallow copy so the
                # caller's spec object is not modified.
                spec = copy.copy(spec)
                spec.dtype = sqlalchemy.Integer
        return super()._convertFieldSpec(table, spec, metadata, **kwargs)

    def _makeColumnConstraints(self, table: str, spec: ddl.FieldSpec) -> list[sqlalchemy.CheckConstraint]:
        # For sqlite we force constraints on all string columns since sqlite
        # ignores everything otherwise and this leads to problems with
        # other databases.

        constraints: list[sqlalchemy.CheckConstraint] = []
        if spec.isStringType():
            name = self.shrinkDatabaseEntityName("_".join([table, "len", spec.name]))
            constraints.append(
                sqlalchemy.CheckConstraint(
                    f'length("{spec.name}")<={spec.length}'
                    # Oracle converts
                    # empty strings to
                    # NULL so check
                    f' AND length("{spec.name}")>=1',
                    name=name,
                )
            )

        constraints.extend(super()._makeColumnConstraints(table, spec))
        return constraints

    def _convertTableSpec(
        self, name: str, spec: ddl.TableSpec, metadata: sqlalchemy.MetaData, **kwargs: Any
    ) -> sqlalchemy.schema.Table:
        primaryKeyFieldNames = {field.name for field in spec.fields if field.primaryKey}
        autoincrFieldNames = {field.name for field in spec.fields if field.autoincrement}
        if len(autoincrFieldNames) > 1:
            raise RuntimeError("At most one autoincrement field per table is allowed.")
        if len(primaryKeyFieldNames) > 1 and len(autoincrFieldNames) > 0:
            # SQLite's default rowid-based autoincrement doesn't work if the
            # field is just one field in a compound primary key.  As a
            # workaround, we create an extra table with just one column that
            # we'll insert into to generate those IDs.  That's only safe if
            # that single-column table's records are already unique with just
            # the autoincrement field, not the rest of the primary key.  In
            # practice, that means the single-column table's records are those
            # for which origin == self.origin.
            #
            # NOTE(review): no extra table is created in this method itself;
            # confirm the workaround described above still exists elsewhere.
            otherPrimaryKeyFieldNames = primaryKeyFieldNames - autoincrFieldNames
            if otherPrimaryKeyFieldNames != {"origin"}:
                # We need the only other field in the key to be 'origin'.
                raise NotImplementedError(
                    "Compound primary keys with an autoincrement are only supported in SQLite "
                    "if the only non-autoincrement primary key field is 'origin'."
                )
        if not spec.recycleIds:
            kwargs = dict(kwargs, sqlite_autoincrement=True)
        return super()._convertTableSpec(name, spec, metadata, **kwargs)

    def replace(self, table: sqlalchemy.schema.Table, *rows: dict) -> None:
        self.assertTableWriteable(table, f"Cannot replace into read-only table {table}.")
        if not rows:
            return
        query = sqlalchemy.dialects.sqlite.insert(table)
        excluded = query.excluded
        # On a primary-key conflict, overwrite every non-key column with the
        # value from the row that was being inserted.
        data = {
            column.name: getattr(excluded, column.name)
            for column in table.columns
            if column.name not in table.primary_key
        }
        query = query.on_conflict_do_update(index_elements=table.primary_key, set_=data)
        with self._transaction() as (_, connection):
            connection.execute(query, rows)

    def ensure(self, table: sqlalchemy.schema.Table, *rows: dict, primary_key_only: bool = False) -> int:
        self.assertTableWriteable(table, f"Cannot ensure into read-only table {table}.")
        if not rows:
            return 0
        query = sqlalchemy.dialects.sqlite.insert(table)
        if primary_key_only:
            # Only conflicts on the primary key are ignored.
            query = query.on_conflict_do_nothing(index_elements=table.primary_key)
        else:
            query = query.on_conflict_do_nothing()
        with self._transaction() as (_, connection):
            return connection.execute(query, rows).rowcount

    def constant_rows(
        self,
        fields: NamedValueAbstractSet[ddl.FieldSpec],
        *rows: dict,
        name: str | None = None,
    ) -> sqlalchemy.sql.FromClause:
        # Docstring inherited.
        # While SQLite supports VALUES, it doesn't support assigning a name
        # to that construct or the names of its columns, and hence there's no
        # way to actually join it into a SELECT query.  It seems the only
        # alternative is something like:
        #
        #    SELECT ? AS a, ? AS b
        #    UNION ALL
        #    SELECT ? AS a, ? AS b
        #
        selects = [
            sqlalchemy.sql.select(
                *[sqlalchemy.sql.literal(row[field.name], field.dtype).label(field.name) for field in fields]
            )
            for row in rows
        ]
        return sqlalchemy.sql.union_all(*selects).alias(name)

    filename: str | None
    """Name of the file this database is connected to (`str` or `None`).

    Set to `None` for in-memory databases.
    """