Coverage for python/lsst/daf/butler/registry/databases/sqlite.py: 17%

147 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-07-12 10:56 -0700

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = ["SqliteDatabase"] 

24 

25import copy 

26import os 

27import sqlite3 

28import urllib.parse 

29from collections.abc import Iterable 

30from contextlib import closing 

31from typing import Any, ContextManager 

32 

33import sqlalchemy 

34import sqlalchemy.dialects.sqlite 

35import sqlalchemy.ext.compiler 

36 

37from ...core import ddl 

38from ...core.named import NamedValueAbstractSet 

39from ..interfaces import Database, StaticTablesContext 

40 

41 

42def _onSqlite3Connect( 

43 dbapiConnection: sqlite3.Connection, connectionRecord: sqlalchemy.pool._ConnectionRecord 

44) -> None: 

45 assert isinstance(dbapiConnection, sqlite3.Connection) 

46 # Prevent pysqlite from emitting BEGIN and COMMIT statements. 

47 dbapiConnection.isolation_level = None 

48 # Enable foreign keys 

49 with closing(dbapiConnection.cursor()) as cursor: 

50 cursor.execute("PRAGMA foreign_keys=ON;") 

51 cursor.execute("PRAGMA busy_timeout = 300000;") # in ms, so 5min (way longer than should be needed) 

52 

53 

54class SqliteDatabase(Database): 

55 """An implementation of the `Database` interface for SQLite3. 

56 

57 Parameters 

58 ---------- 

59 engine : `sqlalchemy.engine.Engine`

60 Engine to use for database connections, such as one returned by `makeEngine`.

61 origin : `int` 

62 An integer ID that should be used as the default for any datasets, 

63 quanta, or other entities that use a (autoincrement, origin) compound 

64 primary key. 

65 namespace : `str`, optional 

66 The namespace (schema) this database is associated with. If `None`, 

67 the default schema for the connection is used (which may be `None`). 

68 writeable : `bool`, optional 

69 If `True`, allow write operations on the database, including 

70 ``CREATE TABLE``. 

71 

72 Notes 

73 ----- 

74 The case where ``namespace is not None`` is not yet tested, and may be 

75 broken; we need an API for attaching to different databases in order to 

76 write those tests, but haven't yet worked out what is common/different 

77 across databases well enough to define it. 

78 """ 

79 

def __init__(
    self,
    *,
    engine: sqlalchemy.engine.Engine,
    origin: int,
    namespace: str | None = None,
    writeable: bool = True,
):
    # The base class receives the namespace exactly as given (possibly
    # `None`); the "main" default below is only used to look up the file.
    super().__init__(origin=origin, engine=engine, namespace=namespace)
    # Get the filename from a call to 'PRAGMA database_list', which yields
    # one (seq, name, file) row per attached database.
    with engine.connect() as connection, closing(connection.connection.cursor()) as cursor:
        attached = cursor.execute("PRAGMA database_list").fetchall()
    if not attached:
        raise RuntimeError("No database in connection.")
    wanted = "main" if namespace is None else namespace
    matchingFiles = [dbFile for _, dbName, dbFile in attached if dbName == wanted]
    if not matchingFiles:
        raise RuntimeError(f"No '{wanted}' database in connection.")
    # An empty filename is reported for in-memory databases; normalize
    # that to `None`.
    self.filename = matchingFiles[0] or None
    self._writeable = writeable

107 

@classmethod
def makeDefaultUri(cls, root: str) -> str | None:
    # The default database is a file named "gen3.sqlite3" directly inside
    # ``root``, addressed with a SQLAlchemy "sqlite:///" URI.
    databasePath = os.path.join(root, "gen3.sqlite3")
    return f"sqlite:///{databasePath}"

111 

@classmethod
def makeEngine(
    cls,
    uri: str | sqlalchemy.engine.URL | None = None,
    *,
    filename: str | None = None,
    writeable: bool = True,
) -> sqlalchemy.engine.Engine:
    """Build a `sqlalchemy.engine.Engine` for a SQLite database from a
    SQLAlchemy URI or a filename.

    Parameters
    ----------
    uri : `str` or `sqlalchemy.engine.URL`, optional
        A SQLAlchemy URI connection string.
    filename : `str`, optional
        Name of the SQLite database file, or `None` to use an in-memory
        database.  Ignored if ``uri is not None``.
    writeable : `bool`, optional
        If `True`, allow write operations on the database, including
        ``CREATE TABLE``.

    Returns
    -------
    engine : `sqlalchemy.engine.Engine`
        A database engine.

    Raises
    ------
    NotImplementedError
        Raised if ``uri`` carries a ``uri=true`` query parameter, or if a
        read-only in-memory database is requested.
    """
    # In order to be able to tell SQLite that we want a read-only or
    # read-write connection, we need to make the SQLite DBAPI connection
    # with a "URI"-based connection string.  SQLAlchemy claims it can do
    # this
    # (https://docs.sqlalchemy.org/en/13/dialects/sqlite.html#uri-connections),
    # but it doesn't seem to work as advertised.  To work around this, we
    # use the 'creator' argument to sqlalchemy.engine.create_engine, which
    # lets us pass a callable that creates the DBAPI connection.
    if uri is not None:
        if isinstance(uri, sqlalchemy.engine.URL):
            # We have to parse strings anyway, so convert it to string.
            uri = uri.render_as_string(hide_password=False)
        parsed = urllib.parse.urlparse(uri)
        if "uri=true" in parsed.query.split("&"):
            # This is a SQLAlchemy URI that is already trying to make a
            # SQLite connection via a SQLite URI, and hence there may be
            # URI components for both SQLite and SQLAlchemy.  We don't
            # need to support that, and it'd be a reimplementation of all
            # of the (broken) logic in SQLAlchemy for doing this, so we
            # just don't.
            raise NotImplementedError("SQLite connection strings with 'uri=true' are not supported.")
        # This is just a SQLAlchemy URI with a non-URI SQLite connection
        # string inside it.  Pull the file name out (dropping the leading
        # slash) so we can use it in the creator call; no path at all
        # means an in-memory database.
        filename = parsed.path[1:] if parsed.path.startswith("/") else None
    elif filename is None:
        uri = "sqlite://"
    else:
        uri = f"sqlite:///{filename}"

    # Work out what to hand to sqlite3.connect.
    if filename is None:
        if not writeable:
            raise NotImplementedError("Read-only :memory: databases are not supported.")
        target = ":memory:"
    else:
        accessMode = "rwc" if writeable else "ro"
        target = f"file:{filename}?mode={accessMode}&uri=true"

    def openDbapiConnection() -> sqlite3.Connection:
        return sqlite3.connect(target, check_same_thread=False, uri=True)

    engine = sqlalchemy.engine.create_engine(uri, creator=openDbapiConnection)

    sqlalchemy.event.listen(engine, "connect", _onSqlite3Connect)

    # Replace pysqlite's buggy transaction handling that never BEGINs with
    # our own that does, and tell SQLite to try to acquire a lock as soon
    # as we start a transaction that might involve writes (this should
    # lead to more blocking and fewer deadlocks).
    beginStatement = "BEGIN IMMEDIATE" if writeable else "BEGIN"

    def emitBegin(connection: sqlalchemy.engine.Connection) -> sqlalchemy.engine.Connection:
        assert connection.dialect.name == "sqlite"
        connection.execute(sqlalchemy.text(beginStatement))
        return connection

    sqlalchemy.event.listen(engine, "begin", emitBegin)

    return engine

208 

@classmethod
def fromEngine(
    cls,
    engine: sqlalchemy.engine.Engine,
    *,
    origin: int,
    namespace: str | None = None,
    writeable: bool = True,
) -> Database:
    # Alternate constructor: forwards every argument to the class
    # constructor by keyword.
    constructorArgs = {
        "engine": engine,
        "origin": origin,
        "namespace": namespace,
        "writeable": writeable,
    }
    return cls(**constructorArgs)

219 

def isWriteable(self) -> bool:
    # Reports the ``writeable`` flag recorded by the constructor.
    return self._writeable

222 

223 def __str__(self) -> str: 

224 if self.filename: 

225 return f"SQLite3@{self.filename}" 

226 else: 

227 return "SQLite3@:memory:" 

228 

229 def _lockTables( 

230 self, connection: sqlalchemy.engine.Connection, tables: Iterable[sqlalchemy.schema.Table] = () 

231 ) -> None: 

232 # Docstring inherited. 

233 # Our SQLite database always acquires full-database locks at the 

234 # beginning of a transaction, so there's no need to acquire table-level 

235 # locks - which is good, because SQLite doesn't have table-level 

236 # locking. 

237 pass 

238 

239 # MyPy claims that the return type here isn't covariant with the return 

240 # type of the base class method, which is formally correct but irrelevant 

241 # - the base class return type is _GeneratorContextManager, but only 

242 # because it's generated by the contextmanager decorator. 

def declareStaticTables(self, *, create: bool) -> ContextManager[StaticTablesContext]:  # type: ignore
    # If the user asked for an in-memory, writeable database, then we may
    # need to re-create schema even if create=False because schema can be
    # lost on re-connect.  This is only really relevant for tests, and
    # it's convenient there.
    inMemoryAndWriteable = self.filename is None and self.isWriteable()
    if inMemoryAndWriteable:
        existingTables = sqlalchemy.inspect(self._engine).get_table_names(schema=self.namespace)
        if not existingTables:
            # Nothing exists yet, so force creation.
            create = True
    return super().declareStaticTables(create=create)

254 

255 def _convertFieldSpec( 

256 self, table: str, spec: ddl.FieldSpec, metadata: sqlalchemy.MetaData, **kwargs: Any 

257 ) -> sqlalchemy.schema.Column: 

258 if spec.autoincrement: 

259 if not spec.primaryKey: 

260 raise RuntimeError( 

261 f"Autoincrement field {table}.{spec.name} that is not a primary key is not supported." 

262 ) 

263 if spec.dtype != sqlalchemy.Integer: 

264 # SQLite's autoincrement is really limited; it only works if 

265 # the column type is exactly "INTEGER". But it also doesn't 

266 # care about the distinctions between different integer types, 

267 # so it's safe to change it. 

268 spec = copy.copy(spec) 

269 spec.dtype = sqlalchemy.Integer 

270 return super()._convertFieldSpec(table, spec, metadata, **kwargs) 

271 

def _makeColumnConstraints(self, table: str, spec: ddl.FieldSpec) -> list[sqlalchemy.CheckConstraint]:
    # For sqlite we force constraints on all string columns since sqlite
    # ignores everything otherwise and this leads to problems with other
    # databases.
    lengthChecks = []
    if spec.isStringType():
        constraintName = self.shrinkDatabaseEntityName("_".join([table, "len", spec.name]))
        # Upper bound from the declared length; the lower bound of 1 is
        # there because Oracle converts empty strings to NULL.
        expression = f'length("{spec.name}")<={spec.length} AND length("{spec.name}")>=1'
        lengthChecks.append(sqlalchemy.CheckConstraint(expression, name=constraintName))
    # The SQLite-specific check comes first, followed by whatever the base
    # class adds.
    return [*lengthChecks, *super()._makeColumnConstraints(table, spec)]

293 

294 def _convertTableSpec( 

295 self, name: str, spec: ddl.TableSpec, metadata: sqlalchemy.MetaData, **kwargs: Any 

296 ) -> sqlalchemy.schema.Table: 

297 primaryKeyFieldNames = {field.name for field in spec.fields if field.primaryKey} 

298 autoincrFieldNames = {field.name for field in spec.fields if field.autoincrement} 

299 if len(autoincrFieldNames) > 1: 

300 raise RuntimeError("At most one autoincrement field per table is allowed.") 

301 if len(primaryKeyFieldNames) > 1 and len(autoincrFieldNames) > 0: 

302 # SQLite's default rowid-based autoincrement doesn't work if the 

303 # field is just one field in a compound primary key. As a 

304 # workaround, we create an extra table with just one column that 

305 # we'll insert into to generate those IDs. That's only safe if 

306 # that single-column table's records are already unique with just 

307 # the autoincrement field, not the rest of the primary key. In 

308 # practice, that means the single-column table's records are those 

309 # for which origin == self.origin. 

310 (autoincrFieldName,) = autoincrFieldNames 

311 otherPrimaryKeyFieldNames = primaryKeyFieldNames - autoincrFieldNames 

312 if otherPrimaryKeyFieldNames != {"origin"}: 

313 # We need the only other field in the key to be 'origin'. 

314 raise NotImplementedError( 

315 "Compound primary keys with an autoincrement are only supported in SQLite " 

316 "if the only non-autoincrement primary key field is 'origin'." 

317 ) 

318 if not spec.recycleIds: 

319 kwargs = dict(kwargs, sqlite_autoincrement=True) 

320 return super()._convertTableSpec(name, spec, metadata, **kwargs) 

321 

def replace(self, table: sqlalchemy.schema.Table, *rows: dict) -> None:
    # Insert ``rows``, overwriting the non-key columns of any existing row
    # that has the same primary key.
    self.assertTableWriteable(table, f"Cannot replace into read-only table {table}.")
    if not rows:
        return
    upsert = sqlalchemy.dialects.sqlite.insert(table)
    primaryKey = table.primary_key
    # ``excluded`` refers to the row that failed to insert; on conflict,
    # copy each of its non-key values over the existing row.
    incoming = upsert.excluded
    updates = {}
    for column in table.columns:
        if column.name not in primaryKey:
            updates[column.name] = getattr(incoming, column.name)
    upsert = upsert.on_conflict_do_update(index_elements=primaryKey, set_=updates)
    with self._transaction() as (_, connection):
        connection.execute(upsert, rows)

336 

def ensure(self, table: sqlalchemy.schema.Table, *rows: dict, primary_key_only: bool = False) -> int:
    # Insert ``rows``, ignoring any that conflict with existing rows, and
    # return the row count reported by the database.
    self.assertTableWriteable(table, f"Cannot ensure into read-only table {table}.")
    if not rows:
        return 0
    # With ``primary_key_only`` only primary-key conflicts are ignored;
    # otherwise no conflict target is given.
    conflictTarget = {"index_elements": table.primary_key} if primary_key_only else {}
    statement = sqlalchemy.dialects.sqlite.insert(table).on_conflict_do_nothing(**conflictTarget)
    with self._transaction() as (_, connection):
        return connection.execute(statement, rows).rowcount

348 

def constant_rows(
    self,
    fields: NamedValueAbstractSet[ddl.FieldSpec],
    *rows: dict,
    name: str | None = None,
) -> sqlalchemy.sql.FromClause:
    # Docstring inherited.
    # While SQLite supports VALUES, it doesn't support assigning a name to
    # that construct or the names of its columns, and hence there's no way
    # to actually join it into a SELECT query.  It seems the only
    # alternative is something like:
    #
    # SELECT ? AS a, ? AS b
    # UNION ALL
    # SELECT ? AS a, ? AS b
    #
    perRowSelects = []
    for row in rows:
        # One typed, labelled literal per field, in field order.
        labelledLiterals = [
            sqlalchemy.sql.literal(row[field.name], field.dtype).label(field.name) for field in fields
        ]
        perRowSelects.append(sqlalchemy.sql.select(*labelledLiterals))
    return sqlalchemy.sql.union_all(*perRowSelects).alias(name)

372 

373 filename: str | None 

374 """Name of the file this database is connected to (`str` or `None`). 

375 

376 Set to `None` for in-memory databases. 

377 """