Coverage for python/lsst/daf/butler/registry/databases/sqlite.py: 15%

143 statements  

« prev     ^ index     » next       coverage.py v7.5.0, created at 2024-04-24 23:50 -0700

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = ["SqliteDatabase"] 

24 

25import copy 

26import os 

27import sqlite3 

28import urllib.parse 

29from contextlib import closing 

30from typing import Any, ContextManager, Iterable, List, Optional 

31 

32import sqlalchemy 

33import sqlalchemy.dialects.sqlite 

34import sqlalchemy.ext.compiler 

35 

36from ...core import ddl 

37from ..interfaces import Database, StaticTablesContext 

38 

39 

40def _onSqlite3Connect( 

41 dbapiConnection: sqlite3.Connection, connectionRecord: sqlalchemy.pool._ConnectionRecord 

42) -> None: 

43 assert isinstance(dbapiConnection, sqlite3.Connection) 

44 # Prevent pysqlite from emitting BEGIN and COMMIT statements. 

45 dbapiConnection.isolation_level = None 

46 # Enable foreign keys 

47 with closing(dbapiConnection.cursor()) as cursor: 

48 cursor.execute("PRAGMA foreign_keys=ON;") 

49 cursor.execute("PRAGMA busy_timeout = 300000;") # in ms, so 5min (way longer than should be needed) 

50 

51 

class SqliteDatabase(Database):
    """An implementation of the `Database` interface for SQLite3.

    Parameters
    ----------
    engine : `sqlalchemy.engine.Engine`
        Engine to use for this database, e.g. one returned by `makeEngine`.
    origin : `int`
        An integer ID that should be used as the default for any datasets,
        quanta, or other entities that use a (autoincrement, origin) compound
        primary key.
    namespace : `str`, optional
        The namespace (schema) this database is associated with.  If `None`,
        the default schema for the connection is used (which may be `None`).
    writeable : `bool`, optional
        If `True`, allow write operations on the database, including
        ``CREATE TABLE``.

    Raises
    ------
    RuntimeError
        Raised if ``PRAGMA database_list`` reports no databases, or none
        whose name matches ``namespace`` (``"main"`` when ``namespace`` is
        `None`).

    Notes
    -----
    The case where ``namespace is not None`` is not yet tested, and may be
    broken; we need an API for attaching to different databases in order to
    write those tests, but haven't yet worked out what is common/different
    across databases well enough to define it.
    """

    def __init__(
        self,
        *,
        engine: sqlalchemy.engine.Engine,
        origin: int,
        namespace: Optional[str] = None,
        writeable: bool = True,
    ):
        super().__init__(origin=origin, engine=engine, namespace=namespace)
        # Get the filename from a call to 'PRAGMA database_list'.
        with engine.connect() as connection:
            with closing(connection.connection.cursor()) as cursor:
                dbList = cursor.execute("PRAGMA database_list").fetchall()
        if not dbList:
            raise RuntimeError("No database in connection.")
        if namespace is None:
            namespace = "main"
        # Each row is unpacked as (<ignored>, database name, file name); find
        # the row for the requested namespace.
        for _, dbname, filename in dbList:
            if dbname == namespace:
                break
        else:
            raise RuntimeError(f"No '{namespace}' database in connection.")
        # An empty file name is normalized to None (see `filename`).
        self.filename = filename or None
        self._writeable = writeable

    @classmethod
    def makeDefaultUri(cls, root: str) -> Optional[str]:
        """Return the default SQLAlchemy URI for a database under ``root``.

        Parameters
        ----------
        root : `str`
            Directory in which the database file should live.

        Returns
        -------
        uri : `str`
            A ``sqlite:///`` URI pointing at ``gen3.sqlite3`` inside ``root``.
        """
        return "sqlite:///" + os.path.join(root, "gen3.sqlite3")

    @classmethod
    def makeEngine(
        cls, uri: Optional[str] = None, *, filename: Optional[str] = None, writeable: bool = True
    ) -> sqlalchemy.engine.Engine:
        """Create a `sqlalchemy.engine.Engine` from a SQLAlchemy URI or
        filename.

        Parameters
        ----------
        uri : `str`, optional
            A SQLAlchemy URI connection string.
        filename : `str`, optional
            Name of the SQLite database file, or `None` to use an in-memory
            database.  Ignored if ``uri is not None``.
        writeable : `bool`, optional
            If `True`, allow write operations on the database, including
            ``CREATE TABLE``.

        Returns
        -------
        engine : `sqlalchemy.engine.Engine`
            A database engine.

        Raises
        ------
        NotImplementedError
            Raised if ``uri`` contains a ``uri=true`` query component, or if
            a read-only in-memory database is requested.
        """
        # In order to be able to tell SQLite that we want a read-only or
        # read-write connection, we need to make the SQLite DBAPI connection
        # with a "URI"-based connection string.  SQLAlchemy claims it can do
        # this
        # (https://docs.sqlalchemy.org/en/13/dialects/sqlite.html#uri-connections),
        # but it doesn't seem to work as advertised.  To work around this, we
        # use the 'creator' argument to sqlalchemy.engine.create_engine, which
        # lets us pass a callable that creates the DBAPI connection.
        if uri is None:
            if filename is None:
                target = ":memory:"
                uri = "sqlite://"
            else:
                target = f"file:{filename}"
                uri = f"sqlite:///{filename}"
        else:
            parsed = urllib.parse.urlparse(uri)
            queries = parsed.query.split("&")
            if "uri=true" in queries:
                # This is a SQLAlchemy URI that is already trying to make a
                # SQLite connection via a SQLite URI, and hence there may
                # be URI components for both SQLite and SQLAlchemy.  We
                # don't need to support that, and it'd be a
                # reimplementation of all of the (broken) logic in
                # SQLAlchemy for doing this, so we just don't.
                raise NotImplementedError("SQLite connection strings with 'uri=true' are not supported.")
            # This is just a SQLAlchemy URI with a non-URI SQLite
            # connection string inside it.  Pull that out so we can use it
            # in the creator call.
            if parsed.path.startswith("/"):
                filename = parsed.path[1:]
                target = f"file:{filename}"
            else:
                filename = None
                target = ":memory:"
        if filename is None:
            if not writeable:
                raise NotImplementedError("Read-only :memory: databases are not supported.")
        else:
            # File-backed database: select the SQLite open mode through the
            # query part of the "file:" URI.
            if writeable:
                target += "?mode=rwc&uri=true"
            else:
                target += "?mode=ro&uri=true"

        def creator() -> sqlite3.Connection:
            # Open the DBAPI connection ourselves so 'target' is used as-is.
            return sqlite3.connect(target, check_same_thread=False, uri=True)

        engine = sqlalchemy.engine.create_engine(uri, creator=creator)

        sqlalchemy.event.listen(engine, "connect", _onSqlite3Connect)

        def _onSqlite3Begin(connection: sqlalchemy.engine.Connection) -> sqlalchemy.engine.Connection:
            assert connection.dialect.name == "sqlite"
            # Replace pysqlite's buggy transaction handling that never BEGINs
            # with our own that does, and tell SQLite to try to acquire a lock
            # as soon as we start a transaction that might involve writes (this
            # should lead to more blocking and fewer deadlocks).
            if writeable:
                connection.execute(sqlalchemy.text("BEGIN IMMEDIATE"))
            else:
                connection.execute(sqlalchemy.text("BEGIN"))
            return connection

        sqlalchemy.event.listen(engine, "begin", _onSqlite3Begin)

        return engine

    @classmethod
    def fromEngine(
        cls,
        engine: sqlalchemy.engine.Engine,
        *,
        origin: int,
        namespace: Optional[str] = None,
        writeable: bool = True,
    ) -> Database:
        """Construct an instance from an existing engine.

        All arguments are forwarded unchanged to the class constructor; see
        the class documentation for their meaning.
        """
        return cls(engine=engine, origin=origin, writeable=writeable, namespace=namespace)

    def isWriteable(self) -> bool:
        """Return the ``writeable`` flag this instance was constructed with."""
        return self._writeable

    def __str__(self) -> str:
        if self.filename:
            return f"SQLite3@{self.filename}"
        else:
            return "SQLite3@:memory:"

    def _lockTables(
        self, connection: sqlalchemy.engine.Connection, tables: Iterable[sqlalchemy.schema.Table] = ()
    ) -> None:
        # Docstring inherited.
        # Our SQLite database always acquires full-database locks at the
        # beginning of a transaction, so there's no need to acquire table-level
        # locks - which is good, because SQLite doesn't have table-level
        # locking.
        pass

    # MyPy claims that the return type here isn't covariant with the return
    # type of the base class method, which is formally correct but irrelevant
    # - the base class return type is _GeneratorContextManager, but only
    # because it's generated by the contextmanager decorator.
    def declareStaticTables(self, *, create: bool) -> ContextManager[StaticTablesContext]:  # type: ignore
        # If the user asked for an in-memory, writeable database, then we may
        # need to re-create schema even if create=False because schema can be
        # lost on re-connect.  This is only really relevant for tests, and it's
        # convenient there.
        if self.filename is None and self.isWriteable():
            inspector = sqlalchemy.inspect(self._engine)
            tables = inspector.get_table_names(schema=self.namespace)
            if not tables:
                create = True
        return super().declareStaticTables(create=create)

    def _convertFieldSpec(
        self, table: str, spec: ddl.FieldSpec, metadata: sqlalchemy.MetaData, **kwargs: Any
    ) -> sqlalchemy.schema.Column:
        if spec.autoincrement:
            if not spec.primaryKey:
                raise RuntimeError(
                    f"Autoincrement field {table}.{spec.name} that is not a primary key is not supported."
                )
            if spec.dtype != sqlalchemy.Integer:
                # SQLite's autoincrement is really limited; it only works if
                # the column type is exactly "INTEGER".  But it also doesn't
                # care about the distinctions between different integer types,
                # so it's safe to change it.  Work on a shallow copy so the
                # caller's spec is not modified.
                spec = copy.copy(spec)
                spec.dtype = sqlalchemy.Integer
        return super()._convertFieldSpec(table, spec, metadata, **kwargs)

    def _makeColumnConstraints(self, table: str, spec: ddl.FieldSpec) -> List[sqlalchemy.CheckConstraint]:
        # For sqlite we force constraints on all string columns since sqlite
        # ignores everything otherwise and this leads to problems with
        # other databases.

        constraints = []
        if spec.isStringType():
            name = self.shrinkDatabaseEntityName("_".join([table, "len", spec.name]))
            constraints.append(
                sqlalchemy.CheckConstraint(
                    f"length({spec.name})<={spec.length}"
                    # Oracle converts
                    # empty strings to
                    # NULL so check
                    f" AND length({spec.name})>=1",
                    name=name,
                )
            )

        constraints.extend(super()._makeColumnConstraints(table, spec))
        return constraints

    def _convertTableSpec(
        self, name: str, spec: ddl.TableSpec, metadata: sqlalchemy.MetaData, **kwargs: Any
    ) -> sqlalchemy.schema.Table:
        primaryKeyFieldNames = {field.name for field in spec.fields if field.primaryKey}
        autoincrFieldNames = {field.name for field in spec.fields if field.autoincrement}
        if len(autoincrFieldNames) > 1:
            raise RuntimeError("At most one autoincrement field per table is allowed.")
        if len(primaryKeyFieldNames) > 1 and autoincrFieldNames:
            # SQLite's default rowid-based autoincrement doesn't work if the
            # field is just one field in a compound primary key.  As a
            # workaround, we create an extra table with just one column that
            # we'll insert into to generate those IDs.  That's only safe if
            # that single-column table's records are already unique with just
            # the autoincrement field, not the rest of the primary key.  In
            # practice, that means the single-column table's records are those
            # for which origin == self.origin.
            otherPrimaryKeyFieldNames = primaryKeyFieldNames - autoincrFieldNames
            if otherPrimaryKeyFieldNames != {"origin"}:
                # We need the only other field in the key to be 'origin'.
                raise NotImplementedError(
                    "Compound primary keys with an autoincrement are only supported in SQLite "
                    "if the only non-autoincrement primary key field is 'origin'."
                )
        if not spec.recycleIds:
            # Copy kwargs rather than mutating the caller's mapping.
            kwargs = dict(kwargs, sqlite_autoincrement=True)
        return super()._convertTableSpec(name, spec, metadata, **kwargs)

    def replace(self, table: sqlalchemy.schema.Table, *rows: dict) -> None:
        # Insert ``rows``; on a primary-key conflict, overwrite every
        # non-primary-key column with the incoming value.
        self.assertTableWriteable(table, f"Cannot replace into read-only table {table}.")
        if not rows:
            return
        query = sqlalchemy.dialects.sqlite.insert(table)
        excluded = query.excluded
        data = {
            column.name: getattr(excluded, column.name)
            for column in table.columns
            if column.name not in table.primary_key
        }
        query = query.on_conflict_do_update(index_elements=table.primary_key, set_=data)
        with self._transaction() as (_, connection):
            connection.execute(query, rows)

    def ensure(self, table: sqlalchemy.schema.Table, *rows: dict, primary_key_only: bool = False) -> int:
        # Insert ``rows``, skipping any that conflict, and return the
        # statement's reported row count.  With ``primary_key_only`` the
        # conflict target is restricted to the primary key.
        self.assertTableWriteable(table, f"Cannot ensure into read-only table {table}.")
        if not rows:
            return 0
        query = sqlalchemy.dialects.sqlite.insert(table)
        if primary_key_only:
            query = query.on_conflict_do_nothing(index_elements=table.primary_key)
        else:
            query = query.on_conflict_do_nothing()
        with self._transaction() as (_, connection):
            return connection.execute(query, rows).rowcount

    filename: Optional[str]
    """Name of the file this database is connected to (`str` or `None`).

    Set to `None` for in-memory databases.
    """