Coverage for python/lsst/daf/butler/registry/databases/sqlite.py: 17%

144 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2023-04-07 00:58 -0700

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = ["SqliteDatabase"] 

24 

25import copy 

26import os 

27import sqlite3 

28import urllib.parse 

29from contextlib import closing 

30from typing import Any, ContextManager, Iterable, List, Optional 

31 

32import sqlalchemy 

33import sqlalchemy.dialects.sqlite 

34import sqlalchemy.ext.compiler 

35 

36from ...core import ddl 

37from ...core.named import NamedValueAbstractSet 

38from ..interfaces import Database, StaticTablesContext 

39 

40 

def _onSqlite3Connect(
    dbapiConnection: sqlite3.Connection, connectionRecord: sqlalchemy.pool._ConnectionRecord
) -> None:
    """Configure a freshly opened SQLite DBAPI connection.

    Registered as a SQLAlchemy ``"connect"`` event listener.

    Parameters
    ----------
    dbapiConnection : `sqlite3.Connection`
        The raw DBAPI connection that was just opened.
    connectionRecord : `sqlalchemy.pool._ConnectionRecord`
        Pool record supplied by the event machinery; not used here.
    """
    assert isinstance(dbapiConnection, sqlite3.Connection)
    # Prevent pysqlite from emitting BEGIN and COMMIT statements.
    dbapiConnection.isolation_level = None
    with closing(dbapiConnection.cursor()) as cursor:
        # Enable foreign keys.
        cursor.execute("PRAGMA foreign_keys=ON;")
        # In ms, so 5min (way longer than should be needed).
        cursor.execute("PRAGMA busy_timeout = 300000;")

51 

52 

class SqliteDatabase(Database):
    """An implementation of the `Database` interface for SQLite3.

    Parameters
    ----------
    engine : `sqlalchemy.engine.Engine`
        Engine to use for this database, e.g. one created by `makeEngine`.
    origin : `int`
        An integer ID that should be used as the default for any datasets,
        quanta, or other entities that use a (autoincrement, origin) compound
        primary key.
    namespace : `str`, optional
        The namespace (schema) this database is associated with.  If `None`,
        the default schema for the connection is used (which may be `None`);
        the file name is then looked up under the ``main`` database.
    writeable : `bool`, optional
        If `True`, allow write operations on the database, including
        ``CREATE TABLE``.

    Raises
    ------
    RuntimeError
        Raised if ``PRAGMA database_list`` reports no databases, or none
        whose name matches ``namespace``.

    Notes
    -----
    The case where ``namespace is not None`` is not yet tested, and may be
    broken; we need an API for attaching to different databases in order to
    write those tests, but haven't yet worked out what is common/different
    across databases well enough to define it.
    """

    def __init__(
        self,
        *,
        engine: sqlalchemy.engine.Engine,
        origin: int,
        namespace: Optional[str] = None,
        writeable: bool = True,
    ):
        super().__init__(origin=origin, engine=engine, namespace=namespace)
        # Get the filename from a call to 'PRAGMA database_list'.
        with engine.connect() as connection:
            with closing(connection.connection.cursor()) as cursor:
                dbList = cursor.execute("PRAGMA database_list").fetchall()
        if not dbList:
            raise RuntimeError("No database in connection.")
        if namespace is None:
            namespace = "main"
        for _, dbname, filename in dbList:
            if dbname == namespace:
                break
        else:
            raise RuntimeError(f"No '{namespace}' database in connection.")
        # A falsy (e.g. empty) file name is recorded as `None`, which the
        # rest of the class treats as "in-memory".
        self.filename = filename or None
        self._writeable = writeable

    @classmethod
    def makeDefaultUri(cls, root: str) -> Optional[str]:
        """Return a ``sqlite:///`` URI for ``gen3.sqlite3`` inside ``root``."""
        return "sqlite:///" + os.path.join(root, "gen3.sqlite3")

    @classmethod
    def makeEngine(
        cls, uri: Optional[str] = None, *, filename: Optional[str] = None, writeable: bool = True
    ) -> sqlalchemy.engine.Engine:
        """Create a `sqlalchemy.engine.Engine` from a SQLAlchemy URI or
        filename.

        Parameters
        ----------
        uri : `str`
            A SQLAlchemy URI connection string.
        filename : `str`
            Name of the SQLite database file, or `None` to use an in-memory
            database.  Ignored if ``uri is not None``.
        writeable : `bool`, optional
            If `True`, allow write operations on the database, including
            ``CREATE TABLE``.

        Returns
        -------
        engine : `sqlalchemy.engine.Engine`
            A database engine.

        Raises
        ------
        NotImplementedError
            Raised if ``uri`` contains a ``uri=true`` query component, or if
            a read-only in-memory database is requested.
        """
        # In order to be able to tell SQLite that we want a read-only or
        # read-write connection, we need to make the SQLite DBAPI connection
        # with a "URI"-based connection string.  SQLAlchemy claims it can do
        # this
        # (https://docs.sqlalchemy.org/en/13/dialects/sqlite.html#uri-connections),
        # but it doesn't seem to work as advertised.  To work around this, we
        # use the 'creator' argument to sqlalchemy.engine.create_engine, which
        # lets us pass a callable that creates the DBAPI connection.
        if uri is None:
            if filename is None:
                target = ":memory:"
                uri = "sqlite://"
            else:
                target = f"file:{filename}"
                uri = f"sqlite:///{filename}"
        else:
            parsed = urllib.parse.urlparse(uri)
            queries = parsed.query.split("&")
            if "uri=true" in queries:
                # This is a SQLAlchemy URI that is already trying to make a
                # SQLite connection via a SQLite URI, and hence there may
                # be URI components for both SQLite and SQLAlchemy.  We
                # don't need to support that, and it'd be a
                # reimplementation of all of the (broken) logic in
                # SQLAlchemy for doing this, so we just don't.
                raise NotImplementedError("SQLite connection strings with 'uri=true' are not supported.")
            # This is just a SQLAlchemy URI with a non-URI SQLite
            # connection string inside it.  Pull that out so we can use it
            # in the creator call.
            if parsed.path.startswith("/"):
                filename = parsed.path[1:]
                target = f"file:{filename}"
            else:
                filename = None
                target = ":memory:"
        if filename is None:
            if not writeable:
                raise NotImplementedError("Read-only :memory: databases are not supported.")
        else:
            # File-backed database: select the SQLite open mode.
            mode = "rwc" if writeable else "ro"
            target += f"?mode={mode}&uri=true"

        def creator() -> sqlite3.Connection:
            return sqlite3.connect(target, check_same_thread=False, uri=True)

        engine = sqlalchemy.engine.create_engine(uri, creator=creator)

        sqlalchemy.event.listen(engine, "connect", _onSqlite3Connect)

        def _onSqlite3Begin(connection: sqlalchemy.engine.Connection) -> sqlalchemy.engine.Connection:
            assert connection.dialect.name == "sqlite"
            # Replace pysqlite's buggy transaction handling that never BEGINs
            # with our own that does, and tell SQLite to try to acquire a lock
            # as soon as we start a transaction that might involve writes (this
            # should lead to more blocking and fewer deadlocks).
            if writeable:
                connection.execute(sqlalchemy.text("BEGIN IMMEDIATE"))
            else:
                connection.execute(sqlalchemy.text("BEGIN"))
            return connection

        sqlalchemy.event.listen(engine, "begin", _onSqlite3Begin)

        return engine

    @classmethod
    def fromEngine(
        cls,
        engine: sqlalchemy.engine.Engine,
        *,
        origin: int,
        namespace: Optional[str] = None,
        writeable: bool = True,
    ) -> Database:
        """Construct an instance of this class from an existing engine.

        All arguments are forwarded unchanged to the constructor.
        """
        return cls(engine=engine, origin=origin, writeable=writeable, namespace=namespace)

    def isWriteable(self) -> bool:
        """Return the ``writeable`` flag this instance was constructed with."""
        return self._writeable

    def __str__(self) -> str:
        if self.filename:
            return f"SQLite3@{self.filename}"
        else:
            return "SQLite3@:memory:"

    def _lockTables(
        self, connection: sqlalchemy.engine.Connection, tables: Iterable[sqlalchemy.schema.Table] = ()
    ) -> None:
        # Docstring inherited.
        # Our SQLite database always acquires full-database locks at the
        # beginning of a transaction, so there's no need to acquire table-level
        # locks - which is good, because SQLite doesn't have table-level
        # locking.
        pass

    # MyPy claims that the return type here isn't covariant with the return
    # type of the base class method, which is formally correct but irrelevant
    # - the base class return type is _GeneratorContextManager, but only
    # because it's generated by the contextmanager decorator.
    def declareStaticTables(self, *, create: bool) -> ContextManager[StaticTablesContext]:  # type: ignore
        # If the user asked for an in-memory, writeable database, then we may
        # need to re-create schema even if create=False because schema can be
        # lost on re-connect.  This is only really relevant for tests, and it's
        # convenient there.
        if self.filename is None and self.isWriteable():
            inspector = sqlalchemy.inspect(self._engine)
            tables = inspector.get_table_names(schema=self.namespace)
            if not tables:
                create = True
        return super().declareStaticTables(create=create)

    def _convertFieldSpec(
        self, table: str, spec: ddl.FieldSpec, metadata: sqlalchemy.MetaData, **kwargs: Any
    ) -> sqlalchemy.schema.Column:
        if spec.autoincrement:
            if not spec.primaryKey:
                raise RuntimeError(
                    f"Autoincrement field {table}.{spec.name} that is not a primary key is not supported."
                )
            if spec.dtype != sqlalchemy.Integer:
                # SQLite's autoincrement is really limited; it only works if
                # the column type is exactly "INTEGER".  But it also doesn't
                # care about the distinctions between different integer types,
                # so it's safe to change it.  Work on a copy so the caller's
                # spec is left untouched.
                spec = copy.copy(spec)
                spec.dtype = sqlalchemy.Integer
        return super()._convertFieldSpec(table, spec, metadata, **kwargs)

    def _makeColumnConstraints(self, table: str, spec: ddl.FieldSpec) -> List[sqlalchemy.CheckConstraint]:
        # For sqlite we force constraints on all string columns since sqlite
        # ignores everything otherwise and this leads to problems with
        # other databases.

        constraints = []
        if spec.isStringType():
            name = self.shrinkDatabaseEntityName("_".join([table, "len", spec.name]))
            constraints.append(
                sqlalchemy.CheckConstraint(
                    f'length("{spec.name}")<={spec.length}'
                    # Oracle converts empty strings to NULL, so also require
                    # at least one character.
                    f' AND length("{spec.name}")>=1',
                    name=name,
                )
            )

        constraints.extend(super()._makeColumnConstraints(table, spec))
        return constraints

    def _convertTableSpec(
        self, name: str, spec: ddl.TableSpec, metadata: sqlalchemy.MetaData, **kwargs: Any
    ) -> sqlalchemy.schema.Table:
        primaryKeyFieldNames = {field.name for field in spec.fields if field.primaryKey}
        autoincrFieldNames = {field.name for field in spec.fields if field.autoincrement}
        if len(autoincrFieldNames) > 1:
            raise RuntimeError("At most one autoincrement field per table is allowed.")
        if len(primaryKeyFieldNames) > 1 and len(autoincrFieldNames) > 0:
            # SQLite's default rowid-based autoincrement doesn't work if the
            # field is just one field in a compound primary key.  As a
            # workaround, we create an extra table with just one column that
            # we'll insert into to generate those IDs.  That's only safe if
            # that single-column table's records are already unique with just
            # the autoincrement field, not the rest of the primary key.  In
            # practice, that means the single-column table's records are those
            # for which origin == self.origin.
            # NOTE(review): no such extra table is created in this method;
            # confirm whether it is handled elsewhere or this comment is stale.
            otherPrimaryKeyFieldNames = primaryKeyFieldNames - autoincrFieldNames
            if otherPrimaryKeyFieldNames != {"origin"}:
                # We need the only other field in the key to be 'origin'.
                raise NotImplementedError(
                    "Compound primary keys with an autoincrement are only supported in SQLite "
                    "if the only non-autoincrement primary key field is 'origin'."
                )
        if not spec.recycleIds:
            kwargs = dict(kwargs, sqlite_autoincrement=True)
        return super()._convertTableSpec(name, spec, metadata, **kwargs)

    def replace(self, table: sqlalchemy.schema.Table, *rows: dict) -> None:
        self.assertTableWriteable(table, f"Cannot replace into read-only table {table}.")
        if not rows:
            return
        # INSERT ... ON CONFLICT (primary key) DO UPDATE, overwriting every
        # non-primary-key column with the value from the rejected row.
        query = sqlalchemy.dialects.sqlite.insert(table)
        excluded = query.excluded
        data = {
            column.name: getattr(excluded, column.name)
            for column in table.columns
            if column.name not in table.primary_key
        }
        query = query.on_conflict_do_update(index_elements=table.primary_key, set_=data)
        with self._transaction() as (_, connection):
            connection.execute(query, rows)

    def ensure(self, table: sqlalchemy.schema.Table, *rows: dict, primary_key_only: bool = False) -> int:
        self.assertTableWriteable(table, f"Cannot ensure into read-only table {table}.")
        if not rows:
            return 0
        # INSERT ... ON CONFLICT DO NOTHING; with ``primary_key_only`` the
        # conflict target is restricted to the primary key.
        query = sqlalchemy.dialects.sqlite.insert(table)
        if primary_key_only:
            query = query.on_conflict_do_nothing(index_elements=table.primary_key)
        else:
            query = query.on_conflict_do_nothing()
        with self._transaction() as (_, connection):
            return connection.execute(query, rows).rowcount

    def constant_rows(
        self,
        fields: NamedValueAbstractSet[ddl.FieldSpec],
        *rows: dict,
        name: Optional[str] = None,
    ) -> sqlalchemy.sql.FromClause:
        # Docstring inherited.
        # While SQLite supports VALUES, it doesn't support assigning a name
        # to that construct or the names of its columns, and hence there's no
        # way to actually join it into a SELECT query.  It seems the only
        # alternative is something like:
        #
        #    SELECT ? AS a, ? AS b
        #    UNION ALL
        #    SELECT ? AS a, ? AS b
        #
        selects = [
            sqlalchemy.sql.select(
                *[sqlalchemy.sql.literal(row[field.name], field.dtype).label(field.name) for field in fields]
            )
            for row in rows
        ]
        return sqlalchemy.sql.union_all(*selects).alias(name)

    filename: Optional[str]
    """Name of the file this database is connected to (`str` or `None`).

    Set to `None` for in-memory databases.
    """