Coverage for python/lsst/daf/butler/registry/databases/sqlite.py: 16%

146 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-06-06 02:34 -0700

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = ["SqliteDatabase"] 

24 

25import copy 

26import os 

27import sqlite3 

28import urllib.parse 

29from contextlib import closing 

30from typing import Any, ContextManager, Iterable, List, Optional 

31 

32import sqlalchemy 

33import sqlalchemy.dialects.sqlite 

34import sqlalchemy.ext.compiler 

35 

36from ...core import ddl 

37from ...core.named import NamedValueAbstractSet 

38from ..interfaces import Database, StaticTablesContext 

39 

40 

def _onSqlite3Connect(
    dbapiConnection: sqlite3.Connection, connectionRecord: sqlalchemy.pool._ConnectionRecord
) -> None:
    """Configure a freshly opened SQLite DBAPI connection.

    Parameters
    ----------
    dbapiConnection : `sqlite3.Connection`
        The raw DBAPI connection to configure.
    connectionRecord : `sqlalchemy.pool._ConnectionRecord`
        Pool record associated with the connection; not used here.
    """
    assert isinstance(dbapiConnection, sqlite3.Connection)
    # With no isolation level pysqlite stops emitting its own BEGIN and
    # COMMIT statements.
    dbapiConnection.isolation_level = None
    perConnectionPragmas = (
        # Turn on foreign key enforcement.
        "PRAGMA foreign_keys=ON;",
        # Timeout is in ms, so 5min (way longer than should be needed).
        "PRAGMA busy_timeout = 300000;",
    )
    with closing(dbapiConnection.cursor()) as pragmaCursor:
        for statement in perConnectionPragmas:
            pragmaCursor.execute(statement)

51 

52 

class SqliteDatabase(Database):
    """An implementation of the `Database` interface for SQLite3.

    Parameters
    ----------
    engine : `sqlalchemy.engine.Engine`
        An existing engine, e.g. one created by a previous call to
        `makeEngine`.
    origin : `int`
        An integer ID that should be used as the default for any datasets,
        quanta, or other entities that use a (autoincrement, origin) compound
        primary key.
    namespace : `str`, optional
        The namespace (schema) this database is associated with.  If `None`,
        the default schema for the connection is used (which may be `None`).
    writeable : `bool`, optional
        If `True`, allow write operations on the database, including
        ``CREATE TABLE``.

    Notes
    -----
    The case where ``namespace is not None`` is not yet tested, and may be
    broken; we need an API for attaching to different databases in order to
    write those tests, but haven't yet worked out what is common/different
    across databases well enough to define it.
    """

    def __init__(
        self,
        *,
        engine: sqlalchemy.engine.Engine,
        origin: int,
        namespace: Optional[str] = None,
        writeable: bool = True,
    ):
        super().__init__(origin=origin, engine=engine, namespace=namespace)
        # 'PRAGMA database_list' reports one (seq, name, file) row per
        # attached database; that is where the filename comes from.
        with engine.connect() as connection:
            with closing(connection.connection.cursor()) as cursor:
                attached = list(cursor.execute("PRAGMA database_list").fetchall())
        if not attached:
            raise RuntimeError("No database in connection.")
        wanted = "main" if namespace is None else namespace
        for _, attachedName, attachedFile in attached:
            if attachedName == wanted:
                # An empty filename is normalized to None.
                self.filename = attachedFile or None
                break
        else:
            raise RuntimeError(f"No '{wanted}' database in connection.")
        self._writeable = writeable

    @classmethod
    def makeDefaultUri(cls, root: str) -> Optional[str]:
        # Docstring inherited.
        defaultPath = os.path.join(root, "gen3.sqlite3")
        return f"sqlite:///{defaultPath}"

    @classmethod
    def makeEngine(
        cls,
        uri: str | sqlalchemy.engine.URL | None = None,
        *,
        filename: Optional[str] = None,
        writeable: bool = True,
    ) -> sqlalchemy.engine.Engine:
        """Create a `sqlalchemy.engine.Engine` from a SQLAlchemy URI or
        filename.

        Parameters
        ----------
        uri : `str` or `sqlalchemy.engine.URL`, optional
            A SQLAlchemy URI connection string.
        filename : `str`
            Name of the SQLite database file, or `None` to use an in-memory
            database.  Ignored if ``uri is not None``.
        writeable : `bool`, optional
            If `True`, allow write operations on the database, including
            ``CREATE TABLE``.

        Returns
        -------
        engine : `sqlalchemy.engine.Engine`
            A database engine.

        Raises
        ------
        NotImplementedError
            Raised if ``uri`` contains a ``uri=true`` query parameter, or if
            a read-only in-memory database is requested.
        """
        # Telling SQLite whether we want a read-only or read-write connection
        # requires opening the DBAPI connection with a SQLite "URI" string.
        # SQLAlchemy claims to support this
        # (https://docs.sqlalchemy.org/en/13/dialects/sqlite.html#uri-connections),
        # but it doesn't seem to work as advertised, so instead we hand
        # sqlalchemy.engine.create_engine a 'creator' callable that opens the
        # DBAPI connection itself.
        if uri is not None:
            if isinstance(uri, sqlalchemy.engine.URL):
                # Strings have to be parsed anyway, so work with a string.
                uri = uri.render_as_string(hide_password=False)
            components = urllib.parse.urlparse(uri)
            if "uri=true" in components.query.split("&"):
                # The SQLAlchemy URI is itself wrapping a SQLite URI, so it
                # may carry components meant for both layers.  Supporting
                # that would mean reimplementing SQLAlchemy's own (broken)
                # handling of it, so we just don't.
                raise NotImplementedError("SQLite connection strings with 'uri=true' are not supported.")
            # Otherwise this is a SQLAlchemy URI wrapping a plain SQLite
            # connection string; extract the file part for the creator call.
            filename = components.path[1:] if components.path.startswith("/") else None
        elif filename is None:
            uri = "sqlite://"
        else:
            uri = f"sqlite:///{filename}"

        if filename is None:
            if not writeable:
                raise NotImplementedError("Read-only :memory: databases are not supported.")
            target = ":memory:"
        else:
            accessMode = "rwc" if writeable else "ro"
            target = f"file:{filename}?mode={accessMode}&uri=true"

        def creator() -> sqlite3.Connection:
            return sqlite3.connect(target, check_same_thread=False, uri=True)

        engine = sqlalchemy.engine.create_engine(uri, creator=creator)

        sqlalchemy.event.listen(engine, "connect", _onSqlite3Connect)

        # Replace pysqlite's buggy transaction handling that never BEGINs
        # with our own that does, and tell SQLite to try to acquire a lock
        # as soon as we start a transaction that might involve writes (this
        # should lead to more blocking and fewer deadlocks).
        beginStatement = "BEGIN IMMEDIATE" if writeable else "BEGIN"

        def _onSqlite3Begin(connection: sqlalchemy.engine.Connection) -> sqlalchemy.engine.Connection:
            assert connection.dialect.name == "sqlite"
            connection.execute(sqlalchemy.text(beginStatement))
            return connection

        sqlalchemy.event.listen(engine, "begin", _onSqlite3Begin)

        return engine

    @classmethod
    def fromEngine(
        cls,
        engine: sqlalchemy.engine.Engine,
        *,
        origin: int,
        namespace: Optional[str] = None,
        writeable: bool = True,
    ) -> Database:
        # Docstring inherited.
        return cls(engine=engine, origin=origin, namespace=namespace, writeable=writeable)

    def isWriteable(self) -> bool:
        # Docstring inherited.
        return self._writeable

    def __str__(self) -> str:
        location = self.filename or ":memory:"
        return f"SQLite3@{location}"

    def _lockTables(
        self, connection: sqlalchemy.engine.Connection, tables: Iterable[sqlalchemy.schema.Table] = ()
    ) -> None:
        # Docstring inherited.
        # Nothing to do: our SQLite database always acquires full-database
        # locks at the beginning of a transaction, and SQLite has no
        # table-level locking to ask for anyway.
        return

    # MyPy claims that the return type here isn't covariant with the return
    # type of the base class method, which is formally correct but irrelevant
    # - the base class return type is _GeneratorContextManager, but only
    # because it's generated by the contextmanager decorator.
    def declareStaticTables(self, *, create: bool) -> ContextManager[StaticTablesContext]:  # type: ignore
        # Docstring inherited.
        # A writeable in-memory database can lose its schema on re-connect,
        # so when no tables are found we create them even if the caller
        # passed create=False.  This is only really relevant for tests, and
        # it's convenient there.
        if self.filename is None and self.isWriteable():
            existingTables = sqlalchemy.inspect(self._engine).get_table_names(schema=self.namespace)
            if not existingTables:
                create = True
        return super().declareStaticTables(create=create)

    def _convertFieldSpec(
        self, table: str, spec: ddl.FieldSpec, metadata: sqlalchemy.MetaData, **kwargs: Any
    ) -> sqlalchemy.schema.Column:
        # Docstring inherited.
        if not spec.autoincrement:
            return super()._convertFieldSpec(table, spec, metadata, **kwargs)
        if not spec.primaryKey:
            raise RuntimeError(
                f"Autoincrement field {table}.{spec.name} that is not a primary key is not supported."
            )
        if spec.dtype != sqlalchemy.Integer:
            # SQLite's autoincrement is really limited; it only works if the
            # column type is exactly "INTEGER".  It also doesn't care about
            # the distinctions between different integer types, so swapping
            # the type on a copy of the spec is safe.
            spec = copy.copy(spec)
            spec.dtype = sqlalchemy.Integer
        return super()._convertFieldSpec(table, spec, metadata, **kwargs)

    def _makeColumnConstraints(self, table: str, spec: ddl.FieldSpec) -> List[sqlalchemy.CheckConstraint]:
        # Docstring inherited.
        # For sqlite we force constraints on all string columns since sqlite
        # ignores everything otherwise and this leads to problems with
        # other databases.
        lengthChecks = []
        if spec.isStringType():
            constraintName = self.shrinkDatabaseEntityName(f"{table}_len_{spec.name}")
            # The lower bound is there because Oracle converts empty strings
            # to NULL.
            expression = f'length("{spec.name}")<={spec.length} AND length("{spec.name}")>=1'
            lengthChecks.append(sqlalchemy.CheckConstraint(expression, name=constraintName))

        inherited = super()._makeColumnConstraints(table, spec)
        return [*lengthChecks, *inherited]

    def _convertTableSpec(
        self, name: str, spec: ddl.TableSpec, metadata: sqlalchemy.MetaData, **kwargs: Any
    ) -> sqlalchemy.schema.Table:
        # Docstring inherited.
        keyNames = {field.name for field in spec.fields if field.primaryKey}
        autoincrNames = {field.name for field in spec.fields if field.autoincrement}
        if len(autoincrNames) > 1:
            raise RuntimeError("At most one autoincrement field per table is allowed.")
        if autoincrNames and len(keyNames) > 1:
            # SQLite's default rowid-based autoincrement doesn't work if the
            # field is just one field in a compound primary key, so the only
            # compound key accepted here is (autoincrement, origin).
            if keyNames - autoincrNames != {"origin"}:
                raise NotImplementedError(
                    "Compound primary keys with an autoincrement are only supported in SQLite "
                    "if the only non-autoincrement primary key field is 'origin'."
                )
        if not spec.recycleIds:
            kwargs = {**kwargs, "sqlite_autoincrement": True}
        return super()._convertTableSpec(name, spec, metadata, **kwargs)

    def replace(self, table: sqlalchemy.schema.Table, *rows: dict) -> None:
        # Docstring inherited.
        self.assertTableWriteable(table, f"Cannot replace into read-only table {table}.")
        if not rows:
            return
        statement = sqlalchemy.dialects.sqlite.insert(table)
        # On a primary key conflict, overwrite every non-key column with the
        # value from the row that failed to insert.
        updates = {}
        for column in table.columns:
            if column.name not in table.primary_key:
                updates[column.name] = getattr(statement.excluded, column.name)
        statement = statement.on_conflict_do_update(index_elements=table.primary_key, set_=updates)
        with self._transaction() as (_, connection):
            connection.execute(statement, rows)

    def ensure(self, table: sqlalchemy.schema.Table, *rows: dict, primary_key_only: bool = False) -> int:
        # Docstring inherited.
        self.assertTableWriteable(table, f"Cannot ensure into read-only table {table}.")
        if not rows:
            return 0
        statement = sqlalchemy.dialects.sqlite.insert(table)
        # Only name the primary key as the conflict target when asked to;
        # otherwise no conflict target is given.
        conflictArgs = {"index_elements": table.primary_key} if primary_key_only else {}
        statement = statement.on_conflict_do_nothing(**conflictArgs)
        with self._transaction() as (_, connection):
            return connection.execute(statement, rows).rowcount

    def constant_rows(
        self,
        fields: NamedValueAbstractSet[ddl.FieldSpec],
        *rows: dict,
        name: Optional[str] = None,
    ) -> sqlalchemy.sql.FromClause:
        # Docstring inherited.
        # While SQLite supports VALUES, it doesn't support assigning a name
        # to that construct or the names of its columns, and hence there's no
        # way to actually join it into a SELECT query.  It seems the only
        # alternative is something like:
        #
        # SELECT ? AS a, ? AS b
        # UNION ALL
        # SELECT ? AS a, ? AS b
        #
        perRowSelects = []
        for row in rows:
            labeled = [sqlalchemy.sql.literal(row[field.name], field.dtype).label(field.name) for field in fields]
            perRowSelects.append(sqlalchemy.sql.select(*labeled))
        return sqlalchemy.sql.union_all(*perRowSelects).alias(name)

    filename: Optional[str]
    """Name of the file this database is connected to (`str` or `None`).

    Set to `None` for in-memory databases.
    """