Coverage for python/lsst/daf/butler/registry/databases/sqlite.py: 17%

141 statements  

« prev     ^ index     » next       coverage.py v6.4.1, created at 2022-06-28 09:25 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = ["SqliteDatabase"] 

24 

25import copy 

26import os 

27import sqlite3 

28import urllib.parse 

29from contextlib import closing 

30from typing import Any, ContextManager, Iterable, List, Optional 

31 

32import sqlalchemy 

33import sqlalchemy.dialects.sqlite 

34import sqlalchemy.ext.compiler 

35 

36from ...core import ddl 

37from ..interfaces import Database, StaticTablesContext 

38 

39 

def _onSqlite3Connect(
    dbapiConnection: sqlite3.Connection, connectionRecord: sqlalchemy.pool._ConnectionRecord
) -> None:
    """Configure a newly opened SQLite DBAPI connection.

    This function is registered as a SQLAlchemy ``connect`` event listener
    by `SqliteDatabase.makeEngine`.

    Parameters
    ----------
    dbapiConnection : `sqlite3.Connection`
        The raw DBAPI connection that was just opened.
    connectionRecord : `sqlalchemy.pool._ConnectionRecord`
        Pool record for the connection; not used here.
    """
    assert isinstance(dbapiConnection, sqlite3.Connection)
    # An isolation level of None prevents pysqlite from emitting BEGIN and
    # COMMIT statements on its own.
    dbapiConnection.isolation_level = None
    pragmas = (
        # Enable foreign keys.
        "PRAGMA foreign_keys=ON;",
        # Timeout is in ms, so 5min (way longer than should be needed).
        "PRAGMA busy_timeout = 300000;",
    )
    with closing(dbapiConnection.cursor()) as pragmaCursor:
        for statement in pragmas:
            pragmaCursor.execute(statement)

50 

51 

def _onSqlite3Begin(connection: sqlalchemy.engine.Connection) -> sqlalchemy.engine.Connection:
    """Explicitly start a transaction on a SQLite connection.

    This function is registered as a SQLAlchemy ``begin`` event listener by
    `SqliteDatabase.makeEngine`.

    Parameters
    ----------
    connection : `sqlalchemy.engine.Connection`
        Connection on which a transaction is starting.

    Returns
    -------
    connection : `sqlalchemy.engine.Connection`
        The same connection that was passed in.
    """
    assert connection.dialect.name == "sqlite"
    # pysqlite's own transaction handling is buggy and never emits BEGIN, so
    # we emit it ourselves.  Using IMMEDIATE asks SQLite to try to acquire a
    # lock as soon as the transaction starts, which should lead to more
    # blocking and fewer deadlocks.
    beginStatement = sqlalchemy.text("BEGIN IMMEDIATE")
    connection.execute(beginStatement)
    return connection

60 

61 

class SqliteDatabase(Database):
    """An implementation of the `Database` interface for SQLite3.

    Parameters
    ----------
    engine : `sqlalchemy.engine.Engine`
        Engine to use for this database, e.g. one returned by `makeEngine`.
    origin : `int`
        An integer ID that should be used as the default for any datasets,
        quanta, or other entities that use a (autoincrement, origin) compound
        primary key.
    namespace : `str`, optional
        The namespace (schema) this database is associated with.  If `None`,
        the default schema for the connection is used (which may be `None`).
    writeable : `bool`, optional
        If `True`, allow write operations on the database, including
        ``CREATE TABLE``.

    Raises
    ------
    RuntimeError
        Raised if ``PRAGMA database_list`` reports no databases, or none
        whose name matches ``namespace`` (``"main"`` when ``namespace`` is
        `None`).

    Notes
    -----
    The case where ``namespace is not None`` is not yet tested, and may be
    broken; we need an API for attaching to different databases in order to
    write those tests, but haven't yet worked out what is common/different
    across databases well enough to define it.
    """

    def __init__(
        self,
        *,
        engine: sqlalchemy.engine.Engine,
        origin: int,
        namespace: Optional[str] = None,
        writeable: bool = True,
    ):
        super().__init__(origin=origin, engine=engine, namespace=namespace)
        # Get the filename from a call to 'PRAGMA database_list'.
        with engine.connect() as connection:
            with closing(connection.connection.cursor()) as cursor:
                dbList = cursor.execute("PRAGMA database_list").fetchall()
        if not dbList:
            raise RuntimeError("No database in connection.")
        if namespace is None:
            namespace = "main"
        # Each row is unpacked as (<ignored>, database name, filename); find
        # the row for our namespace and keep its filename.
        for _, dbname, filename in dbList:
            if dbname == namespace:
                break
        else:
            raise RuntimeError(f"No '{namespace}' database in connection.")
        # An empty filename is normalized to None (see the ``filename``
        # attribute documentation: None means an in-memory database).
        self.filename = filename or None
        self._writeable = writeable

    @classmethod
    def makeDefaultUri(cls, root: str) -> Optional[str]:
        # Docstring inherited.
        return "sqlite:///" + os.path.join(root, "gen3.sqlite3")

    @classmethod
    def makeEngine(
        cls, uri: Optional[str] = None, *, filename: Optional[str] = None, writeable: bool = True
    ) -> sqlalchemy.engine.Engine:
        """Create a `sqlalchemy.engine.Engine` from a SQLAlchemy URI or
        filename.

        Parameters
        ----------
        uri : `str`, optional
            A SQLAlchemy URI connection string.  If `None`, the URI is built
            from ``filename``.
        filename : `str`, optional
            Name of the SQLite database file, or `None` to use an in-memory
            database.  Ignored if ``uri is not None``.
        writeable : `bool`, optional
            If `True`, allow write operations on the database, including
            ``CREATE TABLE``.

        Returns
        -------
        engine : `sqlalchemy.engine.Engine`
            A database engine.

        Raises
        ------
        NotImplementedError
            Raised if ``uri`` has a ``uri=true`` query component, or if a
            read-only in-memory database is requested.
        """
        # In order to be able to tell SQLite that we want a read-only or
        # read-write connection, we need to make the SQLite DBAPI connection
        # with a "URI"-based connection string.  SQLAlchemy claims it can do
        # this
        # (https://docs.sqlalchemy.org/en/13/dialects/sqlite.html#uri-connections),
        # but it doesn't seem to work as advertised.  To work around this, we
        # use the 'creator' argument to sqlalchemy.engine.create_engine, which
        # lets us pass a callable that creates the DBAPI connection.
        #
        # NOTE(review): ``filename`` is interpolated into the SQLite URI
        # without percent-encoding, so names containing '?', '#' or '%' may
        # be misinterpreted by SQLite - confirm whether such paths need to be
        # supported.
        if uri is None:
            if filename is None:
                target = ":memory:"
                uri = "sqlite://"
            else:
                target = f"file:{filename}"
                uri = f"sqlite:///{filename}"
        else:
            parsed = urllib.parse.urlparse(uri)
            queries = parsed.query.split("&")
            if "uri=true" in queries:
                # This is a SQLAlchemy URI that is already trying to make a
                # SQLite connection via a SQLite URI, and hence there may
                # be URI components for both SQLite and SQLAlchemy.  We
                # don't need to support that, and it'd be a
                # reimplementation of all of the (broken) logic in
                # SQLAlchemy for doing this, so we just don't.
                raise NotImplementedError("SQLite connection strings with 'uri=true' are not supported.")
            # This is just a SQLAlchemy URI with a non-URI SQLite
            # connection string inside it.  Pull that out so we can use it
            # in the creator call.
            if parsed.path.startswith("/"):
                filename = parsed.path[1:]
                target = f"file:{filename}"
            else:
                filename = None
                target = ":memory:"
        if filename is None:
            if not writeable:
                raise NotImplementedError("Read-only :memory: databases are not supported.")
        else:
            # Select read-write-create or read-only access via the SQLite
            # URI 'mode' parameter.
            mode = "rwc" if writeable else "ro"
            target += f"?mode={mode}&uri=true"

        def creator() -> sqlite3.Connection:
            return sqlite3.connect(target, check_same_thread=False, uri=True)

        engine = sqlalchemy.engine.create_engine(uri, creator=creator)

        sqlalchemy.event.listen(engine, "connect", _onSqlite3Connect)
        sqlalchemy.event.listen(engine, "begin", _onSqlite3Begin)

        return engine

    @classmethod
    def fromEngine(
        cls,
        engine: sqlalchemy.engine.Engine,
        *,
        origin: int,
        namespace: Optional[str] = None,
        writeable: bool = True,
    ) -> Database:
        # Docstring inherited.
        return cls(engine=engine, origin=origin, writeable=writeable, namespace=namespace)

    def isWriteable(self) -> bool:
        # Docstring inherited.
        return self._writeable

    def __str__(self) -> str:
        if self.filename:
            return f"SQLite3@{self.filename}"
        else:
            return "SQLite3@:memory:"

    def _lockTables(
        self, connection: sqlalchemy.engine.Connection, tables: Iterable[sqlalchemy.schema.Table] = ()
    ) -> None:
        # Docstring inherited.
        # Our SQLite database always acquires full-database locks at the
        # beginning of a transaction, so there's no need to acquire table-level
        # locks - which is good, because SQLite doesn't have table-level
        # locking.
        pass

    # MyPy claims that the return type here isn't covariant with the return
    # type of the base class method, which is formally correct but irrelevant
    # - the base class return type is _GeneratorContextManager, but only
    # because it's generated by the contextmanager decorator.
    def declareStaticTables(self, *, create: bool) -> ContextManager[StaticTablesContext]:  # type: ignore
        # Docstring inherited.
        # If the user asked for an in-memory, writeable database, then we may
        # need to re-create schema even if create=False because schema can be
        # lost on re-connect.  This is only really relevant for tests, and it's
        # convenient there.
        if self.filename is None and self.isWriteable():
            inspector = sqlalchemy.inspect(self._engine)
            tables = inspector.get_table_names(schema=self.namespace)
            if not tables:
                create = True
        return super().declareStaticTables(create=create)

    def _convertFieldSpec(
        self, table: str, spec: ddl.FieldSpec, metadata: sqlalchemy.MetaData, **kwargs: Any
    ) -> sqlalchemy.schema.Column:
        # Docstring inherited.
        if spec.autoincrement:
            if not spec.primaryKey:
                raise RuntimeError(
                    f"Autoincrement field {table}.{spec.name} that is not a primary key is not supported."
                )
            if spec.dtype != sqlalchemy.Integer:
                # SQLite's autoincrement is really limited; it only works if
                # the column type is exactly "INTEGER".  But it also doesn't
                # care about the distinctions between different integer types,
                # so it's safe to change it.  Work on a copy so the caller's
                # spec is not modified.
                spec = copy.copy(spec)
                spec.dtype = sqlalchemy.Integer
        return super()._convertFieldSpec(table, spec, metadata, **kwargs)

    def _makeColumnConstraints(self, table: str, spec: ddl.FieldSpec) -> List[sqlalchemy.CheckConstraint]:
        # Docstring inherited.
        # For sqlite we force constraints on all string columns since sqlite
        # ignores everything otherwise and this leads to problems with
        # other databases.

        constraints = []
        if spec.isStringType():
            name = self.shrinkDatabaseEntityName("_".join([table, "len", spec.name]))
            # The lower bound on the length is there because Oracle converts
            # empty strings to NULL.
            constraints.append(
                sqlalchemy.CheckConstraint(
                    f"length({spec.name})<={spec.length}" f" AND length({spec.name})>=1",
                    name=name,
                )
            )

        constraints.extend(super()._makeColumnConstraints(table, spec))
        return constraints

    def _convertTableSpec(
        self, name: str, spec: ddl.TableSpec, metadata: sqlalchemy.MetaData, **kwargs: Any
    ) -> sqlalchemy.schema.Table:
        # Docstring inherited.
        primaryKeyFieldNames = {field.name for field in spec.fields if field.primaryKey}
        autoincrFieldNames = {field.name for field in spec.fields if field.autoincrement}
        if len(autoincrFieldNames) > 1:
            raise RuntimeError("At most one autoincrement field per table is allowed.")
        if len(primaryKeyFieldNames) > 1 and autoincrFieldNames:
            # SQLite's default rowid-based autoincrement doesn't work if the
            # field is just one field in a compound primary key.  As a
            # workaround, we create an extra table with just one column that
            # we'll insert into to generate those IDs.  That's only safe if
            # that single-column table's records are already unique with just
            # the autoincrement field, not the rest of the primary key.  In
            # practice, that means the single-column table's records are those
            # for which origin == self.origin.
            otherPrimaryKeyFieldNames = primaryKeyFieldNames - autoincrFieldNames
            if otherPrimaryKeyFieldNames != {"origin"}:
                # We need the only other field in the key to be 'origin'.
                raise NotImplementedError(
                    "Compound primary keys with an autoincrement are only supported in SQLite "
                    "if the only non-autoincrement primary key field is 'origin'."
                )
        if not spec.recycleIds:
            # Build a new dict rather than mutating the caller's kwargs.
            kwargs = dict(kwargs, sqlite_autoincrement=True)
        return super()._convertTableSpec(name, spec, metadata, **kwargs)

    def replace(self, table: sqlalchemy.schema.Table, *rows: dict) -> None:
        # Docstring inherited.
        self.assertTableWriteable(table, f"Cannot replace into read-only table {table}.")
        if not rows:
            return
        query = sqlalchemy.dialects.sqlite.insert(table)
        # On a primary-key conflict, overwrite every non-primary-key column
        # with the value from the row we tried to insert ("excluded").
        excluded = query.excluded
        data = {
            column.name: getattr(excluded, column.name)
            for column in table.columns
            if column.name not in table.primary_key
        }
        query = query.on_conflict_do_update(index_elements=table.primary_key, set_=data)
        with self._connection() as connection:
            connection.execute(query, rows)

    def ensure(self, table: sqlalchemy.schema.Table, *rows: dict, primary_key_only: bool = False) -> int:
        # Docstring inherited.
        self.assertTableWriteable(table, f"Cannot ensure into read-only table {table}.")
        if not rows:
            return 0
        query = sqlalchemy.dialects.sqlite.insert(table)
        if primary_key_only:
            # Only skip rows that conflict on the primary key.
            query = query.on_conflict_do_nothing(index_elements=table.primary_key)
        else:
            # No conflict target given.
            query = query.on_conflict_do_nothing()
        with self._connection() as connection:
            return connection.execute(query, rows).rowcount

    filename: Optional[str]
    """Name of the file this database is connected to (`str` or `None`).

    Set to `None` for in-memory databases.
    """