Coverage for python/lsst/daf/butler/registry/databases/sqlite.py: 17%

145 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2022-12-15 10:02 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = ["SqliteDatabase"] 

24 

25import copy 

26import os 

27import sqlite3 

28import urllib.parse 

29from contextlib import closing 

30from typing import Any, ContextManager, Iterable, List, Optional 

31 

32import sqlalchemy 

33import sqlalchemy.dialects.sqlite 

34import sqlalchemy.ext.compiler 

35 

36from ...core import ddl 

37from ...core.named import NamedValueAbstractSet 

38from ..interfaces import Database, StaticTablesContext 

39 

40 

def _onSqlite3Connect(
    dbapiConnection: sqlite3.Connection, connectionRecord: sqlalchemy.pool._ConnectionRecord
) -> None:
    """Configure a newly opened SQLite DBAPI connection.

    Parameters
    ----------
    dbapiConnection : `sqlite3.Connection`
        The raw DBAPI connection to configure.
    connectionRecord : `sqlalchemy.pool._ConnectionRecord`
        Unused by this function.
    """
    assert isinstance(dbapiConnection, sqlite3.Connection)
    # An isolation level of None prevents pysqlite from emitting BEGIN and
    # COMMIT statements on its own.
    dbapiConnection.isolation_level = None
    setupStatements = (
        # Enable foreign keys.
        "PRAGMA foreign_keys=ON;",
        # Timeout is in ms, so 5min (way longer than should be needed).
        "PRAGMA busy_timeout = 300000;",
    )
    with closing(dbapiConnection.cursor()) as cursor:
        for statement in setupStatements:
            cursor.execute(statement)

51 

52 

def _onSqlite3Begin(connection: sqlalchemy.engine.Connection) -> sqlalchemy.engine.Connection:
    """Start an explicit transaction on a SQLite connection.

    Parameters
    ----------
    connection : `sqlalchemy.engine.Connection`
        Connection whose dialect name must be ``"sqlite"``.

    Returns
    -------
    connection : `sqlalchemy.engine.Connection`
        The same connection object that was passed in.
    """
    assert connection.dialect.name == "sqlite"
    # pysqlite's own transaction handling is buggy and never BEGINs, so we
    # issue the BEGIN ourselves.  Using IMMEDIATE asks SQLite to try to
    # acquire a lock as soon as the transaction starts, which should lead to
    # more blocking and fewer deadlocks.
    beginStatement = sqlalchemy.text("BEGIN IMMEDIATE")
    connection.execute(beginStatement)
    return connection

61 

62 

class SqliteDatabase(Database):
    """An implementation of the `Database` interface for SQLite3.

    Parameters
    ----------
    engine : `sqlalchemy.engine.Engine`
        Engine used to connect to the database, e.g. one returned by
        `makeEngine`.
    origin : `int`
        An integer ID that should be used as the default for any datasets,
        quanta, or other entities that use a (autoincrement, origin) compound
        primary key.
    namespace : `str`, optional
        The namespace (schema) this database is associated with.  If `None`,
        the default schema for the connection is used (which may be `None`),
        and the ``"main"`` database is used to look up the filename.
    writeable : `bool`, optional
        If `True`, allow write operations on the database, including
        ``CREATE TABLE``.

    Raises
    ------
    RuntimeError
        Raised if ``PRAGMA database_list`` reports no databases at all, or
        none whose name matches ``namespace``.

    Notes
    -----
    The case where ``namespace is not None`` is not yet tested, and may be
    broken; we need an API for attaching to different databases in order to
    write those tests, but haven't yet worked out what is common/different
    across databases well enough to define it.
    """

    def __init__(
        self,
        *,
        engine: sqlalchemy.engine.Engine,
        origin: int,
        namespace: Optional[str] = None,
        writeable: bool = True,
    ):
        super().__init__(origin=origin, engine=engine, namespace=namespace)
        # Get the filename from a call to 'PRAGMA database_list'; each row is
        # unpacked below as (<ignored>, database name, filename).
        with engine.connect() as connection:
            with closing(connection.connection.cursor()) as cursor:
                dbList = cursor.execute("PRAGMA database_list").fetchall()
        if not dbList:
            raise RuntimeError("No database in connection.")
        if namespace is None:
            namespace = "main"
        for _, dbname, filename in dbList:
            if dbname == namespace:
                break
        else:
            raise RuntimeError(f"No '{namespace}' database in connection.")
        # An empty (or missing) filename is stored as None.
        self.filename = filename or None
        self._writeable = writeable

    @classmethod
    def makeDefaultUri(cls, root: str) -> Optional[str]:
        # Build a SQLAlchemy URI for a "gen3.sqlite3" file inside ``root``.
        return "sqlite:///" + os.path.join(root, "gen3.sqlite3")

    @classmethod
    def makeEngine(
        cls, uri: Optional[str] = None, *, filename: Optional[str] = None, writeable: bool = True
    ) -> sqlalchemy.engine.Engine:
        """Create a `sqlalchemy.engine.Engine` from a SQLAlchemy URI or
        filename.

        Parameters
        ----------
        uri : `str`, optional
            A SQLAlchemy URI connection string.
        filename : `str`, optional
            Name of the SQLite database file, or `None` to use an in-memory
            database.  Ignored if ``uri is not None``.
        writeable : `bool`, optional
            If `True`, allow write operations on the database, including
            ``CREATE TABLE``.

        Returns
        -------
        engine : `sqlalchemy.engine.Engine`
            A database engine.

        Raises
        ------
        NotImplementedError
            Raised if ``uri`` has ``uri=true`` among its query parameters, or
            if a read-only in-memory database is requested.
        """
        # In order to be able to tell SQLite that we want a read-only or
        # read-write connection, we need to make the SQLite DBAPI connection
        # with a "URI"-based connection string.  SQLAlchemy claims it can do
        # this
        # (https://docs.sqlalchemy.org/en/13/dialects/sqlite.html#uri-connections),
        # but it doesn't seem to work as advertised.  To work around this, we
        # use the 'creator' argument to sqlalchemy.engine.create_engine, which
        # lets us pass a callable that creates the DBAPI connection.
        if uri is None:
            if filename is None:
                target = ":memory:"
                uri = "sqlite://"
            else:
                target = f"file:{filename}"
                uri = f"sqlite:///{filename}"
        else:
            parsed = urllib.parse.urlparse(uri)
            queries = parsed.query.split("&")
            if "uri=true" in queries:
                # This is a SQLAlchemy URI that is already trying to make a
                # SQLite connection via a SQLite URI, and hence there may
                # be URI components for both SQLite and SQLAlchemy.  We
                # don't need to support that, and it'd be a
                # reimplementation of all of the (broken) logic in
                # SQLAlchemy for doing this, so we just don't.
                raise NotImplementedError("SQLite connection strings with 'uri=true' are not supported.")
            # This is just a SQLAlchemy URI with a non-URI SQLite
            # connection string inside it.  Pull that out so we can use it
            # in the creator call.
            if parsed.path.startswith("/"):
                filename = parsed.path[1:]
                target = f"file:{filename}"
            else:
                filename = None
                target = ":memory:"
        if filename is None:
            if not writeable:
                raise NotImplementedError("Read-only :memory: databases are not supported.")
        else:
            # File-backed databases get an explicit access mode appended.
            mode = "rwc" if writeable else "ro"
            target += f"?mode={mode}&uri=true"

        def creator() -> sqlite3.Connection:
            return sqlite3.connect(target, check_same_thread=False, uri=True)

        engine = sqlalchemy.engine.create_engine(uri, creator=creator)

        # Configure each new DBAPI connection and take over transaction
        # BEGIN handling.
        sqlalchemy.event.listen(engine, "connect", _onSqlite3Connect)
        sqlalchemy.event.listen(engine, "begin", _onSqlite3Begin)

        return engine

    @classmethod
    def fromEngine(
        cls,
        engine: sqlalchemy.engine.Engine,
        *,
        origin: int,
        namespace: Optional[str] = None,
        writeable: bool = True,
    ) -> Database:
        # Thin alternate constructor: forwards everything to ``__init__``.
        return cls(engine=engine, origin=origin, writeable=writeable, namespace=namespace)

    def isWriteable(self) -> bool:
        # Reports the ``writeable`` flag given at construction.
        return self._writeable

    def __str__(self) -> str:
        if self.filename:
            return f"SQLite3@{self.filename}"
        else:
            return "SQLite3@:memory:"

    def _lockTables(
        self, connection: sqlalchemy.engine.Connection, tables: Iterable[sqlalchemy.schema.Table] = ()
    ) -> None:
        # Docstring inherited.
        # Our SQLite database always acquires full-database locks at the
        # beginning of a transaction, so there's no need to acquire table-level
        # locks - which is good, because SQLite doesn't have table-level
        # locking.
        pass

    # MyPy claims that the return type here isn't covariant with the return
    # type of the base class method, which is formally correct but irrelevant
    # - the base class return type is _GeneratorContextManager, but only
    # because it's generated by the contextmanager decorator.
    def declareStaticTables(self, *, create: bool) -> ContextManager[StaticTablesContext]:  # type: ignore
        # If the user asked for an in-memory, writeable database, then we may
        # need to re-create schema even if create=False because schema can be
        # lost on re-connect.  This is only really relevant for tests, and it's
        # convenient there.
        if self.filename is None and self.isWriteable():
            inspector = sqlalchemy.inspect(self._engine)
            tables = inspector.get_table_names(schema=self.namespace)
            if not tables:
                create = True
        return super().declareStaticTables(create=create)

    def _convertFieldSpec(
        self, table: str, spec: ddl.FieldSpec, metadata: sqlalchemy.MetaData, **kwargs: Any
    ) -> sqlalchemy.schema.Column:
        if spec.autoincrement:
            if not spec.primaryKey:
                raise RuntimeError(
                    f"Autoincrement field {table}.{spec.name} that is not a primary key is not supported."
                )
            if spec.dtype != sqlalchemy.Integer:
                # SQLite's autoincrement is really limited; it only works if
                # the column type is exactly "INTEGER".  But it also doesn't
                # care about the distinctions between different integer types,
                # so it's safe to change it.  Work on a copy so the caller's
                # spec is left untouched.
                spec = copy.copy(spec)
                spec.dtype = sqlalchemy.Integer
        return super()._convertFieldSpec(table, spec, metadata, **kwargs)

    def _makeColumnConstraints(self, table: str, spec: ddl.FieldSpec) -> List[sqlalchemy.CheckConstraint]:
        # For sqlite we force constraints on all string columns since sqlite
        # ignores everything otherwise and this leads to problems with
        # other databases.
        constraints: List[sqlalchemy.CheckConstraint] = []
        if spec.isStringType():
            name = self.shrinkDatabaseEntityName("_".join([table, "len", spec.name]))
            constraints.append(
                sqlalchemy.CheckConstraint(
                    # Upper bound on length, plus a lower bound of 1 because
                    # Oracle converts empty strings to NULL.
                    f'length("{spec.name}")<={spec.length}'
                    f' AND length("{spec.name}")>=1',
                    name=name,
                )
            )

        constraints.extend(super()._makeColumnConstraints(table, spec))
        return constraints

    def _convertTableSpec(
        self, name: str, spec: ddl.TableSpec, metadata: sqlalchemy.MetaData, **kwargs: Any
    ) -> sqlalchemy.schema.Table:
        primaryKeyFieldNames = {field.name for field in spec.fields if field.primaryKey}
        autoincrFieldNames = {field.name for field in spec.fields if field.autoincrement}
        if len(autoincrFieldNames) > 1:
            raise RuntimeError("At most one autoincrement field per table is allowed.")
        if len(primaryKeyFieldNames) > 1 and autoincrFieldNames:
            # SQLite's default rowid-based autoincrement doesn't work if the
            # field is just one field in a compound primary key, so the only
            # compound layout accepted here is (autoincrement, origin).
            # NOTE(review): an earlier comment here described an extra
            # single-column table used to generate IDs; this method does not
            # create one - confirm whether that happens elsewhere.
            otherPrimaryKeyFieldNames = primaryKeyFieldNames - autoincrFieldNames
            if otherPrimaryKeyFieldNames != {"origin"}:
                # We need the only other field in the key to be 'origin'.
                raise NotImplementedError(
                    "Compound primary keys with an autoincrement are only supported in SQLite "
                    "if the only non-autoincrement primary key field is 'origin'."
                )
        if not spec.recycleIds:
            kwargs = dict(kwargs, sqlite_autoincrement=True)
        return super()._convertTableSpec(name, spec, metadata, **kwargs)

    def replace(self, table: sqlalchemy.schema.Table, *rows: dict) -> None:
        self.assertTableWriteable(table, f"Cannot replace into read-only table {table}.")
        if not rows:
            return
        query = sqlalchemy.dialects.sqlite.insert(table)
        excluded = query.excluded
        # On a primary-key conflict, overwrite every non-key column with the
        # value from the row that failed to insert.
        data = {
            column.name: getattr(excluded, column.name)
            for column in table.columns
            if column.name not in table.primary_key
        }
        query = query.on_conflict_do_update(index_elements=table.primary_key, set_=data)
        with self._connection() as connection:
            connection.execute(query, rows)

    def ensure(self, table: sqlalchemy.schema.Table, *rows: dict, primary_key_only: bool = False) -> int:
        self.assertTableWriteable(table, f"Cannot ensure into read-only table {table}.")
        if not rows:
            return 0
        query = sqlalchemy.dialects.sqlite.insert(table)
        if primary_key_only:
            # Only conflicts on the primary key are silently skipped.
            query = query.on_conflict_do_nothing(index_elements=table.primary_key)
        else:
            query = query.on_conflict_do_nothing()
        with self._connection() as connection:
            return connection.execute(query, rows).rowcount

    def constant_rows(
        self,
        fields: NamedValueAbstractSet[ddl.FieldSpec],
        *rows: dict,
        name: Optional[str] = None,
    ) -> sqlalchemy.sql.FromClause:
        # Docstring inherited.
        # While SQLite supports VALUES, it doesn't support assigning a name
        # to that construct or the names of its columns, and hence there's no
        # way to actually join it into a SELECT query.  It seems the only
        # alternative is something like:
        #
        #    SELECT ? AS a, ? AS b
        #    UNION ALL
        #    SELECT ? AS a, ? AS b
        #
        selects = [
            sqlalchemy.sql.select(
                *[sqlalchemy.sql.literal(row[field.name], field.dtype).label(field.name) for field in fields]
            )
            for row in rows
        ]
        return sqlalchemy.sql.union_all(*selects).alias(name)

    filename: Optional[str]
    """Name of the file this database is connected to (`str` or `None`).

    Set to `None` for in-memory databases.
    """

366 """