Coverage for python/lsst/daf/butler/registry/databases/sqlite.py: 18%

197 statements  

coverage.py v6.5.0, created at 2022-12-01 19:55 +0000

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ["SqliteDatabase"]

from contextlib import closing
import copy
from typing import Any, ContextManager, Dict, Iterable, List, Optional
from dataclasses import dataclass
import os
import urllib.parse

import sqlite3
import sqlalchemy
import sqlalchemy.ext.compiler

from ..interfaces import Database, StaticTablesContext
from ...core import ddl


def _onSqlite3Connect(dbapiConnection: sqlite3.Connection,
                      connectionRecord: sqlalchemy.pool._ConnectionRecord) -> None:
    assert isinstance(dbapiConnection, sqlite3.Connection)
    # Prevent pysqlite from emitting BEGIN and COMMIT statements.
    dbapiConnection.isolation_level = None
    # Enable foreign keys.
    with closing(dbapiConnection.cursor()) as cursor:
        cursor.execute("PRAGMA foreign_keys=ON;")
        cursor.execute("PRAGMA busy_timeout = 300000;")  # in ms, so 5min (way longer than should be needed)


def _onSqlite3Begin(connection: sqlalchemy.engine.Connection) -> sqlalchemy.engine.Connection:
    assert connection.dialect.name == "sqlite"
    # Replace pysqlite's buggy transaction handling that never BEGINs with our
    # own that does, and tell SQLite to try to acquire a lock as soon as we
    # start a transaction (this should lead to more blocking and fewer
    # deadlocks).
    connection.execute(sqlalchemy.text("BEGIN IMMEDIATE"))
    return connection


class _Replace(sqlalchemy.sql.Insert):
    """A SQLAlchemy query that compiles to INSERT ... ON CONFLICT DO UPDATE
    (an upsert) on the primary key constraint for the table.
    """
    pass


# SQLite and PostgreSQL use similar syntax for their ON CONFLICT extension,
# but SQLAlchemy only knows about PostgreSQL's, so we have to compile some
# custom text SQL ourselves.

# Hard to infer what types these should be from SQLAlchemy docs; just disable
# static typing by calling everything "Any".
@sqlalchemy.ext.compiler.compiles(_Replace, "sqlite")
def _replace(insert: Any, compiler: Any, **kwargs: Any) -> Any:
    """Generate an INSERT ... ON CONFLICT DO UPDATE query.
    """
    result = compiler.visit_insert(insert, **kwargs)
    preparer = compiler.preparer
    pk_columns = ", ".join([preparer.format_column(col) for col in insert.table.primary_key])
    result += f" ON CONFLICT ({pk_columns})"
    columns = [preparer.format_column(col) for col in insert.table.columns
               if col.name not in insert.table.primary_key]
    updates = ", ".join([f"{col} = excluded.{col}" for col in columns])
    result += f" DO UPDATE SET {updates}"
    return result
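
# For illustration only (a hypothetical table "t" with primary key "id" and
# one other column "value"): the statement compiled above looks roughly like
#
#     INSERT INTO t (id, value) VALUES (?, ?)
#         ON CONFLICT (id) DO UPDATE SET value = excluded.value
#
# so an existing row with the same "id" has its non-key columns overwritten.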


class _Ensure(sqlalchemy.sql.Insert):
    """A SQLAlchemy query that compiles to INSERT ... ON CONFLICT DO NOTHING.
    """
    pass


@sqlalchemy.ext.compiler.compiles(_Ensure, "sqlite")
def _ensure(insert: Any, compiler: Any, **kwargs: Any) -> Any:
    """Generate an INSERT ... ON CONFLICT DO NOTHING query.
    """
    result = compiler.visit_insert(insert, **kwargs)
    result += " ON CONFLICT DO NOTHING"
    return result
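
# For illustration only: the same hypothetical table "t" compiles to roughly
#
#     INSERT INTO t (id, value) VALUES (?, ?) ON CONFLICT DO NOTHING
#
# i.e. rows that would violate a unique or primary key constraint are
# silently skipped.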


_AUTOINCR_TABLE_SPEC = ddl.TableSpec(
    fields=[ddl.FieldSpec(name="id", dtype=sqlalchemy.Integer, primaryKey=True)]
)


@dataclass
class _AutoincrementCompoundKeyWorkaround:
    """A workaround for SQLite's lack of support for compound primary keys
    that include an autoincrement field.
    """

    table: sqlalchemy.schema.Table
    """A single-column internal table that can be inserted into to yield
    autoincrement values (`sqlalchemy.schema.Table`).
    """

    column: str
    """The name of the column in the original table that needs to be populated
    with values from the internal table (`str`).
    """


class SqliteDatabase(Database):
    """An implementation of the `Database` interface for SQLite3.

    Parameters
    ----------
    engine : `sqlalchemy.engine.Engine`
        An existing engine created by a previous call to `makeEngine`.
    origin : `int`
        An integer ID that should be used as the default for any datasets,
        quanta, or other entities that use a (autoincrement, origin) compound
        primary key.
    namespace : `str`, optional
        The namespace (schema) this database is associated with.  If `None`,
        the default schema for the connection is used (which may be `None`).
    writeable : `bool`, optional
        If `True`, allow write operations on the database, including
        ``CREATE TABLE``.

    Notes
    -----
    The case where ``namespace is not None`` is not yet tested, and may be
    broken; we need an API for attaching to different databases in order to
    write those tests, but haven't yet worked out what is common/different
    across databases well enough to define it.
    """

    def __init__(self, *, engine: sqlalchemy.engine.Engine, origin: int,
                 namespace: Optional[str] = None, writeable: bool = True):
        super().__init__(origin=origin, engine=engine, namespace=namespace)
        # Get the filename from a call to 'PRAGMA database_list'.
        with engine.connect() as connection:
            with closing(connection.connection.cursor()) as cursor:
                dbList = list(cursor.execute("PRAGMA database_list").fetchall())
        if len(dbList) == 0:
            raise RuntimeError("No database in connection.")
        if namespace is None:
            namespace = "main"
        for _, dbname, filename in dbList:
            if dbname == namespace:
                break
        else:
            raise RuntimeError(f"No '{namespace}' database in connection.")
        if not filename:
            self.filename = None
        else:
            self.filename = filename
        self._writeable = writeable
        self._autoincr: Dict[str, _AutoincrementCompoundKeyWorkaround] = {}

    @classmethod
    def makeDefaultUri(cls, root: str) -> Optional[str]:
        return "sqlite:///" + os.path.join(root, "gen3.sqlite3")

    @classmethod
    def makeEngine(cls, uri: Optional[str] = None, *, filename: Optional[str] = None,
                   writeable: bool = True) -> sqlalchemy.engine.Engine:
        """Create a `sqlalchemy.engine.Engine` from a SQLAlchemy URI or
        filename.

        Parameters
        ----------
        uri : `str`
            A SQLAlchemy URI connection string.
        filename : `str`
            Name of the SQLite database file, or `None` to use an in-memory
            database.  Ignored if ``uri is not None``.
        writeable : `bool`, optional
            If `True`, allow write operations on the database, including
            ``CREATE TABLE``.

        Returns
        -------
        engine : `sqlalchemy.engine.Engine`
            A database engine.
        """
        # In order to be able to tell SQLite that we want a read-only or
        # read-write connection, we need to make the SQLite DBAPI connection
        # with a "URI"-based connection string.  SQLAlchemy claims it can do
        # this
        # (https://docs.sqlalchemy.org/en/13/dialects/sqlite.html#uri-connections),
        # but it doesn't seem to work as advertised.  To work around this, we
        # use the 'creator' argument to sqlalchemy.engine.create_engine, which
        # lets us pass a callable that creates the DBAPI connection.
        if uri is None:
            if filename is None:
                target = ":memory:"
                uri = "sqlite://"
            else:
                target = f"file:{filename}"
                uri = f"sqlite:///{filename}"
        else:
            parsed = urllib.parse.urlparse(uri)
            queries = parsed.query.split("&")
            if "uri=true" in queries:
                # This is a SQLAlchemy URI that is already trying to make a
                # SQLite connection via a SQLite URI, and hence there may
                # be URI components for both SQLite and SQLAlchemy.  We
                # don't need to support that, and it'd be a
                # reimplementation of all of the (broken) logic in
                # SQLAlchemy for doing this, so we just don't.
                raise NotImplementedError("SQLite connection strings with 'uri=true' are not supported.")
            # This is just a SQLAlchemy URI with a non-URI SQLite
            # connection string inside it.  Pull that out so we can use it
            # in the creator call.
            if parsed.path.startswith("/"):
                filename = parsed.path[1:]
                target = f"file:{filename}"
            else:
                filename = None
                target = ":memory:"
        if filename is None:
            if not writeable:
                raise NotImplementedError("Read-only :memory: databases are not supported.")
        else:
            if writeable:
                target += '?mode=rwc&uri=true'
            else:
                target += '?mode=ro&uri=true'

        def creator() -> sqlite3.Connection:
            return sqlite3.connect(target, check_same_thread=False, uri=True)

        engine = sqlalchemy.engine.create_engine(uri, creator=creator)

        sqlalchemy.event.listen(engine, "connect", _onSqlite3Connect)
        sqlalchemy.event.listen(engine, "begin", _onSqlite3Begin)
        try:
            return engine
        except sqlalchemy.exc.OperationalError as err:
            raise RuntimeError(f"Error creating connection with uri='{uri}', filename='{filename}', "
                               f"target={target}.") from err
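
    # For illustration only (the paths are hypothetical), the URI handling in
    # makeEngine above produces roughly these SQLAlchemy/DBAPI targets:
    #
    #     makeEngine(filename="/data/gen3.sqlite3")
    #         uri    = "sqlite:////data/gen3.sqlite3"
    #         target = "file:/data/gen3.sqlite3?mode=rwc&uri=true"
    #     makeEngine("sqlite:////data/gen3.sqlite3", writeable=False)
    #         uri    = "sqlite:////data/gen3.sqlite3"
    #         target = "file:/data/gen3.sqlite3?mode=ro&uri=true"
    #     makeEngine()
    #         uri    = "sqlite://"
    #         target = ":memory:"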

    @classmethod
    def fromEngine(cls, engine: sqlalchemy.engine.Engine, *, origin: int,
                   namespace: Optional[str] = None, writeable: bool = True) -> Database:
        return cls(engine=engine, origin=origin, writeable=writeable, namespace=namespace)
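
    # A minimal construction sketch (the path and origin value are
    # illustrative, not part of this module; real use typically goes through
    # higher-level registry configuration):
    #
    #     engine = SqliteDatabase.makeEngine(filename="/tmp/gen3.sqlite3")
    #     db = SqliteDatabase.fromEngine(engine, origin=0)
    #     assert db.isWriteable()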

    def isWriteable(self) -> bool:
        return self._writeable

    def __str__(self) -> str:
        if self.filename:
            return f"SQLite3@{self.filename}"
        else:
            return "SQLite3@:memory:"

    def _lockTables(self, connection: sqlalchemy.engine.Connection,
                    tables: Iterable[sqlalchemy.schema.Table] = ()) -> None:
        # Docstring inherited.
        # Our SQLite database always acquires full-database locks at the
        # beginning of a transaction, so there's no need to acquire table-level
        # locks - which is good, because SQLite doesn't have table-level
        # locking.
        pass

    # MyPy claims that the return type here isn't covariant with the return
    # type of the base class method, which is formally correct but irrelevant
    # - the base class return type is _GeneratorContextManager, but only
    # because it's generated by the contextmanager decorator.
    def declareStaticTables(self, *, create: bool) -> ContextManager[StaticTablesContext]:  # type: ignore
        # If the user asked for an in-memory, writeable database, then we may
        # need to re-create schema even if create=False because schema can be
        # lost on re-connect.  This is only really relevant for tests, and it's
        # convenient there.
        if self.filename is None and self.isWriteable():
            inspector = sqlalchemy.inspect(self._engine)
            tables = inspector.get_table_names(schema=self.namespace)
            if not tables:
                create = True
        return super().declareStaticTables(create=create)

    def _convertFieldSpec(self, table: str, spec: ddl.FieldSpec, metadata: sqlalchemy.MetaData,
                          **kwargs: Any) -> sqlalchemy.schema.Column:
        if spec.autoincrement:
            if not spec.primaryKey:
                raise RuntimeError(f"Autoincrement field {table}.{spec.name} that is not a "
                                   f"primary key is not supported.")
            if spec.dtype != sqlalchemy.Integer:
                # SQLite's autoincrement is really limited; it only works if
                # the column type is exactly "INTEGER".  But it also doesn't
                # care about the distinctions between different integer types,
                # so it's safe to change it.
                spec = copy.copy(spec)
                spec.dtype = sqlalchemy.Integer
        return super()._convertFieldSpec(table, spec, metadata, **kwargs)
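
    # For illustration only (a hypothetical field spec, not one defined by
    # this module): a spec such as
    #
    #     ddl.FieldSpec(name="id", dtype=sqlalchemy.BigInteger,
    #                   primaryKey=True, autoincrement=True)
    #
    # would have its dtype coerced to sqlalchemy.Integer above, because
    # SQLite only autoincrements columns declared exactly as "INTEGER".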

    def _makeColumnConstraints(self, table: str, spec: ddl.FieldSpec) -> List[sqlalchemy.CheckConstraint]:
        # For sqlite we force length constraints on all string columns, since
        # sqlite ignores declared string lengths otherwise and that leads to
        # problems with other databases.

        constraints = []
        if spec.isStringType():
            name = self.shrinkDatabaseEntityName("_".join([table, "len", spec.name]))
            constraints.append(sqlalchemy.CheckConstraint(f"length({spec.name})<={spec.length}"
                                                          # Oracle converts
                                                          # empty strings to
                                                          # NULL so check
                                                          f" AND length({spec.name})>=1",
                                                          name=name))

        constraints.extend(super()._makeColumnConstraints(table, spec))
        return constraints
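
    # For illustration only: a hypothetical string field "name" with
    # length 64 in a table "t" yields a constraint roughly equivalent to
    #
    #     CONSTRAINT t_len_name CHECK (length(name)<=64 AND length(name)>=1)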

    def _convertTableSpec(self, name: str, spec: ddl.TableSpec, metadata: sqlalchemy.MetaData,
                          **kwargs: Any) -> sqlalchemy.schema.Table:
        primaryKeyFieldNames = set(field.name for field in spec.fields if field.primaryKey)
        autoincrFieldNames = set(field.name for field in spec.fields if field.autoincrement)
        if len(autoincrFieldNames) > 1:
            raise RuntimeError("At most one autoincrement field per table is allowed.")
        if len(primaryKeyFieldNames) > 1 and len(autoincrFieldNames) > 0:
            # SQLite's default rowid-based autoincrement doesn't work if the
            # field is just one field in a compound primary key.  As a
            # workaround, we create an extra table with just one column that
            # we'll insert into to generate those IDs.  That's only safe if
            # that single-column table's records are already unique with just
            # the autoincrement field, not the rest of the primary key.  In
            # practice, that means the single-column table's records are those
            # for which origin == self.origin.
            autoincrFieldName, = autoincrFieldNames
            otherPrimaryKeyFieldNames = primaryKeyFieldNames - autoincrFieldNames
            if otherPrimaryKeyFieldNames != {"origin"}:
                # We need the only other field in the key to be 'origin'.
                raise NotImplementedError(
                    "Compound primary keys with an autoincrement are only supported in SQLite "
                    "if the only non-autoincrement primary key field is 'origin'."
                )
            self._autoincr[name] = _AutoincrementCompoundKeyWorkaround(
                table=self._convertTableSpec(f"_autoinc_{name}", _AUTOINCR_TABLE_SPEC, metadata, **kwargs),
                column=autoincrFieldName
            )
        if not spec.recycleIds:
            kwargs = dict(kwargs, sqlite_autoincrement=True)
        return super()._convertTableSpec(name, spec, metadata, **kwargs)
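
    # For illustration only (a hypothetical spec, not one defined by this
    # module): converting a spec named "thing" whose compound primary key is
    # (id, origin) with "id" autoincremented, e.g.
    #
    #     ddl.TableSpec(fields=[
    #         ddl.FieldSpec(name="id", dtype=sqlalchemy.BigInteger,
    #                       primaryKey=True, autoincrement=True),
    #         ddl.FieldSpec(name="origin", dtype=sqlalchemy.BigInteger,
    #                       primaryKey=True),
    #     ])
    #
    # also creates a one-column side table "_autoinc_thing" whose rows supply
    # the "id" values used by `insert` below.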

    def insert(self, table: sqlalchemy.schema.Table, *rows: dict, returnIds: bool = False,
               select: Optional[sqlalchemy.sql.Select] = None,
               names: Optional[Iterable[str]] = None,
               ) -> Optional[List[int]]:
        self.assertTableWriteable(table, f"Cannot insert into read-only table {table}.")
        autoincr = self._autoincr.get(table.name)
        if autoincr is not None:
            if select is not None:
                raise NotImplementedError(
                    "Cannot do INSERT INTO ... SELECT on a SQLite table with a simulated autoincrement "
                    "compound primary key"
                )
            # This table has a compound primary key that includes an
            # autoincrement.  That doesn't work natively in SQLite, so we
            # insert into a single-column table and use those IDs.
            if not rows:
                return [] if returnIds else None
            if autoincr.column in rows[0]:
                # Caller passed the autoincrement key values explicitly in the
                # first row.  They had better have done the same for all rows,
                # or SQLAlchemy would have a problem, even if we didn't.
                assert all(autoincr.column in row for row in rows)
                # We need to insert only the values that correspond to
                # ``origin == self.origin`` into the single-column table, to
                # make sure we don't generate conflicting keys there later.
                rowsForAutoincrTable = [dict(id=row[autoincr.column])
                                        for row in rows if row["origin"] == self.origin]
                # Insert into the autoincr table and the target table inside
                # a transaction.  The main-table insertion can take care of
                # returnIds for us.
                with self.transaction(), self._connection() as connection:
                    connection.execute(autoincr.table.insert(), rowsForAutoincrTable)
                    return super().insert(table, *rows, returnIds=returnIds)
            else:
                # Caller did not pass autoincrement key values on the first
                # row.  Make sure they didn't ever do that, and also make
                # sure the origin that was passed in is always self.origin,
                # because we can't safely generate autoincrement values
                # otherwise.
                assert all(autoincr.column not in row and row["origin"] == self.origin for row in rows)
                # Insert into the autoincr table one by one to get the
                # primary key values back, then insert into the target table
                # in the same transaction.
                with self.transaction():
                    newRows = []
                    ids = []
                    for row in rows:
                        newRow = row.copy()
                        with self._connection() as connection:
                            id = connection.execute(autoincr.table.insert()).inserted_primary_key[0]
                        newRow[autoincr.column] = id
                        newRows.append(newRow)
                        ids.append(id)
                    # Don't ever ask to returnIds here, because we've already
                    # got them.
                    super().insert(table, *newRows)
                if returnIds:
                    return ids
                else:
                    return None
        else:
            return super().insert(table, *rows, select=select, names=names, returnIds=returnIds)
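
    # A minimal usage sketch of the workaround branch above (the table and
    # values are hypothetical): for a table "thing" with the (id, origin)
    # compound key described in _convertTableSpec,
    #
    #     ids = db.insert(thing, {"origin": db.origin}, {"origin": db.origin},
    #                     returnIds=True)
    #
    # allocates two ids from "_autoinc_thing" and inserts two rows into
    # "thing" with those ids filled in.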

    def replace(self, table: sqlalchemy.schema.Table, *rows: dict) -> None:
        self.assertTableWriteable(table, f"Cannot replace into read-only table {table}.")
        if not rows:
            return
        if table.name in self._autoincr:
            raise NotImplementedError(
                "replace does not support compound primary keys with autoincrement fields."
            )
        with self._connection() as connection:
            connection.execute(_Replace(table), rows)

    def ensure(self, table: sqlalchemy.schema.Table, *rows: dict) -> int:
        self.assertTableWriteable(table, f"Cannot ensure into read-only table {table}.")
        if not rows:
            return 0
        if table.name in self._autoincr:
            raise NotImplementedError(
                "ensure does not support compound primary keys with autoincrement fields."
            )
        with self._connection() as connection:
            return connection.execute(_Ensure(table), rows).rowcount

    filename: Optional[str]
    """Name of the file this database is connected to (`str` or `None`).

    Set to `None` for in-memory databases.
    """