Coverage for python/lsst/daf/butler/registry/databases/sqlite.py: 20%

Shortcuts on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

197 statements  

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = ["SqliteDatabase"] 

24 

25import copy 

26import os 

27import sqlite3 

28import urllib.parse 

29from contextlib import closing 

30from dataclasses import dataclass 

31from typing import Any, ContextManager, Dict, Iterable, List, Optional 

32 

33import sqlalchemy 

34import sqlalchemy.ext.compiler 

35 

36from ...core import ddl 

37from ..interfaces import Database, StaticTablesContext 

38 

39 

40def _onSqlite3Connect( 

41 dbapiConnection: sqlite3.Connection, connectionRecord: sqlalchemy.pool._ConnectionRecord 

42) -> None: 

43 assert isinstance(dbapiConnection, sqlite3.Connection) 

44 # Prevent pysqlite from emitting BEGIN and COMMIT statements. 

45 dbapiConnection.isolation_level = None 

46 # Enable foreign keys 

47 with closing(dbapiConnection.cursor()) as cursor: 

48 cursor.execute("PRAGMA foreign_keys=ON;") 

49 cursor.execute("PRAGMA busy_timeout = 300000;") # in ms, so 5min (way longer than should be needed) 

50 

51 

def _onSqlite3Begin(connection: sqlalchemy.engine.Connection) -> sqlalchemy.engine.Connection:
    """Start a transaction explicitly with ``BEGIN IMMEDIATE``.

    Registered in this module as a SQLAlchemy ``"begin"`` event listener.

    Parameters
    ----------
    connection : `sqlalchemy.engine.Connection`
        Connection on which a transaction is being started; must use the
        ``sqlite`` dialect.

    Returns
    -------
    connection : `sqlalchemy.engine.Connection`
        The same connection that was passed in.
    """
    assert connection.dialect.name == "sqlite"
    # pysqlite's own transaction handling is buggy and never BEGINs, so we
    # emit the statement ourselves.  IMMEDIATE asks SQLite to try to acquire
    # a lock as soon as the transaction starts, which should lead to more
    # blocking and fewer deadlocks.
    beginStatement = sqlalchemy.text("BEGIN IMMEDIATE")
    connection.execute(beginStatement)
    return connection

60 

61 

class _Replace(sqlalchemy.sql.Insert):
    """Marker subclass of `sqlalchemy.sql.Insert` for upsert statements.

    On the ``sqlite`` dialect this construct is compiled to
    ``INSERT ... ON CONFLICT (<primary key>) DO UPDATE SET ...`` so that a
    conflicting row is overwritten by the new values.
    """

    # Opt in to SQLAlchemy's compiled-statement cache.
    inherit_cache = True

68 

69 

70# SQLite and PostgreSQL use similar syntax for their ON CONFLICT extension, 

71# but SQLAlchemy only knows about PostgreSQL's, so we have to compile some 

72# custom text SQL ourselves. 

73 

74# Hard to infer what types these should be from SQLAlchemy docs; just disable 

75# static typing by calling everything "Any". 

@sqlalchemy.ext.compiler.compiles(_Replace, "sqlite")
def _replace(insert: Any, compiler: Any, **kwargs: Any) -> Any:
    """Generate an INSERT ... ON CONFLICT REPLACE query.

    Parameters
    ----------
    insert
        The `_Replace` construct being compiled.
    compiler
        The SQLAlchemy SQL compiler for the ``sqlite`` dialect.
    **kwargs
        Forwarded unchanged to ``compiler.visit_insert``.

    Returns
    -------
    sql : `str`
        The regular ``INSERT`` text followed by an ``ON CONFLICT`` clause
        targeting the table's primary key columns.
    """
    result = compiler.visit_insert(insert, **kwargs)
    preparer = compiler.preparer
    table = insert.table
    pk_columns = ", ".join(preparer.format_column(col) for col in table.primary_key)
    result += f" ON CONFLICT ({pk_columns})"
    columns = [
        preparer.format_column(col) for col in table.columns if col.name not in table.primary_key
    ]
    if not columns:
        # Every column is part of the primary key, so a conflicting row is
        # already identical to the new one.  An empty "DO UPDATE SET" list
        # is a syntax error in SQLite, so skip the row instead.
        return result + " DO NOTHING"
    updates = ", ".join(f"{col} = excluded.{col}" for col in columns)
    result += f" DO UPDATE SET {updates}"
    return result

91 

92 

class _Ensure(sqlalchemy.sql.Insert):
    """Marker subclass of `sqlalchemy.sql.Insert` for insert-if-missing
    statements.

    On the ``sqlite`` dialect this construct is compiled to
    ``INSERT ... ON CONFLICT DO NOTHING``.
    """

    # Opt in to SQLAlchemy's compiled-statement cache.
    inherit_cache = True

99 

100 

@sqlalchemy.ext.compiler.compiles(_Ensure, "sqlite")
def _ensure(insert: Any, compiler: Any, **kwargs: Any) -> Any:
    """Compile an `_Ensure` construct for the ``sqlite`` dialect.

    The ordinary ``INSERT`` text produced by ``compiler.visit_insert`` is
    returned with an ``ON CONFLICT DO NOTHING`` clause appended.
    """
    plainInsert = compiler.visit_insert(insert, **kwargs)
    return plainInsert + " ON CONFLICT DO NOTHING"

107 

108 

# Specification shared by the internal single-column ``_autoinc_<name>``
# tables: one integer primary-key column named "id".
# `SqliteDatabase._convertTableSpec` creates such a table for each table whose
# compound primary key includes an autoincrement field.
_AUTOINCR_TABLE_SPEC = ddl.TableSpec(
    fields=[ddl.FieldSpec(name="id", dtype=sqlalchemy.Integer, primaryKey=True)]
)

112 

113 

114@dataclass 

115class _AutoincrementCompoundKeyWorkaround: 

116 """A workaround for SQLite's lack of support for compound primary keys that 

117 include an autoincrement field. 

118 """ 

119 

120 table: sqlalchemy.schema.Table 

121 """A single-column internal table that can be inserted into to yield 

122 autoincrement values (`sqlalchemy.schema.Table`). 

123 """ 

124 

125 column: str 

126 """The name of the column in the original table that needs to be populated 

127 with values from the internal table (`str`). 

128 """ 

129 

130 

131class SqliteDatabase(Database): 

132 """An implementation of the `Database` interface for SQLite3. 

133 

134 Parameters 

135 ---------- 

136 connection : `sqlalchemy.engine.Connection` 

137 An existing connection created by a previous call to `connect`. 

138 origin : `int` 

139 An integer ID that should be used as the default for any datasets, 

140 quanta, or other entities that use a (autoincrement, origin) compound 

141 primary key. 

142 namespace : `str`, optional 

143 The namespace (schema) this database is associated with. If `None`, 

144 the default schema for the connection is used (which may be `None`). 

145 writeable : `bool`, optional 

146 If `True`, allow write operations on the database, including 

147 ``CREATE TABLE``. 

148 

149 Notes 

150 ----- 

151 The case where ``namespace is not None`` is not yet tested, and may be 

152 broken; we need an API for attaching to different databases in order to 

153 write those tests, but haven't yet worked out what is common/different 

154 across databases well enough to define it. 

155 """ 

156 

157 def __init__( 

158 self, 

159 *, 

160 engine: sqlalchemy.engine.Engine, 

161 origin: int, 

162 namespace: Optional[str] = None, 

163 writeable: bool = True, 

164 ): 

165 super().__init__(origin=origin, engine=engine, namespace=namespace) 

166 # Get the filename from a call to 'PRAGMA database_list'. 

167 with engine.connect() as connection: 

168 with closing(connection.connection.cursor()) as cursor: 

169 dbList = list(cursor.execute("PRAGMA database_list").fetchall()) 

170 if len(dbList) == 0: 

171 raise RuntimeError("No database in connection.") 

172 if namespace is None: 

173 namespace = "main" 

174 for _, dbname, filename in dbList: 

175 if dbname == namespace: 

176 break 

177 else: 

178 raise RuntimeError(f"No '{namespace}' database in connection.") 

179 if not filename: 

180 self.filename = None 

181 else: 

182 self.filename = filename 

183 self._writeable = writeable 

184 self._autoincr: Dict[str, _AutoincrementCompoundKeyWorkaround] = {} 

185 

186 @classmethod 

187 def makeDefaultUri(cls, root: str) -> Optional[str]: 

188 return "sqlite:///" + os.path.join(root, "gen3.sqlite3") 

189 

190 @classmethod 

191 def makeEngine( 

192 cls, uri: Optional[str] = None, *, filename: Optional[str] = None, writeable: bool = True 

193 ) -> sqlalchemy.engine.Engine: 

194 """Create a `sqlalchemy.engine.Engine` from a SQLAlchemy URI or 

195 filename. 

196 

197 Parameters 

198 ---------- 

199 uri : `str` 

200 A SQLAlchemy URI connection string. 

201 filename : `str` 

202 Name of the SQLite database file, or `None` to use an in-memory 

203 database. Ignored if ``uri is not None``. 

204 writeable : `bool`, optional 

205 If `True`, allow write operations on the database, including 

206 ``CREATE TABLE``. 

207 

208 Returns 

209 ------- 

210 engine : `sqlalchemy.engine.Engine` 

211 A database engine. 

212 """ 

213 # In order to be able to tell SQLite that we want a read-only or 

214 # read-write connection, we need to make the SQLite DBAPI connection 

215 # with a "URI"-based connection string. SQLAlchemy claims it can do 

216 # this 

217 # (https://docs.sqlalchemy.org/en/13/dialects/sqlite.html#uri-connections), 

218 # but it doesn't seem to work as advertised. To work around this, we 

219 # use the 'creator' argument to sqlalchemy.engine.create_engine, which 

220 # lets us pass a callable that creates the DBAPI connection. 

221 if uri is None: 

222 if filename is None: 

223 target = ":memory:" 

224 uri = "sqlite://" 

225 else: 

226 target = f"file:{filename}" 

227 uri = f"sqlite:///{filename}" 

228 else: 

229 parsed = urllib.parse.urlparse(uri) 

230 queries = parsed.query.split("&") 

231 if "uri=true" in queries: 

232 # This is a SQLAlchemy URI that is already trying to make a 

233 # SQLite connection via a SQLite URI, and hence there may 

234 # be URI components for both SQLite and SQLAlchemy. We 

235 # don't need to support that, and it'd be a 

236 # reimplementation of all of the (broken) logic in 

237 # SQLAlchemy for doing this, so we just don't. 

238 raise NotImplementedError("SQLite connection strings with 'uri=true' are not supported.") 

239 # This is just a SQLAlchemy URI with a non-URI SQLite 

240 # connection string inside it. Pull that out so we can use it 

241 # in the creator call. 

242 if parsed.path.startswith("/"): 

243 filename = parsed.path[1:] 

244 target = f"file:{filename}" 

245 else: 

246 filename = None 

247 target = ":memory:" 

248 if filename is None: 

249 if not writeable: 

250 raise NotImplementedError("Read-only :memory: databases are not supported.") 

251 else: 

252 if writeable: 

253 target += "?mode=rwc&uri=true" 

254 else: 

255 target += "?mode=ro&uri=true" 

256 

257 def creator() -> sqlite3.Connection: 

258 return sqlite3.connect(target, check_same_thread=False, uri=True) 

259 

260 engine = sqlalchemy.engine.create_engine(uri, creator=creator) 

261 

262 sqlalchemy.event.listen(engine, "connect", _onSqlite3Connect) 

263 sqlalchemy.event.listen(engine, "begin", _onSqlite3Begin) 

264 try: 

265 return engine 

266 except sqlalchemy.exc.OperationalError as err: 

267 raise RuntimeError( 

268 f"Error creating connection with uri='{uri}', filename='{filename}', target={target}." 

269 ) from err 

270 

271 @classmethod 

272 def fromEngine( 

273 cls, 

274 engine: sqlalchemy.engine.Engine, 

275 *, 

276 origin: int, 

277 namespace: Optional[str] = None, 

278 writeable: bool = True, 

279 ) -> Database: 

280 return cls(engine=engine, origin=origin, writeable=writeable, namespace=namespace) 

281 

282 def isWriteable(self) -> bool: 

283 return self._writeable 

284 

285 def __str__(self) -> str: 

286 if self.filename: 

287 return f"SQLite3@{self.filename}" 

288 else: 

289 return "SQLite3@:memory:" 

290 

291 def _lockTables( 

292 self, connection: sqlalchemy.engine.Connection, tables: Iterable[sqlalchemy.schema.Table] = () 

293 ) -> None: 

294 # Docstring inherited. 

295 # Our SQLite database always acquires full-database locks at the 

296 # beginning of a transaction, so there's no need to acquire table-level 

297 # locks - which is good, because SQLite doesn't have table-level 

298 # locking. 

299 pass 

300 

301 # MyPy claims that the return type here isn't covariant with the return 

302 # type of the base class method, which is formally correct but irrelevant 

303 # - the base class return type is _GeneratorContextManager, but only 

304 # because it's generated by the contextmanager decorator. 

305 def declareStaticTables(self, *, create: bool) -> ContextManager[StaticTablesContext]: # type: ignore 

306 # If the user asked for an in-memory, writeable database, then we may 

307 # need to re-create schema even if create=False because schema can be 

308 # lost on re-connect. This is only really relevant for tests, and it's 

309 # convenient there. 

310 if self.filename is None and self.isWriteable(): 

311 inspector = sqlalchemy.inspect(self._engine) 

312 tables = inspector.get_table_names(schema=self.namespace) 

313 if not tables: 

314 create = True 

315 return super().declareStaticTables(create=create) 

316 

317 def _convertFieldSpec( 

318 self, table: str, spec: ddl.FieldSpec, metadata: sqlalchemy.MetaData, **kwargs: Any 

319 ) -> sqlalchemy.schema.Column: 

320 if spec.autoincrement: 

321 if not spec.primaryKey: 

322 raise RuntimeError( 

323 f"Autoincrement field {table}.{spec.name} that is not a primary key is not supported." 

324 ) 

325 if spec.dtype != sqlalchemy.Integer: 

326 # SQLite's autoincrement is really limited; it only works if 

327 # the column type is exactly "INTEGER". But it also doesn't 

328 # care about the distinctions between different integer types, 

329 # so it's safe to change it. 

330 spec = copy.copy(spec) 

331 spec.dtype = sqlalchemy.Integer 

332 return super()._convertFieldSpec(table, spec, metadata, **kwargs) 

333 

334 def _makeColumnConstraints(self, table: str, spec: ddl.FieldSpec) -> List[sqlalchemy.CheckConstraint]: 

335 # For sqlite we force constraints on all string columns since sqlite 

336 # ignores everything otherwise and this leads to problems with 

337 # other databases. 

338 

339 constraints = [] 

340 if spec.isStringType(): 

341 name = self.shrinkDatabaseEntityName("_".join([table, "len", spec.name])) 

342 constraints.append( 

343 sqlalchemy.CheckConstraint( 

344 f"length({spec.name})<={spec.length}" 

345 # Oracle converts 

346 # empty strings to 

347 # NULL so check 

348 f" AND length({spec.name})>=1", 

349 name=name, 

350 ) 

351 ) 

352 

353 constraints.extend(super()._makeColumnConstraints(table, spec)) 

354 return constraints 

355 

356 def _convertTableSpec( 

357 self, name: str, spec: ddl.TableSpec, metadata: sqlalchemy.MetaData, **kwargs: Any 

358 ) -> sqlalchemy.schema.Table: 

359 primaryKeyFieldNames = set(field.name for field in spec.fields if field.primaryKey) 

360 autoincrFieldNames = set(field.name for field in spec.fields if field.autoincrement) 

361 if len(autoincrFieldNames) > 1: 

362 raise RuntimeError("At most one autoincrement field per table is allowed.") 

363 if len(primaryKeyFieldNames) > 1 and len(autoincrFieldNames) > 0: 

364 # SQLite's default rowid-based autoincrement doesn't work if the 

365 # field is just one field in a compound primary key. As a 

366 # workaround, we create an extra table with just one column that 

367 # we'll insert into to generate those IDs. That's only safe if 

368 # that single-column table's records are already unique with just 

369 # the autoincrement field, not the rest of the primary key. In 

370 # practice, that means the single-column table's records are those 

371 # for which origin == self.origin. 

372 (autoincrFieldName,) = autoincrFieldNames 

373 otherPrimaryKeyFieldNames = primaryKeyFieldNames - autoincrFieldNames 

374 if otherPrimaryKeyFieldNames != {"origin"}: 

375 # We need the only other field in the key to be 'origin'. 

376 raise NotImplementedError( 

377 "Compound primary keys with an autoincrement are only supported in SQLite " 

378 "if the only non-autoincrement primary key field is 'origin'." 

379 ) 

380 self._autoincr[name] = _AutoincrementCompoundKeyWorkaround( 

381 table=self._convertTableSpec(f"_autoinc_{name}", _AUTOINCR_TABLE_SPEC, metadata, **kwargs), 

382 column=autoincrFieldName, 

383 ) 

384 if not spec.recycleIds: 

385 kwargs = dict(kwargs, sqlite_autoincrement=True) 

386 return super()._convertTableSpec(name, spec, metadata, **kwargs) 

387 

388 def insert( 

389 self, 

390 table: sqlalchemy.schema.Table, 

391 *rows: dict, 

392 returnIds: bool = False, 

393 select: Optional[sqlalchemy.sql.Select] = None, 

394 names: Optional[Iterable[str]] = None, 

395 ) -> Optional[List[int]]: 

396 self.assertTableWriteable(table, f"Cannot insert into read-only table {table}.") 

397 autoincr = self._autoincr.get(table.name) 

398 if autoincr is not None: 

399 if select is not None: 

400 raise NotImplementedError( 

401 "Cannot do INSERT INTO ... SELECT on a SQLite table with a simulated autoincrement " 

402 "compound primary key" 

403 ) 

404 # This table has a compound primary key that includes an 

405 # autoincrement. That doesn't work natively in SQLite, so we 

406 # insert into a single-column table and use those IDs. 

407 if not rows: 

408 return [] if returnIds else None 

409 if autoincr.column in rows[0]: 

410 # Caller passed the autoincrement key values explicitly in the 

411 # first row. They had better have done the same for all rows, 

412 # or SQLAlchemy would have a problem, even if we didn't. 

413 assert all(autoincr.column in row for row in rows) 

414 # We need to insert only the values that correspond to 

415 # ``origin == self.origin`` into the single-column table, to 

416 # make sure we don't generate conflicting keys there later. 

417 rowsForAutoincrTable = [ 

418 dict(id=row[autoincr.column]) for row in rows if row["origin"] == self.origin 

419 ] 

420 # Insert into the autoincr table and the target table inside 

421 # a transaction. The main-table insertion can take care of 

422 # returnIds for us. 

423 with self.transaction(), self._connection() as connection: 

424 connection.execute(autoincr.table.insert(), rowsForAutoincrTable) 

425 return super().insert(table, *rows, returnIds=returnIds) 

426 else: 

427 # Caller did not pass autoincrement key values on the first 

428 # row. Make sure they didn't ever do that, and also make 

429 # sure the origin that was passed in is always self.origin, 

430 # because we can't safely generate autoincrement values 

431 # otherwise. 

432 assert all(autoincr.column not in row and row["origin"] == self.origin for row in rows) 

433 # Insert into the autoincr table one by one to get the 

434 # primary key values back, then insert into the target table 

435 # in the same transaction. 

436 with self.transaction(): 

437 newRows = [] 

438 ids = [] 

439 for row in rows: 

440 newRow = row.copy() 

441 with self._connection() as connection: 

442 id = connection.execute(autoincr.table.insert()).inserted_primary_key[0] 

443 newRow[autoincr.column] = id 

444 newRows.append(newRow) 

445 ids.append(id) 

446 # Don't ever ask to returnIds here, because we've already 

447 # got them. 

448 super().insert(table, *newRows) 

449 if returnIds: 

450 return ids 

451 else: 

452 return None 

453 else: 

454 return super().insert(table, *rows, select=select, names=names, returnIds=returnIds) 

455 

456 def replace(self, table: sqlalchemy.schema.Table, *rows: dict) -> None: 

457 self.assertTableWriteable(table, f"Cannot replace into read-only table {table}.") 

458 if not rows: 

459 return 

460 if table.name in self._autoincr: 

461 raise NotImplementedError( 

462 "replace does not support compound primary keys with autoincrement fields." 

463 ) 

464 with self._connection() as connection: 

465 connection.execute(_Replace(table), rows) 

466 

467 def ensure(self, table: sqlalchemy.schema.Table, *rows: dict) -> int: 

468 self.assertTableWriteable(table, f"Cannot ensure into read-only table {table}.") 

469 if not rows: 

470 return 0 

471 if table.name in self._autoincr: 

472 raise NotImplementedError( 

473 "ensure does not support compound primary keys with autoincrement fields." 

474 ) 

475 with self._connection() as connection: 

476 return connection.execute(_Ensure(table), rows).rowcount 

477 

478 filename: Optional[str] 

479 """Name of the file this database is connected to (`str` or `None`). 

480 

481 Set to `None` for in-memory databases. 

482 """