Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = ["SqliteDatabase"] 

24 

25from contextlib import closing 

26import copy 

27from typing import Any, ContextManager, Dict, List, Optional 

28from dataclasses import dataclass 

29import os 

30import urllib.parse 

31 

32import sqlite3 

33import sqlalchemy 

34import sqlalchemy.ext.compiler 

35 

36from ..interfaces import Database, ReadOnlyDatabaseError, StaticTablesContext 

37from ...core import ddl 

38 

39 

40def _onSqlite3Connect(dbapiConnection: sqlite3.Connection, 

41 connectionRecord: sqlalchemy.pool._ConnectionRecord) -> None: 

42 assert isinstance(dbapiConnection, sqlite3.Connection) 

43 # Prevent pysqlite from emitting BEGIN and COMMIT statements. 

44 dbapiConnection.isolation_level = None 

45 # Enable foreign keys 

46 with closing(dbapiConnection.cursor()) as cursor: 

47 cursor.execute("PRAGMA foreign_keys=ON;") 

48 cursor.execute("PRAGMA busy_timeout = 300000;") # in ms, so 5min (way longer than should be needed) 

49 

50 

def _onSqlite3Begin(connection: sqlalchemy.engine.Connection) -> sqlalchemy.engine.Connection:
    """Explicitly start a SQLite transaction with ``BEGIN IMMEDIATE``.

    Registered in this module as a SQLAlchemy ``"begin"`` event listener.

    Parameters
    ----------
    connection : `sqlalchemy.engine.Connection`
        Connection on which the transaction is starting; must use the
        ``sqlite`` dialect.

    Returns
    -------
    connection : `sqlalchemy.engine.Connection`
        The same connection that was passed in.
    """
    assert connection.dialect.name == "sqlite"
    # Replace pysqlite's buggy transaction handling that never BEGINs with our
    # own that does, and tell SQLite to try to acquire a lock as soon as we
    # start a transaction (this should lead to more blocking and fewer
    # deadlocks).
    # The statement is wrapped in text() because passing a plain string to
    # Connection.execute is deprecated in newer SQLAlchemy releases.
    connection.execute(sqlalchemy.text("BEGIN IMMEDIATE"))
    return connection

59 

60 

class _Replace(sqlalchemy.sql.Insert):
    """An INSERT construct with its own compilation rule for SQLite.

    For the ``sqlite`` dialect this compiles to a regular INSERT followed by
    an ``ON CONFLICT`` clause on the table's primary key columns, so that a
    conflicting row is overwritten rather than rejected.
    """

66 

67 

# Hard to infer what types these should be from SQLAlchemy docs; just disable
# static typing by calling everything "Any".
@sqlalchemy.ext.compiler.compiles(_Replace, "sqlite")
def _replace(insert: Any, compiler: Any, **kwargs: Any) -> Any:
    """Compile a `_Replace` construct into an INSERT ... ON CONFLICT query.

    Parameters
    ----------
    insert
        The `_Replace` construct being compiled; its ``table`` attribute
        supplies the columns and the primary key.
    compiler
        The SQL compiler; provides ``visit_insert`` and ``preparer``.
    **kwargs
        Forwarded unchanged to ``compiler.visit_insert``.

    Returns
    -------
    sql
        The text of the plain INSERT followed by an
        ``ON CONFLICT (<pk columns>) DO UPDATE SET ...`` clause, or
        ``ON CONFLICT (<pk columns>) DO NOTHING`` when the table has no
        columns outside its primary key.
    """
    # SQLite and PostgreSQL use similar syntax for their ON CONFLICT extension,
    # but SQLAlchemy only knows about PostgreSQL's, so we have to compile some
    # custom text SQL ourselves.
    result = compiler.visit_insert(insert, **kwargs)
    preparer = compiler.preparer
    primaryKeyColumns = list(insert.table.primary_key)
    # Compare by column name explicitly rather than relying on containment
    # semantics of the primary key constraint object.
    primaryKeyNames = {col.name for col in primaryKeyColumns}
    conflictTarget = ", ".join(preparer.format_column(col) for col in primaryKeyColumns)
    assignments = ", ".join(
        f"{formatted} = excluded.{formatted}"
        for formatted in (preparer.format_column(col) for col in insert.table.columns
                          if col.name not in primaryKeyNames)
    )
    if assignments:
        action = f"DO UPDATE SET {assignments}"
    else:
        # Every column is part of the primary key, so a conflicting row is
        # already identical; an empty SET list would be a syntax error.
        action = "DO NOTHING"
    return f"{result} ON CONFLICT ({conflictTarget}) {action}"

86 

87 

# Specification shared by the single-column helper tables that
# `SqliteDatabase` creates to generate autoincrement values for tables with
# compound primary keys.
_AUTOINCR_TABLE_SPEC = ddl.TableSpec(
    fields=[
        ddl.FieldSpec(name="id", dtype=sqlalchemy.Integer, primaryKey=True),
    ],
)

91 

92 

93@dataclass 

94class _AutoincrementCompoundKeyWorkaround: 

95 """A workaround for SQLite's lack of support for compound primary keys that 

96 include an autoincrement field. 

97 """ 

98 

99 table: sqlalchemy.schema.Table 

100 """A single-column internal table that can be inserted into to yield 

101 autoincrement values (`sqlalchemy.schema.Table`). 

102 """ 

103 

104 column: str 

105 """The name of the column in the original table that needs to be populated 

106 with values from the internal table (`str`). 

107 """ 

108 

109 

class SqliteDatabase(Database):
    """An implementation of the `Database` interface for SQLite3.

    Parameters
    ----------
    connection : `sqlalchemy.engine.Connection`
        An existing connection created by a previous call to `connect`.
    origin : `int`
        An integer ID that should be used as the default for any datasets,
        quanta, or other entities that use a (autoincrement, origin) compound
        primary key.
    namespace : `str`, optional
        The namespace (schema) this database is associated with.  If `None`,
        the default schema for the connection is used (which may be `None`).
    writeable : `bool`, optional
        If `True`, allow write operations on the database, including
        ``CREATE TABLE``.

    Raises
    ------
    RuntimeError
        Raised if the connection reports no attached databases, or none
        whose name matches ``namespace`` (``"main"`` when ``namespace`` is
        `None`).

    Notes
    -----
    The case where ``namespace is not None`` is not yet tested, and may be
    broken; we need an API for attaching to different databases in order to
    write those tests, but haven't yet worked out what is common/different
    across databases well enough to define it.
    """

    def __init__(self, *, connection: sqlalchemy.engine.Connection, origin: int,
                 namespace: Optional[str] = None, writeable: bool = True):
        super().__init__(origin=origin, connection=connection, namespace=namespace)
        # Get the filename from a call to 'PRAGMA database_list'.
        with closing(connection.connection.cursor()) as cursor:
            dbList = cursor.execute("PRAGMA database_list").fetchall()
        if not dbList:
            raise RuntimeError("No database in connection.")
        if namespace is None:
            namespace = "main"
        for _, dbname, filename in dbList:
            if dbname == namespace:
                break
        else:
            raise RuntimeError(f"No '{namespace}' database in connection.")
        # An empty filename is normalized to None.
        self.filename = filename if filename else None
        self._writeable = writeable
        # Maps the name of each table that needed the compound-key
        # autoincrement workaround to the helper table and column to use.
        self._autoincr: Dict[str, _AutoincrementCompoundKeyWorkaround] = {}

    @classmethod
    def makeDefaultUri(cls, root: str) -> Optional[str]:
        """Return the SQLAlchemy URI for a ``gen3.sqlite3`` file in ``root``.
        """
        return "sqlite:///" + os.path.join(root, "gen3.sqlite3")

    @classmethod
    def connect(cls, uri: Optional[str] = None, *, filename: Optional[str] = None,
                writeable: bool = True) -> sqlalchemy.engine.Connection:
        """Create a `sqlalchemy.engine.Connection` from a SQLAlchemy URI or
        filename.

        Parameters
        ----------
        uri : `str`, optional
            A SQLAlchemy URI connection string.
        filename : `str`, optional
            Name of the SQLite database file, or `None` to use an in-memory
            database.  Ignored if ``uri is not None``.
        writeable : `bool`, optional
            If `True`, allow write operations on the database, including
            ``CREATE TABLE``.

        Returns
        -------
        cs : `sqlalchemy.engine.Connection`
            A database connection and transaction state.

        Raises
        ------
        NotImplementedError
            Raised if ``uri`` has a ``uri=true`` query component, or if a
            read-only in-memory database is requested.
        RuntimeError
            Raised (chained from `sqlalchemy.exc.OperationalError`) if the
            connection cannot be established.
        """
        # In order to be able to tell SQLite that we want a read-only or
        # read-write connection, we need to make the SQLite DBAPI connection
        # with a "URI"-based connection string.  SQLAlchemy claims it can do
        # this
        # (https://docs.sqlalchemy.org/en/13/dialects/sqlite.html#uri-connections),
        # but it doesn't seem to work as advertised.  To work around this, we
        # use the 'creator' argument to sqlalchemy.engine.create_engine, which
        # lets us pass a callable that creates the DBAPI connection.
        if uri is None:
            if filename is None:
                target = ":memory:"
                uri = "sqlite://"
            else:
                target = f"file:{filename}"
                uri = f"sqlite:///{filename}"
        else:
            parsed = urllib.parse.urlparse(uri)
            queries = parsed.query.split("&")
            if "uri=true" in queries:
                # This is a SQLAlchemy URI that is already trying to make a
                # SQLite connection via a SQLite URI, and hence there may
                # be URI components for both SQLite and SQLAlchemy.  We
                # don't need to support that, and it'd be a
                # reimplementation of all of the (broken) logic in
                # SQLAlchemy for doing this, so we just don't.
                raise NotImplementedError("SQLite connection strings with 'uri=true' are not supported.")
            # This is just a SQLAlchemy URI with a non-URI SQLite
            # connection string inside it.  Pull that out so we can use it
            # in the creator call.
            if parsed.path.startswith("/"):
                filename = parsed.path[1:]
                target = f"file:{filename}"
            else:
                filename = None
                target = ":memory:"
        if filename is None:
            if not writeable:
                raise NotImplementedError("Read-only :memory: databases are not supported.")
        else:
            if writeable:
                target += '?mode=rwc&uri=true'
            else:
                target += '?mode=ro&uri=true'

        def creator() -> sqlite3.Connection:
            return sqlite3.connect(target, check_same_thread=False, uri=True)

        engine = sqlalchemy.engine.create_engine(uri, poolclass=sqlalchemy.pool.NullPool,
                                                 creator=creator)

        sqlalchemy.event.listen(engine, "connect", _onSqlite3Connect)
        sqlalchemy.event.listen(engine, "begin", _onSqlite3Begin)
        try:
            return engine.connect()
        except sqlalchemy.exc.OperationalError as err:
            raise RuntimeError(f"Error creating connection with uri='{uri}', filename='{filename}', "
                               f"target={target}.") from err

    @classmethod
    def fromConnection(cls, connection: sqlalchemy.engine.Connection, *, origin: int,
                       namespace: Optional[str] = None, writeable: bool = True) -> Database:
        """Construct an instance from an existing connection by forwarding
        all arguments to the constructor.
        """
        return cls(connection=connection, origin=origin, writeable=writeable, namespace=namespace)

    def isWriteable(self) -> bool:
        """Return the ``writeable`` flag this instance was constructed with.
        """
        return self._writeable

    def __str__(self) -> str:
        if self.filename:
            return f"SQLite3@{self.filename}"
        else:
            return "SQLite3@:memory:"

    # MyPy claims that the return type here isn't covariant with the return
    # type of the base class method, which is formally correct but irrelevant
    # - the base class return type is _GeneratorContextManager, but only
    # because it's generated by the contextmanager decorator.
    def declareStaticTables(self, *, create: bool) -> ContextManager[StaticTablesContext]:  # type: ignore
        # If the user asked for an in-memory, writeable database, assume
        # create=True.  This is only really relevant for tests, and it's
        # convenient there.
        return super().declareStaticTables(create=(create if self.filename else self.isWriteable()))

    def _convertFieldSpec(self, table: str, spec: ddl.FieldSpec, metadata: sqlalchemy.MetaData,
                          **kwargs: Any) -> sqlalchemy.schema.Column:
        if spec.autoincrement:
            if not spec.primaryKey:
                raise RuntimeError(f"Autoincrement field {table}.{spec.name} that is not a "
                                   f"primary key is not supported.")
            if spec.dtype != sqlalchemy.Integer:
                # SQLite's autoincrement is really limited; it only works if
                # the column type is exactly "INTEGER".  But it also doesn't
                # care about the distinctions between different integer types,
                # so it's safe to change it.
                # Work on a copy so the caller's spec is not modified.
                spec = copy.copy(spec)
                spec.dtype = sqlalchemy.Integer
        return super()._convertFieldSpec(table, spec, metadata, **kwargs)

    def _makeColumnConstraints(self, table: str, spec: ddl.FieldSpec) -> List[sqlalchemy.CheckConstraint]:
        # For sqlite we force constraints on all string columns since sqlite
        # ignores everything otherwise and this leads to problems with
        # other databases.

        constraints = []
        if spec.dtype == sqlalchemy.String:
            name = self.shrinkDatabaseEntityName("_".join([table, "len", spec.name]))
            constraints.append(sqlalchemy.CheckConstraint(f"length({spec.name})<={spec.length}"
                                                          # Oracle converts
                                                          # empty strings to
                                                          # NULL so check
                                                          f" AND length({spec.name})>=1",
                                                          name=name))

        constraints.extend(super()._makeColumnConstraints(table, spec))
        return constraints

    def _convertTableSpec(self, name: str, spec: ddl.TableSpec, metadata: sqlalchemy.MetaData,
                          **kwargs: Any) -> sqlalchemy.schema.Table:
        primaryKeyFieldNames = {field.name for field in spec.fields if field.primaryKey}
        autoincrFieldNames = {field.name for field in spec.fields if field.autoincrement}
        if len(autoincrFieldNames) > 1:
            raise RuntimeError("At most one autoincrement field per table is allowed.")
        if len(primaryKeyFieldNames) > 1 and len(autoincrFieldNames) > 0:
            # SQLite's default rowid-based autoincrement doesn't work if the
            # field is just one field in a compound primary key.  As a
            # workaround, we create an extra table with just one column that
            # we'll insert into to generate those IDs.  That's only safe if
            # that single-column table's records are already unique with just
            # the autoincrement field, not the rest of the primary key.  In
            # practice, that means the single-column table's records are those
            # for which origin == self.origin.
            autoincrFieldName, = autoincrFieldNames
            otherPrimaryKeyFieldNames = primaryKeyFieldNames - autoincrFieldNames
            if otherPrimaryKeyFieldNames != {"origin"}:
                # We need the only other field in the key to be 'origin'.
                raise NotImplementedError(
                    "Compound primary keys with an autoincrement are only supported in SQLite "
                    "if the only non-autoincrement primary key field is 'origin'."
                )
            self._autoincr[name] = _AutoincrementCompoundKeyWorkaround(
                table=self._convertTableSpec(f"_autoinc_{name}", _AUTOINCR_TABLE_SPEC, metadata, **kwargs),
                column=autoincrFieldName
            )
        if not spec.recycleIds:
            kwargs = dict(kwargs, sqlite_autoincrement=True)
        return super()._convertTableSpec(name, spec, metadata, **kwargs)

    def insert(self, table: sqlalchemy.schema.Table, *rows: dict, returnIds: bool = False,
               ) -> Optional[List[int]]:
        autoincr = self._autoincr.get(table.name)
        if autoincr is None:
            # No compound-key workaround registered for this table.
            return super().insert(table, *rows, returnIds=returnIds)
        # This table has a compound primary key that includes an
        # autoincrement.  That doesn't work natively in SQLite, so we
        # insert into a single-column table and use those IDs.
        if not rows:
            return [] if returnIds else None
        if autoincr.column in rows[0]:
            # Caller passed the autoincrement key values explicitly in the
            # first row.  They had better have done the same for all rows,
            # or SQLAlchemy would have a problem, even if we didn't.
            assert all(autoincr.column in row for row in rows)
            # We need to insert only the values that correspond to
            # ``origin == self.origin`` into the single-column table, to
            # make sure we don't generate conflicting keys there later.
            rowsForAutoincrTable = [dict(id=row[autoincr.column])
                                    for row in rows if row["origin"] == self.origin]
            # Insert into the autoincr table and the target table inside
            # a transaction.  The main-table insertion can take care of
            # returnIds for us.
            with self.transaction():
                # With no parameter sets, execute() would still run the bare
                # INSERT once (that is exactly how IDs are generated in the
                # other branch below), consuming an ID nobody asked for.
                if rowsForAutoincrTable:
                    self._connection.execute(autoincr.table.insert(), *rowsForAutoincrTable)
                return super().insert(table, *rows, returnIds=returnIds)
        # Caller did not pass autoincrement key values on the first
        # row.  Make sure they didn't ever do that, and also make
        # sure the origin that was passed in is always self.origin,
        # because we can't safely generate autoincrement values
        # otherwise.
        assert all(autoincr.column not in row and row["origin"] == self.origin for row in rows)
        # Insert into the autoincr table one by one to get the
        # primary key values back, then insert into the target table
        # in the same transaction.
        with self.transaction():
            newRows = []
            ids = []
            for row in rows:
                newRow = row.copy()
                newId = self._connection.execute(autoincr.table.insert()).inserted_primary_key[0]
                newRow[autoincr.column] = newId
                newRows.append(newRow)
                ids.append(newId)
            # Don't ever ask to returnIds here, because we've already
            # got them.
            super().insert(table, *newRows)
        return ids if returnIds else None

    def replace(self, table: sqlalchemy.schema.Table, *rows: dict) -> None:
        if not self.isWriteable():
            raise ReadOnlyDatabaseError(f"Attempt to replace into read-only database '{self}'.")
        if not rows:
            return
        if table.name in self._autoincr:
            raise NotImplementedError(
                "replace does not support compound primary keys with autoincrement fields."
            )
        self._connection.execute(_Replace(table), *rows)

    filename: Optional[str]
    """Name of the file this database is connected to (`str` or `None`).

    Set to `None` for in-memory databases.
    """