Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = ["SqliteDatabase"] 

24 

25from contextlib import closing 

26import copy 

27from typing import ContextManager, List, Optional 

28from dataclasses import dataclass 

29import os 

30import urllib.parse 

31 

32import sqlite3 

33import sqlalchemy 

34import sqlalchemy.ext.compiler 

35 

36from ..interfaces import Database, ReadOnlyDatabaseError, StaticTablesContext 

37from ...core import ddl 

38 

39 

40def _onSqlite3Connect(dbapiConnection, connectionRecord): 

41 assert isinstance(dbapiConnection, sqlite3.Connection) 

42 # Prevent pysqlite from emitting BEGIN and COMMIT statements. 

43 dbapiConnection.isolation_level = None 

44 # Enable foreign keys 

45 with closing(dbapiConnection.cursor()) as cursor: 

46 cursor.execute("PRAGMA foreign_keys=ON;") 

47 cursor.execute("PRAGMA busy_timeout = 300000;") # in ms, so 5min (way longer than should be needed) 

48 

49 

50def _onSqlite3Begin(connection): 

51 assert connection.dialect.name == "sqlite" 

52 # Replace pysqlite's buggy transaction handling that never BEGINs with our 

53 # own that does, and tell SQLite to try to acquire a lock as soon as we 

54 # start a transaction (this should lead to more blocking and fewer 

55 # deadlocks). 

56 connection.execute("BEGIN IMMEDIATE") 

57 return connection 

58 

59 

class _Replace(sqlalchemy.sql.Insert):
    """A SQLAlchemy query that compiles to INSERT ... ON CONFLICT REPLACE
    on the primary key constraint for the table.
    """
    pass

65 

66 

@sqlalchemy.ext.compiler.compiles(_Replace, "sqlite")
def _replace(insert, compiler, **kw):
    """Generate an INSERT ... ON CONFLICT REPLACE query.

    Parameters
    ----------
    insert : `_Replace`
        The statement being compiled.
    compiler
        The SQLAlchemy SQL compiler for the active dialect.
    **kw
        Additional keyword arguments forwarded to ``compiler.visit_insert``.

    Returns
    -------
    sql : `str`
        The compiled ``INSERT`` statement with an ``ON CONFLICT`` clause on
        the table's primary key appended.
    """
    # SQLite and PostgreSQL use similar syntax for their ON CONFLICT extension,
    # but SQLAlchemy only knows about PostgreSQL's, so we have to compile some
    # custom text SQL ourselves.
    result = compiler.visit_insert(insert, **kw)
    preparer = compiler.preparer
    pk_columns = ", ".join(preparer.format_column(col) for col in insert.table.primary_key)
    result += f" ON CONFLICT ({pk_columns})"
    columns = [preparer.format_column(col) for col in insert.table.columns
               if col.name not in insert.table.primary_key]
    if columns:
        updates = ", ".join(f"{col} = excluded.{col}" for col in columns)
        result += f" DO UPDATE SET {updates}"
    else:
        # Every column is part of the primary key, so a conflicting row is
        # already identical to the new one; an empty "DO UPDATE SET" would be
        # a syntax error.
        result += " DO NOTHING"
    return result

83 

84 

# Specification for the single-column helper tables that
# `SqliteDatabase._convertTableSpec` creates to generate autoincrement values
# for tables with compound primary keys.
_AUTOINCR_TABLE_SPEC = ddl.TableSpec(
    fields=[ddl.FieldSpec(name="id", dtype=sqlalchemy.Integer, primaryKey=True)]
)

88 

89 

90@dataclass 

91class _AutoincrementCompoundKeyWorkaround: 

92 """A workaround for SQLite's lack of support for compound primary keys that 

93 include an autoincrement field. 

94 """ 

95 

96 table: sqlalchemy.schema.Table 

97 """A single-column internal table that can be inserted into to yield 

98 autoincrement values (`sqlalchemy.schema.Table`). 

99 """ 

100 

101 column: str 

102 """The name of the column in the original table that needs to be populated 

103 with values from the internal table (`str`). 

104 """ 

105 

106 

class SqliteDatabase(Database):
    """An implementation of the `Database` interface for SQLite3.

    Parameters
    ----------
    connection : `sqlalchemy.engine.Connection`
        An existing connection created by a previous call to `connect`.
    origin : `int`
        An integer ID that should be used as the default for any datasets,
        quanta, or other entities that use a (autoincrement, origin) compound
        primary key.
    namespace : `str`, optional
        The namespace (schema) this database is associated with.  If `None`,
        the default schema for the connection is used (which may be `None`).
    writeable : `bool`, optional
        If `True`, allow write operations on the database, including
        ``CREATE TABLE``.

    Notes
    -----
    The case where ``namespace is not None`` is not yet tested, and may be
    broken; we need an API for attaching to different databases in order to
    write those tests, but haven't yet worked out what is common/different
    across databases well enough to define it.
    """

    def __init__(self, *, connection: sqlalchemy.engine.Connection, origin: int,
                 namespace: Optional[str] = None, writeable: bool = True):
        super().__init__(origin=origin, connection=connection, namespace=namespace)
        # Get the filename from a call to 'PRAGMA database_list'.
        with closing(connection.connection.cursor()) as cursor:
            dbList = cursor.execute("PRAGMA database_list").fetchall()
        if not dbList:
            raise RuntimeError("No database in connection.")
        if namespace is None:
            namespace = "main"
        for _, dbname, filename in dbList:
            if dbname == namespace:
                break
        else:
            raise RuntimeError(f"No '{namespace}' database in connection.")
        # Normalize an empty filename (no backing file) to None.
        self.filename = filename or None
        self._writeable = writeable
        # Maps table name -> workaround record for tables whose compound
        # primary key includes an autoincrement field.
        self._autoincr = {}

    @classmethod
    def makeDefaultUri(cls, root: str) -> Optional[str]:
        # The default database is a file named "gen3.sqlite3" under ``root``.
        return "sqlite:///" + os.path.join(root, "gen3.sqlite3")

    @classmethod
    def connect(cls, uri: Optional[str] = None, *, filename: Optional[str] = None,
                writeable: bool = True) -> sqlalchemy.engine.Connection:
        """Create a `sqlalchemy.engine.Connection` from a SQLAlchemy URI or
        filename.

        Parameters
        ----------
        uri : `str`, optional
            A SQLAlchemy URI connection string.
        filename : `str`, optional
            Name of the SQLite database file, or `None` to use an in-memory
            database.  Ignored if ``uri is not None``.
        writeable : `bool`, optional
            If `True`, allow write operations on the database, including
            ``CREATE TABLE``.

        Returns
        -------
        cs : `sqlalchemy.engine.Connection`
            A database connection and transaction state.

        Raises
        ------
        NotImplementedError
            Raised if ``uri`` contains a ``uri=true`` query component, or if
            a read-only in-memory database is requested.
        RuntimeError
            Raised if opening the connection fails with
            `sqlalchemy.exc.OperationalError`.
        """
        # In order to be able to tell SQLite that we want a read-only or
        # read-write connection, we need to make the SQLite DBAPI connection
        # with a "URI"-based connection string.  SQLAlchemy claims it can do
        # this
        # (https://docs.sqlalchemy.org/en/13/dialects/sqlite.html#uri-connections),
        # but it doesn't seem to work as advertised.  To work around this, we
        # use the 'creator' argument to sqlalchemy.engine.create_engine, which
        # lets us pass a callable that creates the DBAPI connection.
        if uri is None:
            if filename is None:
                target = ":memory:"
                uri = "sqlite://"
            else:
                target = f"file:{filename}"
                uri = f"sqlite:///{filename}"
        else:
            parsed = urllib.parse.urlparse(uri)
            queries = parsed.query.split("&")
            if "uri=true" in queries:
                # This is a SQLAlchemy URI that is already trying to make a
                # SQLite connection via a SQLite URI, and hence there may
                # be URI components for both SQLite and SQLAlchemy.  We
                # don't need to support that, and it'd be a
                # reimplementation of all of the (broken) logic in
                # SQLAlchemy for doing this, so we just don't.
                raise NotImplementedError("SQLite connection strings with 'uri=true' are not supported.")
            # This is just a SQLAlchemy URI with a non-URI SQLite
            # connection string inside it.  Pull that out so we can use it
            # in the creator call.
            if parsed.path.startswith("/"):
                filename = parsed.path[1:]
                target = f"file:{filename}"
            else:
                filename = None
                target = ":memory:"
        if filename is None:
            if not writeable:
                raise NotImplementedError("Read-only :memory: databases are not supported.")
        elif writeable:
            target += '?mode=rwc&uri=true'
        else:
            target += '?mode=ro&uri=true'

        def creator():
            # Open the DBAPI connection ourselves so the mode flags in
            # ``target`` are honored.
            return sqlite3.connect(target, check_same_thread=False, uri=True)

        engine = sqlalchemy.engine.create_engine(uri, poolclass=sqlalchemy.pool.NullPool,
                                                 creator=creator)

        sqlalchemy.event.listen(engine, "connect", _onSqlite3Connect)
        sqlalchemy.event.listen(engine, "begin", _onSqlite3Begin)
        try:
            return engine.connect()
        except sqlalchemy.exc.OperationalError as err:
            raise RuntimeError(f"Error creating connection with uri='{uri}', filename='{filename}', "
                               f"target={target}.") from err

    @classmethod
    def fromConnection(cls, connection: sqlalchemy.engine.Connection, *, origin: int,
                       namespace: Optional[str] = None, writeable: bool = True) -> Database:
        return cls(connection=connection, origin=origin, writeable=writeable, namespace=namespace)

    def isWriteable(self) -> bool:
        return self._writeable

    def __str__(self) -> str:
        if self.filename:
            return f"SQLite3@{self.filename}"
        else:
            return "SQLite3@:memory:"

    def declareStaticTables(self, *, create: bool) -> ContextManager[StaticTablesContext]:
        # If the user asked for an in-memory, writeable database, assume
        # create=True.  This is only really relevant for tests, and it's
        # convenient there.
        return super().declareStaticTables(create=(create if self.filename else self.isWriteable()))

    def _convertFieldSpec(self, table: str, spec: ddl.FieldSpec, metadata: sqlalchemy.MetaData,
                          **kwds) -> sqlalchemy.schema.Column:
        if spec.autoincrement:
            if not spec.primaryKey:
                raise RuntimeError(f"Autoincrement field {table}.{spec.name} that is not a "
                                   f"primary key is not supported.")
            if spec.dtype != sqlalchemy.Integer:
                # SQLite's autoincrement is really limited; it only works if
                # the column type is exactly "INTEGER".  But it also doesn't
                # care about the distinctions between different integer types,
                # so it's safe to change it.  Work on a copy so the caller's
                # spec is not modified.
                spec = copy.copy(spec)
                spec.dtype = sqlalchemy.Integer
        return super()._convertFieldSpec(table, spec, metadata, **kwds)

    def _makeColumnConstraints(self, table: str, spec: ddl.FieldSpec) -> List[sqlalchemy.CheckConstraint]:
        # For sqlite we force constraints on all string columns since sqlite
        # ignores everything otherwise and this leads to problems with
        # other databases.

        constraints = []
        if spec.dtype == sqlalchemy.String:
            name = self.shrinkDatabaseEntityName("_".join([table, "len", spec.name]))
            # Oracle converts empty strings to NULL, so also require a
            # minimum length of 1.
            constraints.append(sqlalchemy.CheckConstraint(f"length({spec.name})<={spec.length}"
                                                          f" AND length({spec.name})>=1",
                                                          name=name))

        constraints.extend(super()._makeColumnConstraints(table, spec))
        return constraints

    def _convertTableSpec(self, name: str, spec: ddl.TableSpec, metadata: sqlalchemy.MetaData,
                          **kwds) -> sqlalchemy.schema.Table:
        primaryKeyFieldNames = {field.name for field in spec.fields if field.primaryKey}
        autoincrFieldNames = {field.name for field in spec.fields if field.autoincrement}
        if len(autoincrFieldNames) > 1:
            raise RuntimeError("At most one autoincrement field per table is allowed.")
        if len(primaryKeyFieldNames) > 1 and len(autoincrFieldNames) > 0:
            # SQLite's default rowid-based autoincrement doesn't work if the
            # field is just one field in a compound primary key.  As a
            # workaround, we create an extra table with just one column that
            # we'll insert into to generate those IDs.  That's only safe if
            # that single-column table's records are already unique with just
            # the autoincrement field, not the rest of the primary key.  In
            # practice, that means the single-column table's records are those
            # for which origin == self.origin.
            autoincrFieldName, = autoincrFieldNames
            otherPrimaryKeyFieldNames = primaryKeyFieldNames - autoincrFieldNames
            if otherPrimaryKeyFieldNames != {"origin"}:
                # We need the only other field in the key to be 'origin'.
                raise NotImplementedError(
                    "Compound primary keys with an autoincrement are only supported in SQLite "
                    "if the only non-autoincrement primary key field is 'origin'."
                )
            self._autoincr[name] = _AutoincrementCompoundKeyWorkaround(
                table=self._convertTableSpec(f"_autoinc_{name}", _AUTOINCR_TABLE_SPEC, metadata, **kwds),
                column=autoincrFieldName
            )
        if not spec.recycleIds:
            kwds = dict(kwds, sqlite_autoincrement=True)
        return super()._convertTableSpec(name, spec, metadata, **kwds)

    def insert(self, table: sqlalchemy.schema.Table, *rows: dict, returnIds: bool = False,
               ) -> Optional[List[int]]:
        autoincr = self._autoincr.get(table.name)
        if autoincr is None:
            # No compound-key workaround registered for this table.
            return super().insert(table, *rows, returnIds=returnIds)
        # This table has a compound primary key that includes an
        # autoincrement.  That doesn't work natively in SQLite, so we
        # insert into a single-column table and use those IDs.
        if not rows:
            return [] if returnIds else None
        if autoincr.column in rows[0]:
            # Caller passed the autoincrement key values explicitly in the
            # first row.  They had better have done the same for all rows,
            # or SQLAlchemy would have a problem, even if we didn't.
            assert all(autoincr.column in row for row in rows)
            # We need to insert only the values that correspond to
            # ``origin == self.origin`` into the single-column table, to
            # make sure we don't generate conflicting keys there later.
            rowsForAutoincrTable = [dict(id=row[autoincr.column])
                                    for row in rows if row["origin"] == self.origin]
            # Insert into the autoincr table and the target table inside
            # a transaction.  The main-table insertion can take care of
            # returnIds for us.
            with self.transaction():
                # Executing the insert with no parameter sets would add a
                # row of default values (i.e. a spurious new ID), so skip it
                # when no row belongs to our origin.
                if rowsForAutoincrTable:
                    self._connection.execute(autoincr.table.insert(), *rowsForAutoincrTable)
                return super().insert(table, *rows, returnIds=returnIds)
        # Caller did not pass autoincrement key values on the first
        # row.  Make sure they didn't ever do that, and also make
        # sure the origin that was passed in is always self.origin,
        # because we can't safely generate autoincrement values
        # otherwise.
        assert all(autoincr.column not in row and row["origin"] == self.origin for row in rows)
        # Insert into the autoincr table one by one to get the
        # primary key values back, then insert into the target table
        # in the same transaction.
        with self.transaction():
            newRows = []
            ids = []
            for row in rows:
                newRow = row.copy()
                newId = self._connection.execute(autoincr.table.insert()).inserted_primary_key[0]
                newRow[autoincr.column] = newId
                newRows.append(newRow)
                ids.append(newId)
            # Don't ever ask to returnIds here, because we've already
            # got them.
            super().insert(table, *newRows)
        return ids if returnIds else None

    def replace(self, table: sqlalchemy.schema.Table, *rows: dict):
        if not self.isWriteable():
            raise ReadOnlyDatabaseError(f"Attempt to replace into read-only database '{self}'.")
        if not rows:
            return
        if table.name in self._autoincr:
            raise NotImplementedError(
                "replace does not support compound primary keys with autoincrement fields."
            )
        self._connection.execute(_Replace(table), *rows)

    filename: Optional[str]
    """Name of the file this database is connected to (`str` or `None`).

    Set to `None` for in-memory databases.
    """