Coverage for python/lsst/daf/butler/registry/databases/sqlite.py: 23%

163 statements  

« prev     ^ index     » next       coverage.py v7.5.0, created at 2024-04-25 10:24 -0700

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27from __future__ import annotations 

28 

29__all__ = ["SqliteDatabase"] 

30 

31import copy 

32import os 

33import sqlite3 

34import urllib.parse 

35from collections.abc import Iterable 

36from contextlib import AbstractContextManager, closing 

37from typing import Any 

38 

39import sqlalchemy 

40import sqlalchemy.dialects.sqlite 

41import sqlalchemy.ext.compiler 

42 

43from ... import ddl 

44from ..._named import NamedValueAbstractSet 

45from ..interfaces import Database, StaticTablesContext 

46 

47 

def _onSqlite3Connect(
    dbapiConnection: sqlite3.Connection, connectionRecord: sqlalchemy.pool._ConnectionRecord
) -> None:
    """Configure a newly created SQLite DBAPI connection.

    Parameters
    ----------
    dbapiConnection : `sqlite3.Connection`
        The raw DBAPI connection to configure.
    connectionRecord : `sqlalchemy.pool._ConnectionRecord`
        Connection record passed along with the connection; not used here.
    """
    assert isinstance(dbapiConnection, sqlite3.Connection)
    # With no isolation level set, pysqlite stops emitting its own BEGIN and
    # COMMIT statements.
    dbapiConnection.isolation_level = None
    pragmas = (
        # Turn on foreign key enforcement.
        "PRAGMA foreign_keys=ON;",
        # Timeout is in ms, so this is 5min (way longer than should be
        # needed).
        "PRAGMA busy_timeout = 300000;",
    )
    with closing(dbapiConnection.cursor()) as cursor:
        for statement in pragmas:
            cursor.execute(statement)

58 

59 

class SqliteDatabase(Database):
    """A `Database` implementation that talks to SQLite3.

    Parameters
    ----------
    engine : `sqlalchemy.engine.Engine`
        Engine to use for this connection.
    origin : `int`
        Integer ID used as the default for any datasets, quanta, or other
        entities whose primary key is an (autoincrement, origin) compound.
    namespace : `str`, optional
        Namespace (schema) this database is associated with.  When `None`,
        the connection's default schema is used (which may be `None`).
    writeable : `bool`, optional
        If `True`, write operations on the database are allowed, including
        ``CREATE TABLE``.

    Notes
    -----
    Passing a ``namespace`` other than `None` has not been tested yet and may
    be broken.  Writing those tests needs an API for attaching to different
    databases, and what is common/different across databases has not been
    worked out well enough to define one.
    """

    def __init__(
        self,
        *,
        engine: sqlalchemy.engine.Engine,
        origin: int,
        namespace: str | None = None,
        writeable: bool = True,
    ):
        # The filename is looked up from the live connection; everything else
        # is handed straight to the shared initializer.
        self._init(
            engine=engine,
            origin=origin,
            namespace=namespace,
            writeable=writeable,
            filename=_find_database_filename(engine, namespace),
            metadata=None,
        )

    def _init(
        self,
        *,
        engine: sqlalchemy.engine.Engine,
        origin: int,
        namespace: str | None = None,
        writeable: bool = True,
        filename: str | None,
        metadata: sqlalchemy.schema.MetaData | None,
    ) -> None:
        # Common setup used by both ``__init__`` and ``clone``.
        super().__init__(origin=origin, engine=engine, namespace=namespace, metadata=metadata)
        self._writeable = writeable
        self.filename = filename

    def clone(self) -> SqliteDatabase:
        # Build the copy without running ``__init__`` and reuse this
        # instance's engine, metadata and cached filename.
        duplicate = self.__new__(type(self))
        duplicate._init(
            engine=self._engine,
            origin=self.origin,
            namespace=self.namespace,
            writeable=self._writeable,
            filename=self.filename,
            metadata=self._metadata,
        )
        return duplicate

    @classmethod
    def makeDefaultUri(cls, root: str) -> str | None:
        # The default database is a ``gen3.sqlite3`` file directly under
        # ``root``.
        path = os.path.join(root, "gen3.sqlite3")
        return f"sqlite:///{path}"

    @classmethod
    def makeEngine(
        cls,
        uri: str | sqlalchemy.engine.URL | None = None,
        *,
        filename: str | None = None,
        writeable: bool = True,
    ) -> sqlalchemy.engine.Engine:
        """Build a `sqlalchemy.engine.Engine` from either a SQLAlchemy URI or
        a filename.

        Parameters
        ----------
        uri : `str` or `sqlalchemy.engine.URL`, optional
            A SQLAlchemy URI connection string.
        filename : `str`, optional
            Name of the SQLite database file, or `None` to use an in-memory
            database.  Ignored if ``uri is not None``.
        writeable : `bool`, optional
            If `True`, allow write operations on the database, including
            ``CREATE TABLE``.

        Returns
        -------
        engine : `sqlalchemy.engine.Engine`
            A database engine.

        Raises
        ------
        NotImplementedError
            Raised if ``uri`` has ``uri=true`` among its query parameters, or
            if a read-only in-memory database is requested.
        """
        # Telling SQLite whether we want a read-only or read-write connection
        # requires opening the DBAPI connection with a "URI"-style connection
        # string.  SQLAlchemy claims to support that
        # (https://docs.sqlalchemy.org/en/13/dialects/sqlite.html#uri-connections),
        # but it doesn't seem to work as advertised.  The workaround is the
        # 'creator' argument of sqlalchemy.engine.create_engine, which accepts
        # a callable that makes the DBAPI connection for us.
        if uri is not None:
            if isinstance(uri, sqlalchemy.engine.URL):
                # Strings have to be parsed anyway, so reduce URL objects to
                # strings and handle both the same way.
                uri = uri.render_as_string(hide_password=False)
            parsed = urllib.parse.urlparse(uri)
            if "uri=true" in parsed.query.split("&"):
                # A SQLAlchemy URI that is itself trying to use a SQLite URI
                # may mix URI components meant for SQLite and for SQLAlchemy.
                # Supporting that would mean reimplementing all of the
                # (broken) logic SQLAlchemy has for it, and we don't need it,
                # so we just don't.
                raise NotImplementedError("SQLite connection strings with 'uri=true' are not supported.")
            # Otherwise this is a SQLAlchemy URI wrapping a non-URI SQLite
            # connection string; extract it for use in the creator call.
            filename = parsed.path[1:] if parsed.path.startswith("/") else None
        elif filename is None:
            uri = "sqlite://"
        else:
            uri = f"sqlite:///{filename}"

        if filename is None:
            if not writeable:
                raise NotImplementedError("Read-only :memory: databases are not supported.")
            target = ":memory:"
        else:
            mode = "rwc" if writeable else "ro"
            target = f"file:{filename}?mode={mode}&uri=true"

        def creator() -> sqlite3.Connection:
            return sqlite3.connect(target, check_same_thread=False, uri=True)

        engine = sqlalchemy.engine.create_engine(uri, creator=creator)

        sqlalchemy.event.listen(engine, "connect", _onSqlite3Connect)

        def _beginTransaction(connection: sqlalchemy.engine.Connection) -> sqlalchemy.engine.Connection:
            assert connection.dialect.name == "sqlite"
            # pysqlite's buggy transaction handling never BEGINs, so we do it
            # ourselves.  When the transaction might involve writes we ask
            # SQLite to try to acquire a lock right away (this should lead to
            # more blocking and fewer deadlocks).
            statement = "BEGIN IMMEDIATE" if writeable else "BEGIN"
            connection.execute(sqlalchemy.text(statement))
            return connection

        sqlalchemy.event.listen(engine, "begin", _beginTransaction)

        return engine

    @classmethod
    def fromEngine(
        cls,
        engine: sqlalchemy.engine.Engine,
        *,
        origin: int,
        namespace: str | None = None,
        writeable: bool = True,
    ) -> Database:
        # Thin wrapper that forwards everything to the constructor.
        return cls(engine=engine, origin=origin, namespace=namespace, writeable=writeable)

    def isWriteable(self) -> bool:
        # Reports the flag recorded at construction.
        return self._writeable

    def __str__(self) -> str:
        return f"SQLite3@{self.filename}" if self.filename else "SQLite3@:memory:"

    def _lockTables(
        self, connection: sqlalchemy.engine.Connection, tables: Iterable[sqlalchemy.schema.Table] = ()
    ) -> None:
        # Docstring inherited.
        # Nothing to do here: our SQLite database always takes a
        # full-database lock when a transaction begins, so table-level locks
        # are unnecessary - and SQLite doesn't have table-level locking
        # anyway.
        return

    # MyPy complains that this return type isn't covariant with the one on the
    # base class method.  That is formally correct but irrelevant - the base
    # class return type is _GeneratorContextManager only because the
    # contextmanager decorator generates it.
    def declareStaticTables(  # type: ignore
        self, *, create: bool
    ) -> AbstractContextManager[StaticTablesContext]:
        # For an in-memory, writeable database the schema can be lost on
        # re-connect, so it may have to be re-created even when create=False.
        # This is only really relevant for tests, and it's convenient there.
        if self.filename is None and self.isWriteable():
            existing = sqlalchemy.inspect(self._engine).get_table_names(schema=self.namespace)
            if not existing:
                create = True
        return super().declareStaticTables(create=create)

    def _convertFieldSpec(
        self, table: str, spec: ddl.FieldSpec, metadata: sqlalchemy.MetaData, **kwargs: Any
    ) -> sqlalchemy.schema.Column:
        if not spec.autoincrement:
            return super()._convertFieldSpec(table, spec, metadata, **kwargs)
        if not spec.primaryKey:
            raise RuntimeError(
                f"Autoincrement field {table}.{spec.name} that is not a primary key is not supported."
            )
        if spec.dtype != sqlalchemy.Integer:
            # SQLite's autoincrement is very limited: the column type has to
            # be exactly "INTEGER".  SQLite also doesn't distinguish between
            # the different integer types, so switching the type on a copy of
            # the spec is safe.
            spec = copy.copy(spec)
            spec.dtype = sqlalchemy.Integer
        return super()._convertFieldSpec(table, spec, metadata, **kwargs)

    def _makeColumnConstraints(self, table: str, spec: ddl.FieldSpec) -> list[sqlalchemy.CheckConstraint]:
        # SQLite ignores string lengths unless told otherwise, which leads to
        # problems with other databases, so every string column gets an
        # explicit length constraint here.
        own = []
        if spec.isStringType():
            constraintName = self.shrinkDatabaseEntityName("_".join((table, "len", spec.name)))
            # The lower bound is there because Oracle converts empty strings
            # to NULL.
            expression = f'length("{spec.name}")<={spec.length} AND length("{spec.name}")>=1'
            own.append(sqlalchemy.CheckConstraint(expression, name=constraintName))
        return [*own, *super()._makeColumnConstraints(table, spec)]

    def _convertTableSpec(
        self, name: str, spec: ddl.TableSpec, metadata: sqlalchemy.MetaData, **kwargs: Any
    ) -> sqlalchemy.schema.Table:
        keyNames = {field.name for field in spec.fields if field.primaryKey}
        autoNames = {field.name for field in spec.fields if field.autoincrement}
        if len(autoNames) > 1:
            raise RuntimeError("At most one autoincrement field per table is allowed.")
        if autoNames and len(keyNames) > 1:
            # SQLite's default rowid-based autoincrement does not work when
            # the field is only one part of a compound primary key.  The
            # workaround is an extra single-column table that we insert into
            # to generate those IDs.  That is safe only if the records of
            # that single-column table are already unique on the
            # autoincrement field alone, without the rest of the primary key;
            # in practice those are the records with origin == self.origin.
            # Hence the only other key field we can accept is 'origin'.
            if keyNames - autoNames != {"origin"}:
                raise NotImplementedError(
                    "Compound primary keys with an autoincrement are only supported in SQLite "
                    "if the only non-autoincrement primary key field is 'origin'."
                )
        if not spec.recycleIds:
            kwargs = dict(kwargs, sqlite_autoincrement=True)
        return super()._convertTableSpec(name, spec, metadata, **kwargs)

    def replace(self, table: sqlalchemy.schema.Table, *rows: dict) -> None:
        self.assertTableWriteable(table, f"Cannot replace into read-only table {table}.")
        if not rows:
            return
        insert = sqlalchemy.dialects.sqlite.insert(table)
        # On conflict, every column that is not part of the primary key is
        # overwritten with the value from the row that failed to insert.
        updates = {}
        for column in table.columns:
            if column.name not in table.primary_key:
                updates[column.name] = getattr(insert.excluded, column.name)
        statement = insert.on_conflict_do_update(index_elements=table.primary_key, set_=updates)
        with self._transaction() as (_, connection):
            connection.execute(statement, rows)

    def ensure(self, table: sqlalchemy.schema.Table, *rows: dict, primary_key_only: bool = False) -> int:
        self.assertTableWriteable(table, f"Cannot ensure into read-only table {table}.")
        if not rows:
            return 0
        insert = sqlalchemy.dialects.sqlite.insert(table)
        if primary_key_only:
            statement = insert.on_conflict_do_nothing(index_elements=table.primary_key)
        else:
            statement = insert.on_conflict_do_nothing()
        with self._transaction() as (_, connection):
            result = connection.execute(statement, rows)
            return result.rowcount

    def constant_rows(
        self,
        fields: NamedValueAbstractSet[ddl.FieldSpec],
        *rows: dict,
        name: str | None = None,
    ) -> sqlalchemy.sql.FromClause:
        # Docstring inherited.
        # SQLite does support VALUES, but it cannot give that construct a
        # name or name its columns, so there is no way to actually join it
        # into a SELECT query.  The only alternative seems to be something
        # like:
        #
        #    SELECT ? AS a, ? AS b
        #    UNION ALL
        #    SELECT ? AS a, ? AS b
        #
        selects = []
        for row in rows:
            columns = [
                sqlalchemy.sql.literal(row[field.name], field.dtype).label(field.name) for field in fields
            ]
            selects.append(sqlalchemy.sql.select(*columns))
        return sqlalchemy.sql.union_all(*selects).alias(name)

    @property
    def has_distinct_on(self) -> bool:
        # Docstring inherited.
        return False

    @property
    def has_any_aggregate(self) -> bool:
        # Docstring inherited.
        return True

    def apply_any_aggregate(self, column: sqlalchemy.ColumnElement[Any]) -> sqlalchemy.ColumnElement[Any]:
        # Docstring inherited.
        # SQLite allows a column in the SELECT clause without an aggregate
        # function even when it is not in the GROUP BY; if there is more than
        # one value an arbitrary one is picked.  The column can therefore be
        # returned untouched.
        return column

    filename: str | None
    """Name of the file this database is connected to (`str` or `None`).

    Set to `None` for in-memory databases.
    """

420 

421 

def _find_database_filename(
    engine: sqlalchemy.engine.Engine,
    namespace: str | None = None,
) -> str | None:
    """Look up the filename reported for one database of a connection.

    Parameters
    ----------
    engine : `sqlalchemy.engine.Engine`
        Engine used to open the connection that is queried with
        ``PRAGMA database_list``.
    namespace : `str`, optional
        Name of the database to look for.  `None` means ``"main"``.

    Returns
    -------
    filename : `str` or `None`
        Filename reported for the requested database, or `None` when the
        reported filename is empty.

    Raises
    ------
    RuntimeError
        Raised if the pragma returns no rows, or no row whose name matches
        ``namespace``.
    """
    with engine.connect() as connection, closing(connection.connection.cursor()) as cursor:
        attached = list(cursor.execute("PRAGMA database_list").fetchall())
    if not attached:
        raise RuntimeError("No database in connection.")
    wanted = "main" if namespace is None else namespace
    # Each row unpacks as (ignored, database name, filename); keep the
    # filenames of the rows whose name matches and use the first one.
    candidates = [path for _, dbname, path in attached if dbname == wanted]
    if not candidates:
        raise RuntimeError(f"No '{wanted}' database in connection.")
    return candidates[0] or None