Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

245

246

247

248

249

250

251

252

253

254

255

256

257

258

259

260

261

262

263

264

265

266

267

268

269

270

271

272

273

274

275

276

277

278

279

280

281

282

283

284

285

286

287

288

289

290

291

292

293

294

295

296

297

298

299

300

301

302

303

304

305

306

307

308

309

310

311

312

313

314

315

316

317

318

319

320

321

322

323

324

325

326

327

328

329

330

331

332

333

334

335

336

337

338

339

340

341

342

343

344

345

346

347

348

349

350

351

352

353

354

355

356

357

358

359

360

361

362

363

364

365

366

367

368

369

370

371

372

373

374

# This file is part of daf_butler. 

# 

# Developed for the LSST Data Management System. 

# This product includes software developed by the LSST Project 

# (http://www.lsst.org). 

# See the COPYRIGHT file at the top-level directory of this distribution 

# for details of code ownership. 

# 

# This program is free software: you can redistribute it and/or modify 

# it under the terms of the GNU General Public License as published by 

# the Free Software Foundation, either version 3 of the License, or 

# (at your option) any later version. 

# 

# This program is distributed in the hope that it will be useful, 

# but WITHOUT ANY WARRANTY; without even the implied warranty of 

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

# GNU General Public License for more details. 

# 

# You should have received a copy of the GNU General Public License 

# along with this program. If not, see <http://www.gnu.org/licenses/>. 

from __future__ import annotations 

 

__all__ = ["SqliteDatabase"] 

 

from contextlib import closing 

import copy 

from typing import ContextManager, List, Optional 

from dataclasses import dataclass 

import os 

import urllib.parse 

 

import sqlite3 

import sqlalchemy 

import sqlalchemy.ext.compiler 

 

from ..interfaces import Database, ReadOnlyDatabaseError, StaticTablesContext 

from ...core import ddl 

 

 

def _onSqlite3Connect(dbapiConnection, connectionRecord):
    """Configure a freshly opened DBAPI connection.

    Registered as a SQLAlchemy "connect" event listener.

    Parameters
    ----------
    dbapiConnection : `sqlite3.Connection`
        The raw DBAPI connection that was just created.
    connectionRecord
        Unused here; part of the listener signature.
    """
    assert isinstance(dbapiConnection, sqlite3.Connection)
    # With isolation_level set to None, pysqlite no longer emits BEGIN and
    # COMMIT statements on its own.
    dbapiConnection.isolation_level = None
    pragmas = (
        # Enable foreign keys.
        "PRAGMA foreign_keys=ON;",
        # In ms, so 5min (way longer than should be needed).
        "PRAGMA busy_timeout = 300000;",
    )
    cursor = dbapiConnection.cursor()
    try:
        for statement in pragmas:
            cursor.execute(statement)
    finally:
        cursor.close()

 

 

def _onSqlite3Begin(connection):
    """Start a transaction explicitly with ``BEGIN IMMEDIATE``.

    Registered as a SQLAlchemy "begin" event listener.

    Parameters
    ----------
    connection
        Connection on which the transaction is starting; its dialect must be
        "sqlite".

    Returns
    -------
    connection
        The same object that was passed in.
    """
    dialectName = connection.dialect.name
    assert dialectName == "sqlite"
    # pysqlite's own transaction handling is buggy and never emits BEGIN, so
    # we emit it ourselves.  The IMMEDIATE form asks SQLite to try to acquire
    # a lock as soon as the transaction starts (this should lead to more
    # blocking and fewer deadlocks).
    beginStatement = "BEGIN IMMEDIATE"
    connection.execute(beginStatement)
    return connection

 

 

class _Replace(sqlalchemy.sql.Insert):
    """A SQLAlchemy INSERT construct that is compiled for SQLite with an
    ``ON CONFLICT`` clause on the table's primary key, so that a conflicting
    row is overwritten with the new values instead of raising an error.
    """

 

 

@sqlalchemy.ext.compiler.compiles(_Replace, "sqlite")
def _replace(insert, compiler, **kw):
    """Generate an ``INSERT ... ON CONFLICT (<primary key>) DO UPDATE`` query.

    Parameters
    ----------
    insert : `_Replace`
        The construct being compiled; its ``table`` supplies the primary key
        and column lists.
    compiler
        The SQLAlchemy statement compiler; used to render the plain INSERT
        and to quote column names.
    **kw
        Forwarded unchanged to ``compiler.visit_insert``.

    Returns
    -------
    sql : `str`
        The INSERT statement followed by the ``ON CONFLICT`` clause.
    """
    # SQLite and PostgreSQL use similar syntax for their ON CONFLICT extension,
    # but SQLAlchemy only knows about PostgreSQL's, so we have to compile some
    # custom text SQL ourselves.
    result = compiler.visit_insert(insert, **kw)
    preparer = compiler.preparer
    table = insert.table
    pk_columns = ", ".join(preparer.format_column(col) for col in table.primary_key)
    result += f" ON CONFLICT ({pk_columns})"
    columns = [preparer.format_column(col) for col in table.columns
               if col.name not in table.primary_key]
    if columns:
        updates = ", ".join(f"{col} = excluded.{col}" for col in columns)
        result += f" DO UPDATE SET {updates}"
    else:
        # Every column is part of the primary key, so a conflicting row is
        # already identical to the new one.  An empty "DO UPDATE SET" would
        # be a syntax error; there is simply nothing to update.
        result += " DO NOTHING"
    return result

 

 

# Specification for a table with a single integer primary-key column named
# "id".  `SqliteDatabase._convertTableSpec` uses it to create the internal
# ``_autoinc_<name>`` tables that generate autoincrement values.
_AUTOINCR_TABLE_SPEC = ddl.TableSpec(
    fields=[ddl.FieldSpec(name="id", dtype=sqlalchemy.Integer, primaryKey=True)]
)

 

 

@dataclass
class _AutoincrementCompoundKeyWorkaround:
    """Bookkeeping used to emulate an autoincrement field that is part of a
    compound primary key, which SQLite does not support natively.
    """

    table: sqlalchemy.schema.Table
    """Internal single-column table; inserting into it yields the
    autoincrement values (`sqlalchemy.schema.Table`).
    """

    column: str
    """Name of the column in the original table that receives the values
    generated by the internal table (`str`).
    """

 

 

class SqliteDatabase(Database):
    """An implementation of the `Database` interface for SQLite3.

    Parameters
    ----------
    connection : `sqlalchemy.engine.Connection`
        An existing connection created by a previous call to `connect`.
    origin : `int`
        An integer ID that should be used as the default for any datasets,
        quanta, or other entities that use a (autoincrement, origin) compound
        primary key.
    namespace : `str`, optional
        The namespace (schema) this database is associated with.  If `None`,
        the default schema for the connection is used (which may be `None`).
    writeable : `bool`, optional
        If `True`, allow write operations on the database, including
        ``CREATE TABLE``.

    Raises
    ------
    RuntimeError
        Raised if the connection reports no databases, or none whose name
        matches ``namespace`` ("main" when ``namespace`` is `None`).

    Notes
    -----
    The case where ``namespace is not None`` is not yet tested, and may be
    broken; we need an API for attaching to different databases in order to
    write those tests, but haven't yet worked out what is common/different
    across databases well enough to define it.
    """

    def __init__(self, *, connection: sqlalchemy.engine.Connection, origin: int,
                 namespace: Optional[str] = None, writeable: bool = True):
        super().__init__(origin=origin, connection=connection, namespace=namespace)
        # Get the filename from a call to 'PRAGMA database_list'.
        with closing(connection.connection.cursor()) as cursor:
            dbList = cursor.execute("PRAGMA database_list").fetchall()
        if not dbList:
            raise RuntimeError("No database in connection.")
        if namespace is None:
            namespace = "main"
        for _, dbname, filename in dbList:
            if dbname == namespace:
                break
        else:
            raise RuntimeError(f"No '{namespace}' database in connection.")
        # An empty filename is normalized to None (see the ``filename``
        # attribute documentation).
        self.filename = filename if filename else None
        self._writeable = writeable
        # Maps the name of each table that needs the compound-key
        # autoincrement workaround to its
        # `_AutoincrementCompoundKeyWorkaround`.
        self._autoincr = {}

    @classmethod
    def makeDefaultUri(cls, root: str) -> Optional[str]:
        """Return the SQLAlchemy URI for a ``gen3.sqlite3`` file inside
        ``root``.
        """
        return "sqlite:///" + os.path.join(root, "gen3.sqlite3")

    @classmethod
    def connect(cls, uri: Optional[str] = None, *, filename: Optional[str] = None,
                writeable: bool = True) -> sqlalchemy.engine.Connection:
        """Create a `sqlalchemy.engine.Connection` from a SQLAlchemy URI or
        filename.

        Parameters
        ----------
        uri : `str`, optional
            A SQLAlchemy URI connection string.
        filename : `str`, optional
            Name of the SQLite database file, or `None` to use an in-memory
            database.  Ignored if ``uri is not None``.
        writeable : `bool`, optional
            If `True`, allow write operations on the database, including
            ``CREATE TABLE``.

        Returns
        -------
        connection : `sqlalchemy.engine.Connection`
            A database connection and transaction state.

        Raises
        ------
        NotImplementedError
            Raised if ``uri`` contains a ``uri=true`` query parameter, or if
            a read-only in-memory database is requested.
        RuntimeError
            Raised if the connection cannot be opened.
        """
        # In order to be able to tell SQLite that we want a read-only or
        # read-write connection, we need to make the SQLite DBAPI connection
        # with a "URI"-based connection string.  SQLAlchemy claims it can do
        # this
        # (https://docs.sqlalchemy.org/en/13/dialects/sqlite.html#uri-connections),
        # but it doesn't seem to work as advertised.  To work around this, we
        # use the 'creator' argument to sqlalchemy.engine.create_engine, which
        # lets us pass a callable that creates the DBAPI connection.
        if uri is None:
            if filename is None:
                target = ":memory:"
                uri = "sqlite://"
            else:
                target = f"file:{filename}"
                uri = f"sqlite:///{filename}"
        else:
            parsed = urllib.parse.urlparse(uri)
            queries = parsed.query.split("&")
            if "uri=true" in queries:
                # This is a SQLAlchemy URI that is already trying to make a
                # SQLite connection via a SQLite URI, and hence there may
                # be URI components for both SQLite and SQLAlchemy.  We
                # don't need to support that, and it'd be a
                # reimplementation of all of the (broken) logic in
                # SQLAlchemy for doing this, so we just don't.
                raise NotImplementedError("SQLite connection strings with 'uri=true' are not supported.")
            # This is just a SQLAlchemy URI with a non-URI SQLite
            # connection string inside it.  Pull that out so we can use it
            # in the creator call.
            if parsed.path.startswith("/"):
                filename = parsed.path[1:]
                target = f"file:{filename}"
            else:
                filename = None
                target = ":memory:"
        if filename is None:
            if not writeable:
                raise NotImplementedError("Read-only :memory: databases are not supported.")
        elif writeable:
            target += '?mode=rwc&uri=true'
        else:
            target += '?mode=ro&uri=true'

        def creator():
            return sqlite3.connect(target, check_same_thread=False, uri=True)

        engine = sqlalchemy.engine.create_engine(uri, poolclass=sqlalchemy.pool.NullPool,
                                                 creator=creator)

        sqlalchemy.event.listen(engine, "connect", _onSqlite3Connect)
        sqlalchemy.event.listen(engine, "begin", _onSqlite3Begin)
        try:
            return engine.connect()
        except sqlalchemy.exc.OperationalError as err:
            raise RuntimeError(f"Error creating connection with uri='{uri}', filename='{filename}', "
                               f"target={target}.") from err

    @classmethod
    def fromConnection(cls, connection: sqlalchemy.engine.Connection, *, origin: int,
                       namespace: Optional[str] = None, writeable: bool = True) -> Database:
        """Construct an instance from an existing connection; all arguments
        are forwarded to the constructor.
        """
        return cls(connection=connection, origin=origin, writeable=writeable, namespace=namespace)

    def isWriteable(self) -> bool:
        """Return the ``writeable`` flag this instance was constructed with.
        """
        return self._writeable

    def __str__(self) -> str:
        if self.filename:
            return f"SQLite3@{self.filename}"
        else:
            return "SQLite3@:memory:"

    def declareStaticTables(self, *, create: bool) -> ContextManager[StaticTablesContext]:
        # If the user asked for an in-memory, writeable database, assume
        # create=True.  This is only really relevant for tests, and it's
        # convenient there.
        return super().declareStaticTables(create=(create if self.filename else self.isWriteable()))

    def _convertFieldSpec(self, table: str, spec: ddl.FieldSpec, metadata: sqlalchemy.MetaData,
                          **kwds) -> sqlalchemy.schema.Column:
        # Autoincrement fields must be primary keys, and are forced to the
        # plain Integer type before delegating to the base implementation.
        if spec.autoincrement:
            if not spec.primaryKey:
                raise RuntimeError(f"Autoincrement field {table}.{spec.name} that is not a "
                                   f"primary key is not supported.")
            if spec.dtype != sqlalchemy.Integer:
                # SQLite's autoincrement is really limited; it only works if
                # the column type is exactly "INTEGER".  But it also doesn't
                # care about the distinctions between different integer types,
                # so it's safe to change it.  Work on a copy so the caller's
                # spec is left untouched.
                spec = copy.copy(spec)
                spec.dtype = sqlalchemy.Integer
        return super()._convertFieldSpec(table, spec, metadata, **kwds)

    def _convertTableSpec(self, name: str, spec: ddl.TableSpec, metadata: sqlalchemy.MetaData,
                          **kwds) -> sqlalchemy.schema.Table:
        primaryKeyFieldNames = {field.name for field in spec.fields if field.primaryKey}
        autoincrFieldNames = {field.name for field in spec.fields if field.autoincrement}
        if len(autoincrFieldNames) > 1:
            raise RuntimeError("At most one autoincrement field per table is allowed.")
        if len(primaryKeyFieldNames) > 1 and autoincrFieldNames:
            # SQLite's default rowid-based autoincrement doesn't work if the
            # field is just one field in a compound primary key.  As a
            # workaround, we create an extra table with just one column that
            # we'll insert into to generate those IDs.  That's only safe if
            # that single-column table's records are already unique with just
            # the autoincrement field, not the rest of the primary key.  In
            # practice, that means the single-column table's records are those
            # for which origin == self.origin.
            autoincrFieldName, = autoincrFieldNames
            otherPrimaryKeyFieldNames = primaryKeyFieldNames - autoincrFieldNames
            if otherPrimaryKeyFieldNames != {"origin"}:
                # We need the only other field in the key to be 'origin'.
                raise NotImplementedError(
                    "Compound primary keys with an autoincrement are only supported in SQLite "
                    "if the only non-autoincrement primary key field is 'origin'."
                )
            self._autoincr[name] = _AutoincrementCompoundKeyWorkaround(
                table=self._convertTableSpec(f"_autoinc_{name}", _AUTOINCR_TABLE_SPEC, metadata, **kwds),
                column=autoincrFieldName
            )
        return super()._convertTableSpec(name, spec, metadata, **kwds)

    def insert(self, table: sqlalchemy.schema.Table, *rows: dict, returnIds: bool = False,
               ) -> Optional[List[int]]:
        autoincr = self._autoincr.get(table.name)
        if autoincr is None:
            # No workaround registered for this table; the base class
            # implementation can handle it directly.
            return super().insert(table, *rows, returnIds=returnIds)
        # This table has a compound primary key that includes an
        # autoincrement.  That doesn't work natively in SQLite, so we
        # insert into a single-column table and use those IDs.
        if not rows:
            return [] if returnIds else None
        if not self.isWriteable():
            # Fail before touching the internal autoincrement table, with the
            # same exception type that `replace` uses for read-only databases.
            raise ReadOnlyDatabaseError(f"Attempt to insert into read-only database '{self}'.")
        if autoincr.column in rows[0]:
            # Caller passed the autoincrement key values explicitly in the
            # first row.  They had better have done the same for all rows,
            # or SQLAlchemy would have a problem, even if we didn't.
            assert all(autoincr.column in row for row in rows)
            # We need to insert only the values that correspond to
            # ``origin == self.origin`` into the single-column table, to
            # make sure we don't generate conflicting keys there later.
            rowsForAutoincrTable = [dict(id=row[autoincr.column])
                                    for row in rows if row["origin"] == self.origin]
            # Insert into the autoincr table and the target table inside
            # a transaction.  The main-table insertion can take care of
            # returnIds for us.
            with self.transaction():
                if rowsForAutoincrTable:
                    # Executing the insert with no parameter sets at all
                    # would add a default row and burn an autoincrement
                    # value, so skip it when no row belongs to this origin.
                    self._connection.execute(autoincr.table.insert(), *rowsForAutoincrTable)
                return super().insert(table, *rows, returnIds=returnIds)
        # Caller did not pass autoincrement key values on the first
        # row.  Make sure they didn't ever do that, and also make
        # sure the origin that was passed in is always self.origin,
        # because we can't safely generate autoincrement values
        # otherwise.
        assert all(autoincr.column not in row and row["origin"] == self.origin for row in rows)
        # Insert into the autoincr table one by one to get the
        # primary key values back, then insert into the target table
        # in the same transaction.
        with self.transaction():
            newRows = []
            ids = []
            for row in rows:
                newRow = row.copy()
                newId = self._connection.execute(autoincr.table.insert()).inserted_primary_key[0]
                newRow[autoincr.column] = newId
                newRows.append(newRow)
                ids.append(newId)
            # Don't ever ask to returnIds here, because we've already
            # got them.
            super().insert(table, *newRows)
        return ids if returnIds else None

    def replace(self, table: sqlalchemy.schema.Table, *rows: dict):
        if not self.isWriteable():
            raise ReadOnlyDatabaseError(f"Attempt to replace into read-only database '{self}'.")
        if table.name in self._autoincr:
            raise NotImplementedError(
                "replace does not support compound primary keys with autoincrement fields."
            )
        self._connection.execute(_Replace(table), *rows)

    filename: Optional[str]
    """Name of the file this database is connected to (`str` or `None`).

    Set to `None` for in-memory databases.
    """