Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = [ 

25 "ButlerVersionsManager", "IncompatibleVersionError", "MissingVersionError" 

26] 

27 

28import hashlib 

29import logging 

30from typing import ( 

31 TYPE_CHECKING, 

32 Iterable, 

33 List, 

34 Mapping, 

35 MutableMapping, 

36 NamedTuple, 

37 Optional, 

38) 

39 

40import sqlalchemy 

41 

42if TYPE_CHECKING: 42 ↛ 43line 42 didn't jump to line 43, because the condition on line 42 was never true

43 from .interfaces import ( 

44 ButlerAttributeManager, 

45 ) 

46 from ..core import Config 

47 

48 

49_LOG = logging.getLogger(__name__) 

50 

51 

class MissingVersionError(RuntimeError):
    """Error signalling that version attributes are absent from a database.

    Raised when an existing database is missing the attributes that hold
    schema version numbers.
    """

57 

58 

class IncompatibleVersionError(RuntimeError):
    """Error signalling a mismatch between configured and database versions.

    Raised when the version number found in configuration is not compatible
    with the version number of the database.
    """

64 

65 

class VersionTuple(NamedTuple):
    """Class representing a version number.

    Parameters
    ----------
    major, minor, patch : `int`
        Version number components.
    """
    major: int
    minor: int
    patch: int

    @classmethod
    def fromString(cls, versionStr: str) -> VersionTuple:
        """Extract version number from a string.

        Parameters
        ----------
        versionStr : `str`
            Version number in string form "X.Y.Z", all components must be
            present.

        Returns
        -------
        version : `VersionTuple`
            Parsed version tuple.

        Raises
        ------
        ValueError
            Raised if ``versionStr`` is not a string, if any component is not
            an integer, if there are not exactly three components, or if any
            component is negative.
        """
        # Values coming from configuration are not guaranteed to be strings
        # (e.g. YAML reads an unquoted ``1.0`` as a float); report that as the
        # documented ValueError instead of an AttributeError from ``.split``.
        if not isinstance(versionStr, str):
            raise ValueError(
                f"Invalid version string {versionStr!r}, expected a string in 'X.Y.Z' format"
            )
        try:
            version = tuple(int(v) for v in versionStr.split("."))
        except ValueError as exc:
            raise ValueError(f"Invalid version string '{versionStr}'") from exc
        if len(version) != 3:
            raise ValueError(f"Invalid version string '{versionStr}', must consist of three numbers")
        # ``int`` happily parses a leading minus sign, but a negative
        # component is never a valid version number.
        if min(version) < 0:
            raise ValueError(f"Invalid version string '{versionStr}', version numbers cannot be negative")
        return cls(*version)

    def __str__(self) -> str:
        """Transform version tuple into a canonical string form "X.Y.Z".
        """
        return f"{self.major}.{self.minor}.{self.patch}"

110 

111 

class VersionInfo:
    """Version information for one schema group, as given by configuration.

    Parameters
    ----------
    version : `VersionTuple`
        Version number in parsed format.
    digest : `str`, optional
        Optional digest of the corresponding part of the schema definition.

    Notes
    -----
    The schema digest is meant to help detect unintentional schema changes
    made in the code without a matching schema version upgrade. A digest is
    constructed from the set of table definitions and compared to the digest
    defined in configuration; if the two differ, the schema was changed.
    An intentional schema update needs to update both the configured schema
    version and the configured schema digest.
    """
    def __init__(self, version: VersionTuple, digest: Optional[str] = None):
        # Digest of the schema definition, or `None` when not configured.
        self.digest = digest
        # Parsed version number.
        self.version = version

134 

135 

class ButlerVersionsManager:
    """Utility class to manage and verify schema version compatibility.

    Parameters
    ----------
    versions : `~collections.abc.Mapping` [`str`, `VersionInfo`]
        Mapping of the group name to corresponding schema version and digest.
        A group represents a piece of the overall database schema; group
        names are typically defined by configuration.
    """
    def __init__(self, versions: Mapping[str, VersionInfo]):
        # Tables registered via `addTable`, keyed by schema group name.
        self._tablesGroups: MutableMapping[str, List[sqlalchemy.schema.Table]] = {}
        # Configured versions; the mapping is stored as given, not copied.
        self._versions = versions

    @classmethod
    def fromConfig(cls, schemaVersionConfig: Optional[Config]) -> ButlerVersionsManager:
        """Make `ButlerVersionsManager` instance based on configuration.

        Parameters
        ----------
        schemaVersionConfig : `Config` or `None`
            Configuration object describing schema versions, typically
            "schema_versions" sub-object of registry configuration. Each
            entry must have a "version" item and may have a "digest" item.

        Returns
        -------
        manager : `ButlerVersionsManager`
            New instance of the versions manager. It has no configured
            versions if ``schemaVersionConfig`` is `None` or empty.
        """
        if not schemaVersionConfig:
            return cls({})
        parsed = {
            group: VersionInfo(VersionTuple.fromString(entry["version"]), entry.get("digest"))
            for group, entry in schemaVersionConfig.items()
        }
        return cls(parsed)

    @staticmethod
    def checkCompatibility(old_version: VersionTuple, new_version: VersionTuple, update: bool) -> bool:
        """Compare two versions for compatibility.

        Parameters
        ----------
        old_version : `VersionTuple`
            Old schema version, typically one stored in a database.
        new_version : `VersionTuple`
            New schema version, typically version defined in configuration.
        update : `bool`
            If True then read-write access is expected.

        Returns
        -------
        compatible : `bool`
            `True` if the two versions are compatible for the requested kind
            of access. Versions with different major numbers are never
            compatible; versions with different minor numbers are compatible
            only for read access and only if the new minor number is higher;
            patch numbers are ignored.
        """
        if old_version.major != new_version.major:
            # A major version change breaks compatibility entirely.
            return False
        if old_version.minor == new_version.minor:
            # Only the patch number can differ here, and it does not matter.
            return True
        if update:
            # Minor version mismatch is never acceptable for read-write.
            return False
        # Read-only access works when the new version is the more recent one.
        return new_version.minor > old_version.minor

    @staticmethod
    def schemaDigest(tables: Iterable[sqlalchemy.schema.Table]) -> str:
        """Calculate digest for a schema.

        Parameters
        ----------
        tables : iterable [`sqlalchemy.schema.Table`]
            Set of tables comprising the schema.

        Returns
        -------
        digest : `str`
            String representation of the digest of the schema.

        Notes
        -----
        The kind of implementation used to calculate the digest string is
        deliberately unspecified. The only requirement is that the result is
        stable over time, as this digest string is stored in configuration
        and probably in the database too. It should detect (by producing a
        different digest) meaningful changes to the schema, while staying
        stable with respect to changes that do not actually change the schema
        (e.g. a change in the order of columns or keys). The current
        implementation is likely incomplete in that it does not detect all
        possible changes (e.g. some constraints may not be included in the
        total digest). Digest checking is optional and is disabled when the
        configured digest is an empty string; activating the check should be
        delayed until this method has a stable implementation.
        """

        def describeTable(table: sqlalchemy.schema.Table) -> str:
            """Make string representation of a single table schema.
            """
            pieces = []
            for col in table.columns:
                entry = f"COL,{col.name},{col.type}"
                entry += ",PK" if col.primary_key else ""
                entry += ",NULL" if col.nullable else ""
                pieces.append(entry)
            for constraint in table.foreign_key_constraints:
                # NOTE: there is no separator between the constraint name and
                # the column mappings; this is kept as is because any change
                # here would alter digests already stored in configuration.
                # NOTE(review): ``fk.column`` looks like the referenced
                # (target) column rather than the referencing one - confirm
                # against SQLAlchemy documentation.
                prefix = f"FK,{constraint.name}"
                links = "".join(f"{fk.column.name}->{fk.target_fullname}" for fk in constraint.elements)
                pieces.append(prefix + links)
            # Sorting makes the result independent of column/constraint order.
            pieces.sort()
            return ";".join([table.name, *pieces])

        # Sort per-table strings so that the order of tables does not matter,
        # then hash their concatenation.
        combined = "".join(sorted(map(describeTable, tables)))
        return hashlib.md5(combined.encode()).hexdigest()

    def addTable(self, group: str, table: sqlalchemy.schema.Table) -> None:
        """Add a table to specified schema group.

        Table schema added to a group will be used when calculating digest
        for that group.

        Parameters
        ----------
        group : `str`
            Schema group name, e.g. "core", or "dimensions".
        table : `sqlalchemy.schema.Table`
            Table schema.
        """
        members = self._tablesGroups.get(group)
        if members is None:
            # First table of this group, start a new list for it.
            members = self._tablesGroups[group] = []
        members.append(table)

    def storeVersions(self, attributes: ButlerAttributeManager) -> None:
        """Store configured schema versions in registry attributes.

        Parameters
        ----------
        attributes : `ButlerAttributeManager`
            Attribute manager instance.
        """
        for group, info in self._versions.items():
            # Attribute name mirrors the configuration path in "registry"
            # config.
            name = f"schema_versions.{group}.version"
            attributes.set(name, str(info.version))
            # TODO: we could also store digest in the database but I'm not
            # sure that digest calculation is stable enough at this point.

    def checkVersionDigests(self) -> None:
        """Compare current schema digest to a configured digest.

        Calculates the digest for every schema group using the tables added
        to that group with `addTable` method. If the digest differs from the
        digest configured for the same group a warning message is logged.
        Groups without a configured version or with an empty/missing
        configured digest are not checked.
        """
        for group, tables in self._tablesGroups.items():
            if group not in self._versions:
                continue
            expected = self._versions[group].digest
            if not expected:
                # Missing or empty configured digest disables the check.
                continue
            actual = self.schemaDigest(tables)
            if actual != expected:
                _LOG.warning("Digest mismatch for %s schema. Configured digest: '%s', "
                             "actual digest '%s'.", group, expected, actual)

    def checkStoredVersions(self, attributes: ButlerAttributeManager, writeable: bool) -> None:
        """Compare configured versions with the versions stored in database.

        Parameters
        ----------
        attributes : `ButlerAttributeManager`
            Attribute manager instance.
        writeable : `bool`
            If ``True`` then read-write access needs to be checked.

        Raises
        ------
        IncompatibleVersionError
            Raised if versions are not compatible.
        MissingVersionError
            Raised if database has no stored version for one or more groups.
        """
        for group, info in self._versions.items():
            stored = attributes.get(f"schema_versions.{group}.version")
            if stored is None:
                raise MissingVersionError(f"Failed to read version number for group {group}")
            dbVersion = VersionTuple.fromString(stored)
            compatible = self.checkCompatibility(dbVersion, info.version, writeable)
            if not compatible:
                raise IncompatibleVersionError(
                    f"Configured version {info.version} is not compatible with stored version "
                    f"{dbVersion} for group {group}"
                )