Coverage for python/lsst/daf/butler/registry/interfaces/_versioning.py: 33%

56 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2022-08-26 09:24 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

# Public names exported by this module.
__all__ = ["VersionTuple", "VersionedExtension"]

28 

29import hashlib 

30from abc import ABC, abstractmethod 

31from typing import Iterable, NamedTuple, Optional 

32 

33import sqlalchemy 

34 

35 

class VersionTuple(NamedTuple):
    """Class representing a version number.

    Parameters
    ----------
    major, minor, patch : `int`
        Version number components.
    """

    major: int
    minor: int
    patch: int

    @classmethod
    def fromString(cls, versionStr: str) -> VersionTuple:
        """Extract version number from a string.

        Parameters
        ----------
        versionStr : `str`
            Version number in string form "X.Y.Z", all components must be
            present and each must be a non-negative integer.

        Returns
        -------
        version : `VersionTuple`
            Parsed version tuple.

        Raises
        ------
        ValueError
            Raised if string has an invalid format: a component is not an
            integer, the number of components is not three, or a component
            is negative.
        """
        try:
            version = tuple(int(v) for v in versionStr.split("."))
        except ValueError as exc:
            raise ValueError(f"Invalid version string '{versionStr}'") from exc
        if len(version) != 3:
            raise ValueError(f"Invalid version string '{versionStr}', must consist of three numbers")
        # int() happily parses a leading minus sign, but a negative component
        # is never a meaningful version number, so reject it explicitly.
        if any(component < 0 for component in version):
            raise ValueError(f"Invalid version string '{versionStr}', components must be non-negative")
        return cls(*version)

    def __str__(self) -> str:
        """Transform version tuple into a canonical string form."""
        return f"{self.major}.{self.minor}.{self.patch}"

80 

81 

class VersionedExtension(ABC):
    """Interface for extension classes with versions."""

    @classmethod
    @abstractmethod
    def currentVersion(cls) -> Optional[VersionTuple]:
        """Return extension version as defined by current implementation.

        This method can return ``None`` if an extension does not require
        its version to be saved or checked.

        Returns
        -------
        version : `VersionTuple` or `None`
            Current extension version or ``None``.
        """
        raise NotImplementedError()

    @classmethod
    def extensionName(cls) -> str:
        """Return full name of the extension.

        This name should match the name defined in registry configuration. It
        is also stored in registry attributes. Default implementation returns
        full class name.

        Returns
        -------
        name : `str`
            Full extension name.
        """
        return f"{cls.__module__}.{cls.__name__}"

    @abstractmethod
    def schemaDigest(self) -> Optional[str]:
        """Return digest for schema piece managed by this extension.

        Returns
        -------
        digest : `str` or `None`
            String representation of the digest of the schema, ``None`` should
            be returned if schema digest is not to be saved or checked. The
            length of the returned string cannot exceed the length of the
            "value" column of butler attributes table, currently 65535
            characters.

        Notes
        -----
        There is no exact definition of digest format, any string should work.
        The only requirement for string contents is that it has to remain
        stable over time if schema does not change but it should produce
        different string for any change in the schema. In many cases default
        implementation in `_defaultSchemaDigest` can be used as a reasonable
        choice.
        """
        raise NotImplementedError()

    def _defaultSchemaDigest(
        self, tables: Iterable[sqlalchemy.schema.Table], dialect: sqlalchemy.engine.Dialect
    ) -> str:
        """Calculate digest for a schema based on list of tables schemas.

        Parameters
        ----------
        tables : iterable [`sqlalchemy.schema.Table`]
            Set of tables comprising the schema.
        dialect : `sqlalchemy.engine.Dialect`
            Dialect used to stringify types; needed to support dialect-specific
            types.

        Returns
        -------
        digest : `str`
            String representation of the digest of the schema.

        Notes
        -----
        It is not specified what kind of implementation is used to calculate
        digest string. The only requirement for that is that result should be
        stable over time as this digest string will be stored in the database.
        It should detect (by producing different digests) sensible changes to
        the schema, but it also should be stable w.r.t. changes that do
        not actually change the schema (e.g. change in the order of columns or
        keys.) Current implementation is likely incomplete in that it does not
        detect all possible changes (e.g. some constraints may not be included
        into total digest).

        A foreign key constraint without a name contributes an empty string
        in place of its name.
        """

        def tableSchemaRepr(table: sqlalchemy.schema.Table) -> str:
            """Make string representation of a single table schema."""
            schemaReps = []
            for column in table.columns:
                columnRep = f"COL,{column.name},{column.type.compile(dialect=dialect)}"
                if column.primary_key:
                    columnRep += ",PK"
                if column.nullable:
                    columnRep += ",NULL"
                schemaReps.append(columnRep)
            for fkConstr in table.foreign_key_constraints:
                # for foreign key we include only one side of relations into
                # digest, other side could be managed by different extension
                #
                # The constraint name is not guaranteed to be a string (an
                # unnamed constraint has ``None``); joining a non-string
                # would raise TypeError, so fall back to an empty string.
                fkName = fkConstr.name if isinstance(fkConstr.name, str) else ""
                fkReps = ["FK", fkName, *(fk.column.name for fk in fkConstr.elements)]
                schemaReps.append(",".join(fkReps))
            # sort everything to keep it stable
            schemaReps.sort()
            return ";".join([table.name, *schemaReps])

        # MD5 is used purely as a change-detection fingerprint. The exact
        # byte stream fed to it must not change, otherwise freshly computed
        # digests would stop matching ones computed earlier.
        md5 = hashlib.md5()
        for tableRepr in sorted(tableSchemaRepr(table) for table in tables):
            md5.update(tableRepr.encode())
        return md5.hexdigest()

197 return digest