Coverage for python/lsst/daf/butler/registry/versions.py : 21%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = [
25 "ButlerVersionsManager", "IncompatibleVersionError", "MissingVersionError"
26]
28import hashlib
29import logging
30from typing import (
31 TYPE_CHECKING,
32 Iterable,
33 List,
34 Mapping,
35 MutableMapping,
36 NamedTuple,
37 Optional,
38)
40import sqlalchemy
42if TYPE_CHECKING: 42 ↛ 43line 42 didn't jump to line 43, because the condition on line 42 was never true
43 from .interfaces import (
44 ButlerAttributeManager,
45 )
46 from ..core import Config
49_LOG = logging.getLogger(__name__)
class MissingVersionError(RuntimeError):
    """Error signalling that an existing database does not have the
    attributes holding schema version numbers.
    """
class IncompatibleVersionError(RuntimeError):
    """Error signalling that the configured schema version cannot be used
    with the version found in the database.
    """
class VersionTuple(NamedTuple):
    """Class representing a version number.

    Parameters
    ----------
    major, minor, patch : `int`
        Version number components.
    """
    major: int
    minor: int
    patch: int

    @classmethod
    def fromString(cls, versionStr: str) -> VersionTuple:
        """Extract version number from a string.

        Parameters
        ----------
        versionStr : `str`
            Version number in string form "X.Y.Z", all components must be
            present and must be non-negative integers.

        Returns
        -------
        version : `VersionTuple`
            Parsed version tuple.

        Raises
        ------
        ValueError
            Raised if string has an invalid format.
        """
        try:
            version = tuple(int(v) for v in versionStr.split("."))
        except ValueError as exc:
            raise ValueError(f"Invalid version string '{versionStr}'") from exc
        if len(version) != 3:
            raise ValueError(f"Invalid version string '{versionStr}', must consist of three numbers")
        # int() happily parses signed numbers, so "1.-2.3" would otherwise
        # produce a version with a negative component.
        if any(number < 0 for number in version):
            raise ValueError(f"Invalid version string '{versionStr}', version numbers cannot be negative")
        return cls(*version)

    def __str__(self) -> str:
        """Transform version tuple into a canonical string form "X.Y.Z".
        """
        return f"{self.major}.{self.minor}.{self.patch}"
class VersionInfo:
    """Version information for one schema group, as given in configuration.

    Parameters
    ----------
    version : `VersionTuple`
        Parsed version number.
    digest : `str`, optional
        Digest of the matching part of the schema definition, if one is
        configured.

    Notes
    -----
    The schema digest exists to catch accidental schema changes made in the
    code without a corresponding bump of the schema version. A digest is
    constructed from the set of table definitions and compared to the digest
    given in configuration; a difference means that the schema was changed.
    A deliberate schema update has to update both the configured schema
    version and the configured digest.
    """

    def __init__(self, version: VersionTuple, digest: Optional[str] = None):
        # Plain attribute storage; no validation is performed here.
        self.version: VersionTuple = version
        self.digest: Optional[str] = digest
class ButlerVersionsManager:
    """Helper that tracks schema versions and verifies their compatibility.

    Parameters
    ----------
    versions : `dict` [`str`, `VersionInfo`]
        Mapping from group name to the schema version and digest of that
        group. A group is one piece of the overall database schema; group
        names are typically defined by configuration.
    """

    def __init__(self, versions: Mapping[str, VersionInfo]):
        self._versions = versions
        # Tables registered via `addTable`, keyed by schema group name.
        self._tablesGroups: MutableMapping[str, List[sqlalchemy.schema.Table]] = {}

    @classmethod
    def fromConfig(cls, schemaVersionConfig: Optional[Config]) -> ButlerVersionsManager:
        """Build a `ButlerVersionsManager` from configuration.

        Parameters
        ----------
        schemaVersionConfig : `Config` or `None`
            Configuration object describing schema versions, typically the
            "schema_versions" sub-object of registry configuration.

        Returns
        -------
        manager : `ButlerVersionsManager`
            New instance of the versions manager; it has no versions when
            the configuration is `None` or empty.
        """
        if not schemaVersionConfig:
            return cls({})
        return cls({
            group: VersionInfo(VersionTuple.fromString(entry["version"]), entry.get("digest"))
            for group, entry in schemaVersionConfig.items()
        })

    @staticmethod
    def checkCompatibility(old_version: VersionTuple, new_version: VersionTuple, update: bool) -> bool:
        """Decide whether two schema versions can work together.

        Parameters
        ----------
        old_version : `VersionTuple`
            Old schema version, typically one stored in a database.
        new_version : `VersionTuple`
            New schema version, typically version defined in configuration.
        update : `bool`
            If True then read-write access is expected.

        Returns
        -------
        compatible : `bool`
            `True` if the versions are compatible for the requested kind of
            access.
        """
        # A major version mismatch is never compatible.
        if old_version.major != new_version.major:
            return False
        # With equal major and minor numbers only the patch level can
        # differ, and that never matters.
        if old_version.minor == new_version.minor:
            return True
        # A newer minor version can still read an older schema, but only
        # for read-only access.
        return new_version.minor > old_version.minor and not update

    @staticmethod
    def schemaDigest(tables: Iterable[sqlalchemy.schema.Table]) -> str:
        """Calculate digest for a schema.

        Parameters
        ----------
        tables : iterable [`sqlalchemy.schema.Table`]
            Set of tables comprising the schema.

        Returns
        -------
        digest : `str`
            String representation of the digest of the schema.

        Notes
        -----
        It is not specified what kind of implementation is used to calculate
        the digest string. The only requirement is that the result should be
        stable over time, as this digest string will be stored in the
        configuration and probably in the database too. It should detect (by
        producing different digests) sensible changes to the schema, but it
        should also be stable w.r.t. changes that do not actually change the
        schema (e.g. a change in the order of columns or keys). The current
        implementation is likely incomplete in that it does not detect all
        possible changes (e.g. some constraints may not be included in the
        total digest). Digest checking is optional and can be disabled in
        configuration by making the configured digest an empty string; we
        should delay activating that check until we have a stable
        implementation for this method.
        """

        def describeTable(table: sqlalchemy.schema.Table) -> str:
            """Make string representation of a single table schema.
            """
            # NOTE: the exact text produced here feeds the digest, which is
            # compared against configured values, so the format (including
            # the missing separator after the constraint name) must not be
            # altered.
            entries = []
            for column in table.columns:
                pieces = [f"COL,{column.name},{column.type}"]
                if column.primary_key:
                    pieces.append("PK")
                if column.nullable:
                    pieces.append("NULL")
                entries.append(",".join(pieces))
            for constraint in table.foreign_key_constraints:
                links = "".join(
                    f"{fk.column.name}->{fk.target_fullname}" for fk in constraint.elements
                )
                entries.append(f"FK,{constraint.name}{links}")
            # Sorting makes the result independent of column/key order.
            return ";".join([table.name, *sorted(entries)])

        # Tables are sorted too, so the order of ``tables`` is irrelevant.
        descriptions = sorted(describeTable(table) for table in tables)
        return hashlib.md5("".join(descriptions).encode()).hexdigest()

    def addTable(self, group: str, table: sqlalchemy.schema.Table) -> None:
        """Register a table as part of a schema group.

        The schema of every table added to a group is used when the digest
        for that group is calculated.

        Parameters
        ----------
        group : `str`
            Schema group name, e.g. "core", or "dimensions".
        table : `sqlalchemy.schema.Table`
            Table schema.
        """
        groupTables = self._tablesGroups.setdefault(group, [])
        groupTables.append(table)

    def storeVersions(self, attributes: ButlerAttributeManager) -> None:
        """Store configured schema versions in registry attributes.

        Parameters
        ----------
        attributes : `ButlerAttributeManager`
            Attribute manager instance.
        """
        for group, info in self._versions.items():
            # attribute name reflects configuration path in "registry" config
            attributes.set(f"schema_versions.{group}.version", str(info.version))
            # TODO: we could also store digest in the database but I'm not
            # sure that digest calculation is stable enough at this point.

    def checkVersionDigests(self) -> None:
        """Compare current schema digests to the configured digests.

        A digest is calculated for every schema group from the tables added
        to that group with the `addTable` method. If it differs from the
        digest configured for the same group a warning is logged. Groups
        without a configured version, or with an empty or missing digest,
        are skipped.
        """
        for group, tables in self._tablesGroups.items():
            if group not in self._versions:
                continue
            configDigest = self._versions[group].digest
            if not configDigest:
                # Digest checking is disabled for this group.
                continue
            actualDigest = self.schemaDigest(tables)
            if actualDigest != configDigest:
                _LOG.warning("Digest mismatch for %s schema. Configured digest: '%s', "
                             "actual digest '%s'.", group, configDigest, actualDigest)

    def checkStoredVersions(self, attributes: ButlerAttributeManager, writeable: bool) -> None:
        """Compare configured versions with the versions stored in database.

        Parameters
        ----------
        attributes : `ButlerAttributeManager`
            Attribute manager instance.
        writeable : `bool`
            If ``True`` then read-write access needs to be checked.

        Raises
        ------
        IncompatibleVersionError
            Raised if versions are not compatible.
        MissingVersionError
            Raised if database has no stored version for one or more groups.
        """
        for group, info in self._versions.items():
            stored = attributes.get(f"schema_versions.{group}.version")
            if stored is None:
                raise MissingVersionError(f"Failed to read version number for group {group}")
            storedVersion = VersionTuple.fromString(stored)
            if self.checkCompatibility(storedVersion, info.version, writeable):
                continue
            raise IncompatibleVersionError(
                f"Configured version {info.version} is not compatible with stored version "
                f"{storedVersion} for group {group}"
            )