Coverage for python/lsst/daf/butler/registry/nameShrinker.py: 25%

24 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-06-02 02:16 -0700

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = ["NameShrinker"] 

24 

25import hashlib 

26 

27 

class NameShrinker:
    """A utility class for `Database` implementations that need a nontrivial
    implementation of `Database.shrinkDatabaseEntityName` and
    `Database.expandDatabaseEntityName`.

    Parameters
    ----------
    maxLength : `int`
        The maximum number of characters in a database entity name.
    hashSize : `int`, optional
        The size of the hash (in bytes) to use for the tail of the shortened
        name.  The hash is written in hexadecimal and prefixed with a "_", so
        the number of characters the hash occupies is ``hashSize*2 + 1``, and
        hence the number of characters preserved from the beginning of the
        original name is ``maxLength - hashSize*2 - 1``.

    Notes
    -----
    The mapping between original and shrunk names is held only in memory on
    this instance; `expand` can only reverse names that were previously
    passed to `shrink` on the same instance.
    """

    def __init__(self, maxLength: int, hashSize: int = 4):
        self.maxLength = maxLength
        self.hashSize = hashSize
        # Two-way cache of every name that actually had to be shortened.
        self._by_shrunk: dict[str, str] = {}
        self._by_original: dict[str, str] = {}

    def shrink(self, original: str) -> str:
        """Shrink a name and remember the mapping between the original name
        and its shrunk form.

        Parameters
        ----------
        original : `str`
            The name to shorten.  It is encoded as ASCII before hashing.

        Returns
        -------
        shrunk : `str`
            ``original`` itself if it already fits within ``maxLength``
            characters; otherwise a name of exactly ``maxLength`` characters
            made of a prefix of ``original``, a "_", and a hexadecimal
            BLAKE2b digest of the full original name.

        Raises
        ------
        ValueError
            Raised if ``original`` needs shrinking but ``maxLength`` is too
            small to hold the "_" separator and the hexadecimal hash.
        UnicodeEncodeError
            Raised if ``original`` needs shrinking and contains non-ASCII
            characters.
        """
        if len(original) <= self.maxLength:
            return original
        cached = self._by_original.get(original)
        if cached is not None:
            return cached
        trunc = self.maxLength - 2 * self.hashSize - 1
        if trunc < 0:
            # A negative slice bound would silently keep *more* of the name
            # and yield a result longer than maxLength; this must be a real
            # error rather than an assert, which is stripped under ``-O``.
            raise ValueError(
                f"Cannot shrink name {original!r}: maxLength={self.maxLength} is too small "
                f"for a hash of {self.hashSize} bytes ({2 * self.hashSize + 1} characters)."
            )
        digest = hashlib.blake2b(original.encode("ascii"), digest_size=self.hashSize).hexdigest()
        shrunk = f"{original[:trunc]}_{digest}"
        self._by_shrunk[shrunk] = original
        self._by_original[original] = shrunk
        return shrunk

    def expand(self, shrunk: str) -> str:
        """Return the original name that was passed to a previous call to
        `shrink`.

        If the given name was not passed to `shrink` or was not modified by
        it, it is returned unmodified.

        Parameters
        ----------
        shrunk : `str`
            A name, possibly one returned by `shrink`.

        Returns
        -------
        original : `str`
            The original name recorded for ``shrunk``, or ``shrunk`` itself
            if no mapping was recorded for it.
        """
        return self._by_shrunk.get(shrunk, shrunk)