Coverage for python/lsst/daf/butler/registry/nameShrinker.py: 25%

24 statements  

« prev     ^ index     » next       coverage.py v7.4.0, created at 2024-01-16 10:44 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27from __future__ import annotations 

28 

29__all__ = ["NameShrinker"] 

30 

31import hashlib 

32 

33 

class NameShrinker:
    """A utility class for `Database` implementations that need a nontrivial
    implementation of `Database.shrinkDatabaseEntityName` and
    `Database.expandDatabaseEntityName`.

    Parameters
    ----------
    maxLength : `int`
        The maximum number of characters in a database entity name.
    hashSize : `int`, optional
        The size of the hash (in bytes) to use for the tail of the shortened
        name.  The hash is written in hexadecimal and prefixed with a "_", so
        the number of characters the hash occupies is ``hashSize*2 + 1``, and
        hence the number of characters preserved from the beginning of the
        original name is ``maxLength - hashSize*2 - 1``.

    Notes
    -----
    The mapping between original and shrunk names is held only in memory on
    this instance; `expand` can only reverse names that were previously
    passed to `shrink` on the same instance.
    """

    def __init__(self, maxLength: int, hashSize: int = 4):
        self.maxLength = maxLength
        self.hashSize = hashSize
        # Two-way lookup tables populated lazily by `shrink`.
        self._by_shrunk: dict[str, str] = {}
        self._by_original: dict[str, str] = {}

    def shrink(self, original: str) -> str:
        """Shrink a name and remember the mapping between the original name and
        its shrunk form.

        Parameters
        ----------
        original : `str`
            The original name.

        Returns
        -------
        shrunk : `str`
            The shrunk form.  Names that already fit within ``maxLength``
            characters are returned unchanged (and are not recorded).

        Raises
        ------
        ValueError
            Raised if the name must be shortened but ``maxLength`` is too
            small to hold the ``"_"`` separator plus the hexadecimal hash
            (``2*hashSize + 1`` characters).
        UnicodeEncodeError
            Raised if a name that must be shortened contains non-ASCII
            characters.
        """
        if len(original) <= self.maxLength:
            return original
        cached = self._by_original.get(original)
        if cached is not None:
            return cached
        trunc = self.maxLength - 2 * self.hashSize - 1
        if trunc < 0:
            # A negative slice bound would silently drop characters from the
            # *end* of the name and yield a result longer than maxLength;
            # fail loudly instead of relying on an assert that is stripped
            # under ``python -O``.
            raise ValueError(
                f"maxLength={self.maxLength} is too small for hashSize={self.hashSize}: "
                f"at least {2 * self.hashSize + 1} characters are required for the hash suffix."
            )
        message = hashlib.blake2b(digest_size=self.hashSize)
        message.update(original.encode("ascii"))
        shrunk = f"{original[:trunc]}_{message.digest().hex()}"
        # Internal sanity check: prefix + "_" + hex digest fills maxLength.
        assert len(shrunk) == self.maxLength
        self._by_shrunk[shrunk] = original
        self._by_original[original] = shrunk
        return shrunk

    def expand(self, shrunk: str) -> str:
        """Return the original name that was passed to a previous call to
        `shrink`.

        Parameters
        ----------
        shrunk : `str`
            The shrunk form.

        Returns
        -------
        expanded : `str`
            The expanded form.  If the given name was not passed to `shrink`
            or was not modified by it, it is returned unmodified.
        """
        return self._by_shrunk.get(shrunk, shrunk)