Coverage for python/lsst/daf/butler/name_shrinker.py: 31%

32 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-04-10 10:14 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27from __future__ import annotations 

28 

29__all__ = ["NameShrinker"] 

30 

31import hashlib 

32from collections.abc import Iterator 

33 

34 

class NameShrinker:
    """A utility class for `Database` implementations that need a nontrivial
    implementation of `Database.shrinkDatabaseEntityName` and
    `Database.expandDatabaseEntityName`.

    Names longer than ``maxLength`` are replaced by a truncated prefix
    followed by ``"_"`` and a hexadecimal hash of the full original name.
    Both directions of the mapping are remembered so that `expand` can
    recover the original.

    Parameters
    ----------
    maxLength : `int`
        The maximum number of characters in a database entity name.
    hashSize : `int`, optional
        The size of the hash (in bytes) to use for the tail of the shortened
        name.  The hash is written in hexadecimal and prefixed with a "_", so
        the number of characters the hash occupies is ``hashSize*2 + 1``, and
        hence the number of characters preserved from the beginning of the
        original name is ``maxLength - hashSize*2 - 1``.
    """

    def __init__(self, maxLength: int, hashSize: int = 4):
        self.maxLength = maxLength
        self.hashSize = hashSize
        # Two-way mapping; only names that were actually shortened are stored.
        self._by_shrunk: dict[str, str] = {}
        self._by_original: dict[str, str] = {}

    def shrink(self, original: str) -> str:
        """Shrink a name and remember the mapping between the original name and
        its shrunk form.

        Parameters
        ----------
        original : `str`
            The original name.

        Returns
        -------
        shrunk : `str`
            The shrunk form.  Names that already fit within ``maxLength``
            are returned unchanged and are not recorded.

        Raises
        ------
        ValueError
            Raised if ``maxLength`` is too small to hold the ``"_"``
            separator and the hexadecimal hash, so no name of the requested
            length can be produced.
        UnicodeEncodeError
            Raised if a name that needs shrinking contains non-ASCII
            characters.
        """
        if len(original) <= self.maxLength:
            return original
        known = self._by_original.get(original)
        if known is not None:
            return known
        trunc = self.maxLength - 2 * self.hashSize - 1
        if trunc < 0:
            # A negative slice bound would drop characters from the *end* of
            # the name and yield a result longer than maxLength.  This must
            # be an explicit check: an ``assert`` disappears under ``-O``.
            raise ValueError(
                f"maxLength={self.maxLength} is too small for a "
                f"{self.hashSize}-byte hash suffix; at least "
                f"{2 * self.hashSize + 1} characters are required."
            )
        suffix = hashlib.blake2b(original.encode("ascii"), digest_size=self.hashSize).hexdigest()
        # Because len(original) > maxLength >= trunc, the prefix has exactly
        # ``trunc`` characters and the result exactly ``maxLength``.
        shrunk = f"{original[:trunc]}_{suffix}"
        self._by_shrunk[shrunk] = original
        self._by_original[original] = shrunk
        return shrunk

    def expand(self, shrunk: str) -> str:
        """Return the original name that was passed to a previous call to
        `shrink`.

        Parameters
        ----------
        shrunk : `str`
            The shrunk form.

        Returns
        -------
        expanded : `str`
            The expanded form.  If the given name was not passed to `shrink`
            or was not modified by it, it is returned unmodified.
        """
        return self._by_shrunk.get(shrunk, shrunk)

    def __iter__(self) -> Iterator[tuple[str, str]]:
        """Iterate over ``(original, shrunk)`` pairs for every recorded name."""
        return iter(self._by_original.items())

    def __len__(self) -> int:
        """Return the number of names that have been shortened and recorded."""
        return len(self._by_original)

    def update(self, other: "NameShrinker") -> None:
        """Add all original <-> shrunk mappings from ``other`` to ``self``.

        Parameters
        ----------
        other : `NameShrinker`
            Object to extract name mappings from.
        """
        self._by_original.update(other._by_original)
        self._by_shrunk.update(other._by_shrunk)

117 self._by_shrunk.update(other._by_shrunk)