Coverage for python/lsst/daf/butler/name_shrinker.py: 31%
32 statements
« prev ^ index » next coverage.py v7.5.0, created at 2024-04-30 09:54 +0000
« prev ^ index » next coverage.py v7.5.0, created at 2024-04-30 09:54 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
27from __future__ import annotations
29__all__ = ["NameShrinker"]
31import hashlib
32from collections.abc import Iterator
class NameShrinker:
    """A utility class for `Database` implementations that need a nontrivial
    implementation of `Database.shrinkDatabaseEntityName` and
    `Database.expandDatabaseEntityName`.

    Parameters
    ----------
    maxLength : `int`
        The maximum number of characters in a database entity name.
    hashSize : `int`, optional
        The size of the hash (in bytes) to use for the tail of the shortened
        name.  The hash is written in hexadecimal and prefixed with a "_", so
        the number of characters the hash occupies is ``hashSize*2 + 1``, and
        hence the number of characters preserved from the beginning of the
        original name is ``maxLength - hashSize*2 - 1``.

    Notes
    -----
    Only names that actually had to be shortened are remembered; names that
    already fit within ``maxLength`` pass through `shrink` untouched and do
    not appear when iterating over this object or in its length.
    """

    def __init__(self, maxLength: int, hashSize: int = 4):
        self.maxLength = maxLength
        self.hashSize = hashSize
        # Two-way lookup tables between original and shortened names.
        self._by_shrunk: dict[str, str] = {}
        self._by_original: dict[str, str] = {}

    def shrink(self, original: str) -> str:
        """Shrink a name and remember the mapping between the original name
        and its shrunk form.

        Parameters
        ----------
        original : `str`
            The original name.

        Returns
        -------
        shrunk : `str`
            The shrunk form.  Names that are no longer than ``maxLength``
            are returned unmodified.

        Raises
        ------
        ValueError
            Raised if the name needs shortening but ``maxLength`` is too
            small to hold the ``hashSize*2 + 1`` character hash suffix, or
            if ``hashSize`` is rejected by `hashlib.blake2b`.
        UnicodeEncodeError
            Raised if a name that needs shortening contains non-ASCII
            characters.
        """
        if len(original) <= self.maxLength:
            return original
        cached = self._by_original.get(original)
        if cached is not None:
            return cached
        trunc = self.maxLength - 2 * self.hashSize - 1
        if trunc < 0:
            # A negative slice bound would drop characters from the *end*
            # of the name and yield a result longer than ``maxLength``, so
            # fail explicitly instead of relying on an ``assert`` that is
            # stripped under ``python -O``.
            raise ValueError(
                f"maxLength={self.maxLength} is too small to hold a "
                f"{self.hashSize}-byte hash suffix; at least "
                f"{2 * self.hashSize + 1} characters are required."
            )
        message = hashlib.blake2b(digest_size=self.hashSize)
        message.update(original.encode("ascii"))
        # ``trunc`` characters + "_" + ``2 * hashSize`` hex digits is exactly
        # ``maxLength`` characters, because ``original`` is known to be
        # longer than ``maxLength`` (and hence longer than ``trunc``) here.
        shrunk = f"{original[:trunc]}_{message.digest().hex()}"
        self._by_shrunk[shrunk] = original
        self._by_original[original] = shrunk
        return shrunk

    def expand(self, shrunk: str) -> str:
        """Return the original name that was passed to a previous call to
        `shrink`.

        Parameters
        ----------
        shrunk : `str`
            The shrunk form.

        Returns
        -------
        expanded : `str`
            The expanded form.  If the given name was not passed to `shrink`
            or was not modified by it, it is returned unmodified.
        """
        return self._by_shrunk.get(shrunk, shrunk)

    def __iter__(self) -> Iterator[tuple[str, str]]:
        """Iterate over ``(original, shrunk)`` pairs for every name that
        has been shortened.
        """
        return iter(self._by_original.items())

    def __len__(self) -> int:
        """Return the number of names that have been shortened."""
        return len(self._by_original)

    def update(self, other: NameShrinker) -> None:
        """Add all original <-> shrunk mappings from ``other`` to ``self``.

        Parameters
        ----------
        other : `NameShrinker`
            Object to extract name mappings from.  Where both objects hold
            an entry for the same name, the entry from ``other`` replaces
            the one in ``self``.
        """
        self._by_original.update(other._by_original)
        self._by_shrunk.update(other._by_shrunk)