Coverage for python/lsst/daf/butler/registry/nameShrinker.py: 25%
24 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-08-05 01:26 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-08-05 01:26 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = ["NameShrinker"]
25import hashlib
class NameShrinker:
    """A utility class for `Database` implementations that need a nontrivial
    implementation of `Database.shrinkDatabaseEntityName` and
    `Database.expandDatabaseEntityName`.

    Names that already fit within ``maxLength`` are passed through untouched.
    Longer names are replaced by a prefix of the original followed by ``"_"``
    and a hexadecimal BLAKE2b digest of the full original name, giving a
    result that is exactly ``maxLength`` characters long.  Every shrunk name
    is remembered so that `expand` can recover the original.

    Parameters
    ----------
    maxLength : `int`
        The maximum number of characters in a database entity name.
    hashSize : `int`, optional
        The size of the hash (in bytes) to use for the tail of the shortened
        name.  The hash is written in hexadecimal and prefixed with a "_", so
        the number of characters the hash occupies is ``hashSize*2 + 1``, and
        hence the number of characters preserved from the beginning of the
        original name is ``maxLength - hashSize*2 - 1``.
    """

    def __init__(self, maxLength: int, hashSize: int = 4):
        self.maxLength = maxLength
        self.hashSize = hashSize
        # Two-way cache of the names that were actually shortened; names that
        # already fit are never stored in either mapping.
        self._by_shrunk: dict[str, str] = {}
        self._by_original: dict[str, str] = {}

    def shrink(self, original: str) -> str:
        """Shrink a name and remember the mapping between the original name and
        its shrunk form.

        Parameters
        ----------
        original : `str`
            The name to shorten.

        Returns
        -------
        shrunk : `str`
            ``original`` itself if it has at most ``maxLength`` characters,
            otherwise a name of exactly ``maxLength`` characters made of a
            prefix of ``original``, an underscore, and a hexadecimal hash of
            ``original``.

        Raises
        ------
        ValueError
            Raised if the name must be shortened but ``maxLength`` is too
            small to hold even the ``hashSize*2 + 1`` characters of the hash
            suffix, or if ``hashSize`` is not a digest size accepted by
            `hashlib.blake2b`.
        UnicodeEncodeError
            Raised if the name must be shortened and contains non-ASCII
            characters.
        """
        if len(original) <= self.maxLength:
            return original
        cached = self._by_original.get(original)
        if cached is not None:
            return cached
        message = hashlib.blake2b(digest_size=self.hashSize)
        message.update(original.encode("ascii"))
        trunc = self.maxLength - 2 * self.hashSize - 1
        if trunc < 0:
            # A negative slice bound would drop characters from the *end* of
            # the name rather than keep a prefix, yielding a result of the
            # wrong length.  Fail loudly instead of relying on an assert,
            # which is stripped when Python runs with -O.
            raise ValueError(
                f"Cannot shrink a name to {self.maxLength} characters: the "
                f"{self.hashSize}-byte hash suffix alone needs "
                f"{2 * self.hashSize + 1} characters."
            )
        shrunk = f"{original[:trunc]}_{message.digest().hex()}"
        # Internal sanity check only; guaranteed by the guard above.
        assert len(shrunk) == self.maxLength
        # NOTE(review): a second, different original that produced the same
        # shrunk name would overwrite this entry; no collision check is made.
        self._by_shrunk[shrunk] = original
        self._by_original[original] = shrunk
        return shrunk

    def expand(self, shrunk: str) -> str:
        """Return the original name that was passed to a previous call to
        `shrink`.

        If the given name was not passed to `shrink` or was not modified by
        it, it is returned unmodified.

        Parameters
        ----------
        shrunk : `str`
            A name, typically one previously returned by `shrink`.

        Returns
        -------
        original : `str`
            The name that `shrink` shortened to ``shrunk``, or ``shrunk``
            itself if no such mapping was recorded.
        """
        return self._by_shrunk.get(shrunk, shrunk)