Coverage for python/lsst/daf/butler/registry/nameShrinker.py: 25%
24 statements
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-25 10:50 +0000
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-25 10:50 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
27from __future__ import annotations
29__all__ = ["NameShrinker"]
31import hashlib
class NameShrinker:
    """A utility class for `Database` implementations that need a nontrivial
    implementation of `Database.shrinkDatabaseEntityName` and
    `Database.expandDatabaseEntityName`.

    Names no longer than ``maxLength`` pass through unchanged. Longer names
    are truncated and suffixed with a hash of the full original name, and the
    mapping is remembered so that `expand` can reverse it.

    Parameters
    ----------
    maxLength : `int`
        The maximum number of characters in a database entity name.
    hashSize : `int`, optional
        The size of the hash (in bytes) to use for the tail of the shortened
        name. The hash is written in hexadecimal and prefixed with a "_", so
        the number of characters the hash occupies is ``hashSize*2 + 1``, and
        hence the number of characters preserved from the beginning of the
        original name is ``maxLength - hashSize*2 - 1``.
    """

    def __init__(self, maxLength: int, hashSize: int = 4):
        self.maxLength = maxLength
        self.hashSize = hashSize
        # Two-way mapping between original names and their shrunk forms;
        # only names that were actually shortened are recorded.
        self._by_shrunk: dict[str, str] = {}
        self._by_original: dict[str, str] = {}

    def shrink(self, original: str) -> str:
        """Shrink a name and remember the mapping between the original name and
        its shrunk form.

        Parameters
        ----------
        original : `str`
            The original name.

        Returns
        -------
        shrunk : `str`
            The shrunk form. Names that already fit within ``maxLength`` are
            returned unchanged; longer names come back exactly ``maxLength``
            characters long.

        Raises
        ------
        ValueError
            Raised if a name needs shrinking but ``maxLength`` is too small
            to hold the ``hashSize*2 + 1`` character hash suffix, or if
            ``hashSize`` is not a digest size accepted by `hashlib.blake2b`.
        UnicodeEncodeError
            Raised if a name that needs shrinking contains non-ASCII
            characters.
        """
        if len(original) <= self.maxLength:
            return original
        cached = self._by_original.get(original)
        if cached is not None:
            return cached
        trunc = self.maxLength - 2 * self.hashSize - 1
        if trunc < 0:
            # A negative slice bound would drop characters from the *end* of
            # the name and yield a result longer than maxLength; fail loudly
            # rather than rely on an assert that ``python -O`` strips.
            raise ValueError(
                f"maxLength={self.maxLength} is too small to hold a hash "
                f"suffix of {2 * self.hashSize + 1} characters "
                f"(hashSize={self.hashSize})."
            )
        message = hashlib.blake2b(digest_size=self.hashSize)
        message.update(original.encode("ascii"))
        # With trunc >= 0 and len(original) > maxLength >= trunc, the result
        # is always exactly maxLength characters long.
        shrunk = f"{original[:trunc]}_{message.digest().hex()}"
        # NOTE: if two different originals ever produced the same shrunk
        # name, the later one replaces the earlier one in the reverse mapping.
        self._by_shrunk[shrunk] = original
        self._by_original[original] = shrunk
        return shrunk

    def expand(self, shrunk: str) -> str:
        """Return the original name that was passed to a previous call to
        `shrink`.

        Parameters
        ----------
        shrunk : `str`
            The shrunk form.

        Returns
        -------
        expanded : `str`
            The expanded form. If the given name was not passed to `shrink`
            or was not modified by it, it is returned unmodified.
        """
        return self._by_shrunk.get(shrunk, shrunk)