Coverage for python/lsst/daf/butler/_butler_repo_index.py: 38%
75 statements
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-16 10:44 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
28from __future__ import annotations
30__all__ = ("ButlerRepoIndex",)
32import os
33from typing import ClassVar
35from lsst.resources import ResourcePath
37from ._config import Config
38from ._utilities.thread_safe_cache import ThreadSafeCache
class ButlerRepoIndex:
    """Registry of butler repositories, keyed by a short label.

    The registry is loaded from a configuration file whose URI is given by
    the environment variable ``$DAF_BUTLER_REPOSITORY_INDEX``. That file is
    a flat mapping from label to repository URI:

    .. code-block:: yaml

        label1: uri1
        label2: uri2

    The file may be written in YAML or JSON. Once read, its content is
    cached.
    """

    index_env_var: ClassVar[str] = "DAF_BUTLER_REPOSITORY_INDEX"
    """Environment variable naming the location of the index file."""

    _cache: ClassVar[ThreadSafeCache[ResourcePath, Config]] = ThreadSafeCache()
    """Indexes already read, keyed by the URI they were read from. Usually
    there is a single index and a fixed environment; tests may differ."""

    _most_recent_failure: ClassVar[str] = ""
    """Message from the latest failed attempt to read an index. Cleared at
    the start of every read from the environment."""

    @classmethod
    def _read_repository_index(cls, index_uri: ResourcePath) -> Config:
        """Load the repository index stored at a URI, using the cache.

        Parameters
        ----------
        index_uri : `lsst.resources.ResourcePath`
            Location of the repository index.

        Returns
        -------
        repo_index : `Config`
            The index for this URI; a cached copy is returned if one exists.

        Raises
        ------
        FileNotFoundError
            Raised if the URI does not exist.
        RuntimeError
            Raised if reading the index fails for any other reason.

        Notes
        -----
        The cache is consulted first and the file is only read on a miss.
        """
        if (cached := cls._cache.get(index_uri)) is not None:
            return cached

        try:
            loaded = Config(index_uri)
        except FileNotFoundError as e:
            # Re-raise with a message that names the index explicitly.
            raise FileNotFoundError(f"Butler repository index file not found at {index_uri}.") from e
        except Exception as e:
            raise RuntimeError(
                f"Butler repository index file at {index_uri} could not be read: {type(e).__qualname__} {e}"
            ) from e

        # Hand the freshly read index to the cache and return whatever the
        # cache gives back for this URI.
        return cls._cache.set_or_get(index_uri, loaded)

    @classmethod
    def _get_index_uri(cls) -> ResourcePath:
        """Determine where the repository index lives.

        Returns
        -------
        index_uri : `lsst.resources.ResourcePath`
            Location of the repository index, taken from the environment.

        Raises
        ------
        KeyError
            Raised if the environment variable naming the index is not set.
        """
        if (location := os.environ.get(cls.index_env_var)) is None:
            raise KeyError(f"No repository index defined in environment variable {cls.index_env_var}")
        return ResourcePath(location)

    @classmethod
    def _read_repository_index_from_environment(cls) -> Config:
        """Locate the index through the environment and read it.

        Returns
        -------
        repo_index : `Config`
            The index found in the environment.

        Raises
        ------
        KeyError
            Raised if the location of the index could not be determined.
        Exception
            Any error raised while reading the index is propagated unchanged.

        Notes
        -----
        The stored failure message is cleared on entry. If either step
        fails, the text of the exception is stored before it is re-raised.
        """
        cls._most_recent_failure = ""

        try:
            index_uri = cls._get_index_uri()
        except KeyError as err:
            cls._most_recent_failure = str(err)
            raise

        try:
            return cls._read_repository_index(index_uri)
        except Exception as err:
            cls._most_recent_failure = str(err)
            raise

    @classmethod
    def get_known_repos(cls) -> set[str]:
        """Return the labels of every repository in the index.

        Returns
        -------
        repos : `set` of `str`
            All the known labels. Empty if the index could not be located
            or read.
        """
        try:
            index = cls._read_repository_index_from_environment()
        except Exception:
            # Best effort: report "nothing known" instead of raising.
            return set()
        else:
            return set(index)

    @classmethod
    def get_failure_reason(cls) -> str:
        """Return the reason the most recent index read failed, if it did.

        Returns
        -------
        reason : `str`
            Explanation of the problem encountered when reading the
            repository index. Empty string if the last read succeeded.

        Notes
        -----
        Only trust this value when it is requested immediately after a
        failure. The stored reason is cleared whenever a new attempt is made
        to read the index, so it can be stale.
        """
        return cls._most_recent_failure

    @classmethod
    def get_repo_uri(cls, label: str, return_label: bool = False) -> ResourcePath:
        """Resolve a repository label to a URI using the repository index.

        Parameters
        ----------
        label : `str`
            Label of the Butler repository to look up.
        return_label : `bool`, optional
            Controls what happens when the lookup fails, either because the
            index could not be read or because ``label`` is not in it. If
            `True`, ``ResourcePath(label, forceAbsolute=False)`` is
            returned. If `False` (default), an exception is raised.

        Returns
        -------
        uri : `lsst.resources.ResourcePath`
            URI of the Butler repository associated with the label, or the
            label itself as a `~lsst.resources.ResourcePath` when the lookup
            fails and ``return_label`` is `True`.

        Raises
        ------
        KeyError
            Raised if the label is not found in the index, or if an index
            is not defined, and ``return_label`` is `False`.
        FileNotFoundError
            Raised if an index is defined in the environment but it
            can not be found, and ``return_label`` is `False`.
        RuntimeError
            Raised if the index exists but could not be read, and
            ``return_label`` is `False`.
        """
        try:
            repo_index = cls._read_repository_index_from_environment()
        except Exception:
            if not return_label:
                raise
            return ResourcePath(label, forceAbsolute=False)

        repo_uri = repo_index.get(label)
        if repo_uri is not None:
            return ResourcePath(repo_uri)

        # The index was read but has no entry for this label.
        if return_label:
            return ResourcePath(label, forceAbsolute=False)

        # The index location was resolved moments ago, so this is not
        # expected to fail; the fallback just keeps the message usable.
        try:
            index_uri = str(cls._get_index_uri())
        except KeyError:
            index_uri = "<environment variable not defined>"
        raise KeyError(f"Label '{label}' not known to repository index at {index_uri}")