Coverage for python/lsst/daf/butler/_butler_repo_index.py: 38%
73 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-12-05 11:07 +0000
« prev ^ index » next coverage.py v7.3.2, created at 2023-12-05 11:07 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
28from __future__ import annotations
30__all__ = ("ButlerRepoIndex",)
32import os
33from typing import ClassVar
35from lsst.resources import ResourcePath
37from ._config import Config
class ButlerRepoIndex:
    """Index of all known butler repositories.

    The index of butler repositories is found by looking for a
    configuration file at the URI pointed at by the environment
    variable ``$DAF_BUTLER_REPOSITORY_INDEX``. The configuration file
    is a simple dictionary lookup of the form:

    .. code-block:: yaml

       label1: uri1
       label2: uri2

    and can be in YAML or JSON format. The content of the file will be
    cached.
    """

    index_env_var: ClassVar[str] = "DAF_BUTLER_REPOSITORY_INDEX"
    """The name of the environment variable to read to locate the index."""

    _cache: ClassVar[dict[ResourcePath, Config]] = {}
    """Cache of indexes. In most scenarios only one index will be found
    and the environment will not change. In tests this may not be true."""

    _most_recent_failure: ClassVar[str] = ""
    """Cache of the most recent failure when reading an index. Reset on
    every read."""

    @classmethod
    def _read_repository_index(cls, index_uri: ResourcePath) -> Config:
        """Read the repository index from the supplied URI.

        Parameters
        ----------
        index_uri : `lsst.resources.ResourcePath`
            URI of the repository index.

        Returns
        -------
        repo_index : `Config`
            The index found at this URI.

        Raises
        ------
        FileNotFoundError
            Raised if the URI does not exist.
        RuntimeError
            Raised if the index exists but could not be read for any other
            reason. The original exception is chained as the cause.

        Notes
        -----
        The cache is consulted before reading the file, and a successful
        read is stored in the cache keyed by ``index_uri``. Failed reads are
        not cached.
        """
        if index_uri in cls._cache:
            return cls._cache[index_uri]

        try:
            repo_index = Config(index_uri)
        except FileNotFoundError as e:
            # More explicit error message.
            raise FileNotFoundError(f"Butler repository index file not found at {index_uri}.") from e
        except Exception as e:
            raise RuntimeError(
                f"Butler repository index file at {index_uri} could not be read: {type(e).__qualname__} {e}"
            ) from e
        cls._cache[index_uri] = repo_index

        return repo_index

    @classmethod
    def _get_index_uri(cls) -> ResourcePath:
        """Find the URI to the repository index.

        Returns
        -------
        index_uri : `lsst.resources.ResourcePath`
            URI to the repository index.

        Raises
        ------
        KeyError
            Raised if the location of the index could not be determined
            because the environment variable named by ``index_env_var`` is
            not set.
        """
        index_uri = os.environ.get(cls.index_env_var)
        if index_uri is None:
            raise KeyError(f"No repository index defined in environment variable {cls.index_env_var}")
        return ResourcePath(index_uri)

    @classmethod
    def _read_repository_index_from_environment(cls) -> Config:
        """Look in environment for index location and read it.

        Returns
        -------
        repo_index : `Config`
            The index found in the environment.

        Raises
        ------
        KeyError
            Raised if the environment variable locating the index is not
            set.
        FileNotFoundError
            Raised if the index URI does not exist.
        RuntimeError
            Raised if the index could not be read for any other reason.

        Notes
        -----
        The most recent failure message is cleared at the start of every
        call and set to the text of whichever exception is raised, so that
        `get_failure_reason` reflects the outcome of this attempt.
        """
        cls._most_recent_failure = ""
        try:
            # Resolve the URI and read the index under a single handler so
            # that *any* failure is recorded, including a non-KeyError
            # raised while turning the environment value into a URI.
            # Otherwise callers that swallow the exception would see an
            # empty failure reason, which is documented to mean success.
            index_uri = cls._get_index_uri()
            return cls._read_repository_index(index_uri)
        except Exception as e:
            cls._most_recent_failure = str(e)
            raise

    @classmethod
    def get_known_repos(cls) -> set[str]:
        """Retrieve the set of known repository labels.

        Returns
        -------
        repos : `set` of `str`
            All the known labels. Can be empty if no index can be found
            or if the index could not be read; in that case
            `get_failure_reason` reports why.
        """
        try:
            repo_index = cls._read_repository_index_from_environment()
        except Exception:
            # Deliberately best-effort: the reason has been recorded and
            # can be retrieved with get_failure_reason().
            return set()
        return set(repo_index)

    @classmethod
    def get_failure_reason(cls) -> str:
        """Return possible reason for failure to return repository index.

        Returns
        -------
        reason : `str`
            If there is a problem reading the repository index, this will
            contain a string with an explanation. Empty string if everything
            worked.

        Notes
        -----
        The value returned is only reliable if called immediately after a
        failure. The most recent failure reason is reset every time an attempt
        is made to read the index (when requesting a label or the known
        labels) and so the reason can be out of date.
        """
        return cls._most_recent_failure

    @classmethod
    def get_repo_uri(cls, label: str, return_label: bool = False) -> ResourcePath:
        """Look up the label in a butler repository index.

        Parameters
        ----------
        label : `str`
            Label of the Butler repository to look up.
        return_label : `bool`, optional
            If ``label`` cannot be found in the repository index (either
            because index is not defined or ``label`` is not in the index) and
            ``return_label`` is `True` then return ``ResourcePath(label)``.
            If ``return_label`` is `False` (default) then an exception will be
            raised instead.

        Returns
        -------
        uri : `lsst.resources.ResourcePath`
            URI to the Butler repository associated with the given label, or
            ``label`` itself as a `~lsst.resources.ResourcePath` (not forced
            to be absolute) if the label could not be resolved and
            ``return_label`` is `True`.

        Raises
        ------
        KeyError
            Raised if the label is not found in the index, or if an index
            is not defined, and ``return_label`` is `False`.
        FileNotFoundError
            Raised if an index is defined in the environment but it
            can not be found, and ``return_label`` is `False`.
        RuntimeError
            Raised if an index is defined in the environment but it could
            not be read, and ``return_label`` is `False`.
        """
        try:
            repo_index = cls._read_repository_index_from_environment()
        except Exception:
            if return_label:
                return ResourcePath(label, forceAbsolute=False)
            raise

        repo_uri = repo_index.get(label)
        if repo_uri is None:
            if return_label:
                return ResourcePath(label, forceAbsolute=False)
            # This should not raise since it worked earlier.
            try:
                index_uri = str(cls._get_index_uri())
            except KeyError:
                index_uri = "<environment variable not defined>"
            raise KeyError(f"Label '{label}' not known to repository index at {index_uri}")
        return ResourcePath(repo_uri)