Coverage for python / lsst / daf / butler / _butler_repo_index.py: 33%
77 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-24 08:17 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-24 08:17 +0000
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
28from __future__ import annotations
30__all__ = ("ButlerRepoIndex",)
32import os
33from typing import Any, ClassVar
35import yaml
36from pydantic import TypeAdapter, ValidationError
38from lsst.resources import ResourcePath
40from ._config import Config
41from ._utilities.thread_safe_cache import ThreadSafeCache
class ButlerRepoIndex:
    """Index of all known butler repositories.

    The index of butler repositories can be configured in two ways:

    1. By setting the environment variable ``DAF_BUTLER_REPOSITORY_INDEX`` to
       the URI of a configuration file.
    2. By setting the environment variable ``DAF_BUTLER_REPOSITORIES`` to the
       contents of the configuration file as a string.

    Setting both environment variables at the same time is an error.

    In either case, the configuration is a simple dictionary lookup of the
    form:

    .. code-block:: yaml

        label1: uri1
        label2: uri2

    and can be in YAML or JSON format. The content of the file will be
    cached.
    """

    index_env_var: ClassVar[str] = "DAF_BUTLER_REPOSITORY_INDEX"
    """The name of the environment variable containing the URI of the index
    configuration file.
    """
    repositories_env_var: ClassVar[str] = "DAF_BUTLER_REPOSITORIES"
    """The name of the environment variable containing the configuration
    directly as a string.
    """

    _cache: ClassVar[ThreadSafeCache[str, dict[str, str]]] = ThreadSafeCache()
    """Cache of indexes. In most scenarios only one index will be found
    and the environment will not change. In tests this may not be true."""

    _most_recent_failure: ClassVar[str] = ""
    """Cache of the most recent failure when reading an index. Reset on
    every read."""

    _index_adapter: ClassVar[TypeAdapter[dict[str, str]]] = TypeAdapter(dict[str, str])
    """Validator for the ``label -> URI`` mapping. Built once and shared
    because an index supplied directly through the environment is
    re-validated on every lookup."""

    @classmethod
    def _read_repository_index(cls, index_uri: str) -> dict[str, str]:
        """Read the repository index from the supplied URI.

        Parameters
        ----------
        index_uri : `str`
            URI of the repository index.

        Returns
        -------
        repo_index : `dict` [ `str` , `str` ]
            The index found at this URI.

        Raises
        ------
        FileNotFoundError
            Raised if the URI does not exist.
        RuntimeError
            Raised if any other problem occurs while reading or validating
            the index. The original exception is chained as the cause.

        Notes
        -----
        Does check the cache before reading the file.
        """
        config = cls._cache.get(index_uri)
        if config is not None:
            return config

        try:
            repo_index = cls._validate_configuration(Config(index_uri))
        except FileNotFoundError as e:
            # More explicit error message.
            raise FileNotFoundError(f"Butler repository index file not found at {index_uri}.") from e
        except Exception as e:
            # Validation failures (ValueError) are also reported this way.
            raise RuntimeError(
                f"Butler repository index file at {index_uri} could not be read: {type(e).__qualname__} {e}"
            ) from e

        # Return whatever the cache hands back rather than our local copy;
        # presumably this is the already-stored entry if another thread
        # populated the cache first (verify against ThreadSafeCache).
        repo_index = cls._cache.set_or_get(index_uri, repo_index)

        return repo_index

    @classmethod
    def _read_repository_index_from_environment(cls) -> dict[str, str]:
        """Look in environment for index location and read it.

        Returns
        -------
        repo_index : `dict` [ `str` , `str` ]
            The index found in the environment.

        Raises
        ------
        RuntimeError
            Raised if both or neither of the environment variables are set
            (an empty value counts as unset), or if the index file could
            not be read.
        FileNotFoundError
            Raised if the index file named in the environment does not
            exist.
        ValueError
            Raised if the configuration given directly in the environment
            is not a mapping of strings to strings.

        Notes
        -----
        The failure message reported by `get_failure_reason` is cleared on
        entry and set to the text of any exception that escapes. Errors
        raised while parsing the direct configuration as YAML are recorded
        and propagated unchanged.
        """
        cls._most_recent_failure = ""
        try:
            index_uri = os.getenv(cls.index_env_var)
            direct_configuration = os.getenv(cls.repositories_env_var)

            if index_uri and direct_configuration:
                raise RuntimeError(
                    f"Only one of the environment variables {cls.repositories_env_var} and"
                    f" {cls.index_env_var} should be set."
                )

            if direct_configuration:
                # Not cached: the variable is re-parsed on every call.
                return cls._validate_configuration(yaml.safe_load(direct_configuration))

            if index_uri:
                return cls._read_repository_index(index_uri)

            raise RuntimeError(
                "No repository index defined. Neither of the environment variables"
                f" {cls.repositories_env_var} or {cls.index_env_var} was set."
            )
        except Exception as e:
            # Remember why we failed so callers that swallow the exception
            # can still report something useful.
            cls._most_recent_failure = str(e)
            raise

    @classmethod
    def get_known_repos(cls) -> set[str]:
        """Retrieve the list of known repository labels.

        Returns
        -------
        repos : `set` of `str`
            All the known labels. Can be empty if no index can be found.

        Notes
        -----
        Any failure to read the index is deliberately swallowed; use
        `get_failure_reason` to find out why an empty set was returned.
        """
        try:
            repo_index = cls._read_repository_index_from_environment()
        except Exception:
            return set()
        return set(repo_index)

    @classmethod
    def get_failure_reason(cls) -> str:
        """Return possible reason for failure to return repository index.

        Returns
        -------
        reason : `str`
            If there is a problem reading the repository index, this will
            contain a string with an explanation. Empty string if everything
            worked.

        Notes
        -----
        The value returned is only reliable if called immediately after a
        failure. The most recent failure reason is reset every time an attempt
        is made to request a label and so the reason can be out of date.
        """
        return cls._most_recent_failure

    @classmethod
    def get_repo_uri(cls, label: str, return_label: bool = False) -> ResourcePath:
        """Look up the label in a butler repository index.

        Parameters
        ----------
        label : `str`
            Label of the Butler repository to look up.
        return_label : `bool`, optional
            If ``label`` cannot be found in the repository index (either
            because index is not defined or ``label`` is not in the index) and
            ``return_label`` is `True` then return ``ResourcePath(label)``.
            If ``return_label`` is `False` (default) then an exception will be
            raised instead.

        Returns
        -------
        uri : `lsst.resources.ResourcePath`
            URI to the Butler repository associated with the given label, or
            ``label`` itself converted to a `~lsst.resources.ResourcePath`
            (without forcing it to be absolute) when the lookup fails and
            ``return_label`` is `True`.

        Raises
        ------
        KeyError
            Raised if the label is not found in the index and
            ``return_label`` is `False`.
        RuntimeError
            Raised if no index is defined in the environment, if both
            environment variables are set, or if the index file could not be
            read, and ``return_label`` is `False`.
        FileNotFoundError
            Raised if an index is defined in the environment but it
            can not be found, and ``return_label`` is `False`.
        ValueError
            Raised if the index given directly in the environment is not in
            the expected format, and ``return_label`` is `False`.
        """
        try:
            repo_index = cls._read_repository_index_from_environment()
        except Exception:
            # With return_label every index problem falls back to treating
            # the label as a URI; otherwise the original error propagates.
            if return_label:
                return ResourcePath(label, forceAbsolute=False)
            raise

        repo_uri = repo_index.get(label)
        if repo_uri is None:
            if return_label:
                return ResourcePath(label, forceAbsolute=False)
            raise KeyError(f"Label '{label}' not known to repository index")
        return ResourcePath(repo_uri)

    @classmethod
    def _validate_configuration(cls, obj: Any) -> dict[str, str]:
        """Check that an object is a mapping of label to URI.

        Parameters
        ----------
        obj : `typing.Any`
            Candidate repository index, as read from a file or parsed from
            the environment.

        Returns
        -------
        repo_index : `dict` [ `str` , `str` ]
            The validated index.

        Raises
        ------
        ValueError
            Raised if ``obj`` fails validation as ``dict[str, str]``.
        """
        try:
            # Reuse the shared adapter rather than rebuilding it per call.
            return cls._index_adapter.validate_python(obj)
        except ValidationError as e:
            raise ValueError("Repository index not in expected format") from e