Coverage for python/lsst/daf/butler/_butler_repo_index.py: 40%
77 statements
« prev ^ index » next coverage.py v7.5.0, created at 2024-04-27 03:00 -0700
« prev ^ index » next coverage.py v7.5.0, created at 2024-04-27 03:00 -0700
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
28from __future__ import annotations
30__all__ = ("ButlerRepoIndex",)
32import os
33from typing import Any, ClassVar
35import yaml
36from lsst.resources import ResourcePath
37from pydantic import TypeAdapter, ValidationError
39from ._config import Config
40from ._utilities.thread_safe_cache import ThreadSafeCache
class ButlerRepoIndex:
    """Index of all known butler repositories.

    The index of butler repositories can be configured in two ways:

    1. By setting the environment variable ``DAF_BUTLER_REPOSITORY_INDEX`` to
       the URI of a configuration file.
    2. By setting the environment variable ``DAF_BUTLER_REPOSITORIES`` to the
       contents of the configuration file as a string.

    Setting both environment variables at the same time is an error.

    In either case, the configuration is a simple dictionary lookup of the
    form:

    .. code-block:: yaml

       label1: uri1
       label2: uri2

    and can be in YAML or JSON format. The content of an index file is
    cached per URI; configuration supplied directly through the environment
    is parsed again on every lookup.
    """

    index_env_var: ClassVar[str] = "DAF_BUTLER_REPOSITORY_INDEX"
    """The name of the environment variable containing the URI of the index
    configuration file.
    """

    repositories_env_var: ClassVar[str] = "DAF_BUTLER_REPOSITORIES"
    """The name of the environment variable containing the configuration
    directly as a string.
    """

    _cache: ClassVar[ThreadSafeCache[str, dict[str, str]]] = ThreadSafeCache()
    """Cache of indexes. In most scenarios only one index will be found
    and the environment will not change. In tests this may not be true."""

    _most_recent_failure: ClassVar[str] = ""
    """Cache of the most recent failure when reading an index. Reset on
    every read."""

    _index_adapter: ClassVar[TypeAdapter[dict[str, str]]] = TypeAdapter(dict[str, str])
    """Validator for the ``label -> URI`` mapping. Built once here rather
    than on every validation call because constructing a `TypeAdapter`
    rebuilds the underlying validation schema each time."""

    @classmethod
    def _read_repository_index(cls, index_uri: str) -> dict[str, str]:
        """Read the repository index from the supplied URI.

        Parameters
        ----------
        index_uri : `str`
            URI of the repository index.

        Returns
        -------
        repo_index : `dict` [ `str` , `str` ]
            The index found at this URI.

        Raises
        ------
        FileNotFoundError
            Raised if reading the index reports that the file does not
            exist.
        RuntimeError
            Raised if the index could not be read or validated for any
            other reason. The original exception is chained as the cause.

        Notes
        -----
        The cache is consulted before the file is read, and a successfully
        read index is stored in the cache keyed by ``index_uri``.
        """
        config = cls._cache.get(index_uri)
        if config is not None:
            return config

        try:
            repo_index = cls._validate_configuration(Config(index_uri))
        except FileNotFoundError as e:
            # More explicit error message.
            raise FileNotFoundError(f"Butler repository index file not found at {index_uri}.") from e
        except Exception as e:
            raise RuntimeError(
                f"Butler repository index file at {index_uri} could not be read: {type(e).__qualname__} {e}"
            ) from e

        # Use whatever the cache hands back rather than the local copy;
        # presumably this is the previously stored index if another thread
        # populated the entry first (confirm against ThreadSafeCache).
        repo_index = cls._cache.set_or_get(index_uri, repo_index)

        return repo_index

    @classmethod
    def _read_repository_index_from_environment(cls) -> dict[str, str]:
        """Look in environment for index location and read it.

        Returns
        -------
        repo_index : `dict` [ `str` , `str` ]
            The index found in the environment.

        Raises
        ------
        RuntimeError
            Raised if both environment variables are set, if neither is
            set, or if the index file could not be read.
        FileNotFoundError
            Raised if the index file named in the environment is reported
            as missing.
        ValueError
            Raised if configuration supplied directly in the environment is
            not a mapping of strings to strings.

        Notes
        -----
        The most recent failure message is cleared on entry and, if any
        exception escapes, set to the string form of that exception before
        it is re-raised.
        """
        cls._most_recent_failure = ""
        try:
            index_uri = os.getenv(cls.index_env_var)
            direct_configuration = os.getenv(cls.repositories_env_var)

            if index_uri and direct_configuration:
                raise RuntimeError(
                    f"Only one of the environment variables {cls.repositories_env_var} and"
                    f" {cls.index_env_var} should be set."
                )

            if direct_configuration:
                # Not cached: the text is parsed afresh on every call.
                return cls._validate_configuration(yaml.safe_load(direct_configuration))

            if index_uri:
                return cls._read_repository_index(index_uri)

            raise RuntimeError(
                "No repository index defined.  Neither of the environment variables"
                f" {cls.repositories_env_var} or {cls.index_env_var} was set."
            )
        except Exception as e:
            # Remember why the lookup failed so that get_failure_reason()
            # can report it, then let the caller decide what to do.
            cls._most_recent_failure = str(e)
            raise

    @classmethod
    def get_known_repos(cls) -> set[str]:
        """Retrieve the set of known repository labels.

        Returns
        -------
        repos : `set` of `str`
            All the known labels. Empty if the index could not be obtained
            for any reason; use `get_failure_reason` to find out why.
        """
        try:
            repo_index = cls._read_repository_index_from_environment()
        except Exception:
            # Deliberately best-effort: the failure text has already been
            # recorded by _read_repository_index_from_environment.
            return set()
        return set(repo_index)

    @classmethod
    def get_failure_reason(cls) -> str:
        """Return possible reason for failure to return repository index.

        Returns
        -------
        reason : `str`
            If there is a problem reading the repository index, this will
            contain a string with an explanation. Empty string if everything
            worked.

        Notes
        -----
        The value returned is only reliable if called immediately after a
        failure. The most recent failure reason is reset every time an attempt
        is made to read the index and so the reason can be out of date.
        """
        return cls._most_recent_failure

    @classmethod
    def get_repo_uri(cls, label: str, return_label: bool = False) -> ResourcePath:
        """Look up the label in a butler repository index.

        Parameters
        ----------
        label : `str`
            Label of the Butler repository to look up.
        return_label : `bool`, optional
            If ``label`` cannot be found in the repository index (either
            because index is not defined or ``label`` is not in the index) and
            ``return_label`` is `True` then return ``ResourcePath(label)``.
            If ``return_label`` is `False` (default) then an exception will be
            raised instead.

        Returns
        -------
        uri : `lsst.resources.ResourcePath`
            URI to the Butler repository associated with the given label, or
            ``label`` itself converted to a URI if the lookup failed and
            ``return_label`` is `True`.

        Raises
        ------
        KeyError
            Raised if an index was read but the label is not in it, and
            ``return_label`` is `False`.
        RuntimeError
            Raised if no index is defined, if both environment variables
            are set, or if the index file could not be read, and
            ``return_label`` is `False`.
        FileNotFoundError
            Raised if an index is defined in the environment but it
            can not be found, and ``return_label`` is `False`.
        ValueError
            Raised if configuration supplied directly in the environment is
            not in the expected format, and ``return_label`` is `False`.
        """
        try:
            repo_index = cls._read_repository_index_from_environment()
        except Exception:
            if return_label:
                # Fall back to treating the label as a (possibly relative)
                # location.
                return ResourcePath(label, forceAbsolute=False)
            raise

        repo_uri = repo_index.get(label)
        if repo_uri is None:
            if return_label:
                return ResourcePath(label, forceAbsolute=False)
            raise KeyError(f"Label '{label}' not known to repository index")
        return ResourcePath(repo_uri)

    @classmethod
    def _validate_configuration(cls, obj: Any) -> dict[str, str]:
        """Check that an object is a mapping of string labels to string URIs.

        Parameters
        ----------
        obj : `~typing.Any`
            Candidate repository index, such as the parsed content of an
            index file or of the direct-configuration environment variable.

        Returns
        -------
        repo_index : `dict` [ `str` , `str` ]
            The validated index.

        Raises
        ------
        ValueError
            Raised if ``obj`` fails validation as ``dict[str, str]``. The
            validation error is chained as the cause.
        """
        try:
            return cls._index_adapter.validate_python(obj)
        except ValidationError as e:
            raise ValueError("Repository index not in expected format") from e