Coverage for python / lsst / daf / butler / _butler_repo_index.py: 33%

77 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-24 08:17 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

30__all__ = ("ButlerRepoIndex",) 

31 

32import os 

33from typing import Any, ClassVar 

34 

35import yaml 

36from pydantic import TypeAdapter, ValidationError 

37 

38from lsst.resources import ResourcePath 

39 

40from ._config import Config 

41from ._utilities.thread_safe_cache import ThreadSafeCache 

42 

43 

44class ButlerRepoIndex: 

45 """Index of all known butler repositories. 

46 

47 The index of butler repositories can be configured in two ways: 

48 

49 1. By setting the environment variable ``DAF_BUTLER_REPOSITORY_INDEX`` to 

50 the URI of a configuration file. 

51 2. By setting the environment variable ``DAF_BUTLER_REPOSITORIES`` to the 

52 contents of the configuration file as a string. 

53 

54 In either case, the configuration is a simple dictionary lookup of the 

55 form: 

56 

57 .. code-block:: yaml 

58 

59 label1: uri1 

60 label2: uri2 

61 

62 and can be in YAML or JSON format. The content of the file will be 

63 cached. 

64 """ 

65 

66 index_env_var: ClassVar[str] = "DAF_BUTLER_REPOSITORY_INDEX" 

67 """The name of the environment variable containing the URI of the index 

68 configuration file. 

69 """ 

70 repositories_env_var: ClassVar[str] = "DAF_BUTLER_REPOSITORIES" 

71 """The name of the environment variable containing the configuration 

72 directly as a string. 

73 """ 

74 

75 _cache: ClassVar[ThreadSafeCache[str, dict[str, str]]] = ThreadSafeCache() 

76 """Cache of indexes. In most scenarios only one index will be found 

77 and the environment will not change. In tests this may not be true.""" 

78 

79 _most_recent_failure: ClassVar[str] = "" 

80 """Cache of the most recent failure when reading an index. Reset on 

81 every read.""" 

82 

83 @classmethod 

84 def _read_repository_index(cls, index_uri: str) -> dict[str, str]: 

85 """Read the repository index from the supplied URI. 

86 

87 Parameters 

88 ---------- 

89 index_uri : `str` 

90 URI of the repository index. 

91 

92 Returns 

93 ------- 

94 repo_index : `dict` [ `str` , `str` ] 

95 The index found at this URI. 

96 

97 Raises 

98 ------ 

99 FileNotFoundError 

100 Raised if the URI does not exist. 

101 

102 Notes 

103 ----- 

104 Checks the cache before reading the file. 

105 """ 

106 config = cls._cache.get(index_uri) 

107 if config is not None: 

108 return config 

109 

110 try: 

111 repo_index = cls._validate_configuration(Config(index_uri)) 

112 except FileNotFoundError as e: 

113 # More explicit error message. 

114 raise FileNotFoundError(f"Butler repository index file not found at {index_uri}.") from e 

115 except Exception as e: 

116 raise RuntimeError( 

117 f"Butler repository index file at {index_uri} could not be read: {type(e).__qualname__} {e}" 

118 ) from e 

119 repo_index = cls._cache.set_or_get(index_uri, repo_index) 

120 

121 return repo_index 

122 

123 @classmethod 

124 def _read_repository_index_from_environment(cls) -> dict[str, str]: 

125 """Look in environment for index location and read it. 

126 

127 Returns 

128 ------- 

129 repo_index : `dict` [ `str` , `str` ] 

130 The index found in the environment. 

131 """ 

132 cls._most_recent_failure = "" 

133 try: 

134 index_uri = os.getenv(cls.index_env_var) 

135 direct_configuration = os.getenv(cls.repositories_env_var) 

136 

137 if index_uri and direct_configuration: 

138 raise RuntimeError( 

139 f"Only one of the environment variables {cls.repositories_env_var} and" 

140 f" {cls.index_env_var} should be set." 

141 ) 

142 

143 if direct_configuration: 

144 return cls._validate_configuration(yaml.safe_load(direct_configuration)) 

145 

146 if index_uri: 

147 return cls._read_repository_index(index_uri) 

148 

149 raise RuntimeError( 

150 "No repository index defined. Neither of the environment variables" 

151 f" {cls.repositories_env_var} or {cls.index_env_var} was set." 

152 ) 

153 except Exception as e: 

154 cls._most_recent_failure = str(e) 

155 raise 

156 

157 @classmethod 

158 def get_known_repos(cls) -> set[str]: 

159 """Retrieve the list of known repository labels. 

160 

161 Returns 

162 ------- 

163 repos : `set` of `str` 

164 All the known labels. Can be empty if no index can be found. 

165 """ 

166 try: 

167 repo_index = cls._read_repository_index_from_environment() 

168 except Exception: 

169 return set() 

170 return set(repo_index) 

171 

172 @classmethod 

173 def get_failure_reason(cls) -> str: 

174 """Return possible reason for failure to return repository index. 

175 

176 Returns 

177 ------- 

178 reason : `str` 

179 If there is a problem reading the repository index, this will 

180 contain a string with an explanation. Empty string if everything 

181 worked. 

182 

183 Notes 

184 ----- 

185 The value returned is only reliable if called immediately after a 

186 failure. The most recent failure reason is reset every time an attempt 

187 is made to request a label and so the reason can be out of date. 

188 """ 

189 return cls._most_recent_failure 

190 

191 @classmethod 

192 def get_repo_uri(cls, label: str, return_label: bool = False) -> ResourcePath: 

193 """Look up the label in a butler repository index. 

194 

195 Parameters 

196 ---------- 

197 label : `str` 

198 Label of the Butler repository to look up. 

199 return_label : `bool`, optional 

200 If ``label`` cannot be found in the repository index (either 

201 because index is not defined or ``label`` is not in the index) and 

202 ``return_label`` is `True` then return ``ResourcePath(label)``. 

203 If ``return_label`` is `False` (default) then an exception will be 

204 raised instead. 

205 

206 Returns 

207 ------- 

208 uri : `lsst.resources.ResourcePath` 

209 URI to the Butler repository associated with the given label, or 

210 ``ResourcePath(label)`` if not found and ``return_label`` is `True`. 

211 

212 Raises 

213 ------ 

214 KeyError 

215 Raised if the label is not found in the index and ``return_label`` 

216 is `False`. An undefined index raises `RuntimeError` instead. 

217 FileNotFoundError 

218 Raised if an index is defined in the environment but it 

219 cannot be found. 

220 """ 

221 try: 

222 repo_index = cls._read_repository_index_from_environment() 

223 except Exception: 

224 if return_label: 

225 return ResourcePath(label, forceAbsolute=False) 

226 raise 

227 

228 repo_uri = repo_index.get(label) 

229 if repo_uri is None: 

230 if return_label: 

231 return ResourcePath(label, forceAbsolute=False) 

232 raise KeyError(f"Label '{label}' not known to repository index") 

233 return ResourcePath(repo_uri) 

234 

235 @classmethod 

236 def _validate_configuration(cls, obj: Any) -> dict[str, str]: 

237 try: 

238 return TypeAdapter(dict[str, str]).validate_python(obj) 

239 except ValidationError as e: 

240 raise ValueError("Repository index not in expected format") from e