Coverage for python/lsst/daf/butler/_butler_repo_index.py: 40%

77 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-04-05 02:53 -0700

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

30__all__ = ("ButlerRepoIndex",) 

31 

32import os 

33from typing import Any, ClassVar 

34 

35import yaml 

36from lsst.resources import ResourcePath 

37from pydantic import TypeAdapter, ValidationError 

38 

39from ._config import Config 

40from ._utilities.thread_safe_cache import ThreadSafeCache 

41 

42 

class ButlerRepoIndex:
    """Index of all known butler repositories.

    The index of butler repositories can be configured in two ways:

    1. By setting the environment variable ``DAF_BUTLER_REPOSITORY_INDEX`` to
       the URI of a configuration file.
    2. By setting the environment variable ``DAF_BUTLER_REPOSITORIES`` to the
       contents of the configuration file as a string.

    Setting both variables at the same time is an error.

    In either case, the configuration is a simple dictionary lookup of the
    form:

    .. code-block:: yaml

        label1: uri1
        label2: uri2

    and can be in YAML or JSON format. The content of an index file is
    cached per URI. Configuration supplied directly as a string through
    ``DAF_BUTLER_REPOSITORIES`` is not cached and is parsed and validated on
    every lookup.
    """

    index_env_var: ClassVar[str] = "DAF_BUTLER_REPOSITORY_INDEX"
    """The name of the environment variable containing the URI of the index
    configuration file.
    """
    repositories_env_var: ClassVar[str] = "DAF_BUTLER_REPOSITORIES"
    """The name of the environment variable containing the configuration
    directly as a string.
    """

    _cache: ClassVar[ThreadSafeCache[str, dict[str, str]]] = ThreadSafeCache()
    """Cache of indexes. In most scenarios only one index will be found
    and the environment will not change. In tests this may not be true."""

    _most_recent_failure: ClassVar[str] = ""
    """Cache of the most recent failure when reading an index. Reset on
    every read."""

    _index_adapter: ClassVar[TypeAdapter[dict[str, str]]] = TypeAdapter(dict[str, str])
    """Validator for the ``label -> uri`` mapping. Built once and shared
    because constructing a `pydantic.TypeAdapter` is comparatively expensive
    and validation runs on every lookup that uses the direct configuration.
    """

    @classmethod
    def _read_repository_index(cls, index_uri: str) -> dict[str, str]:
        """Read the repository index from the supplied URI.

        Parameters
        ----------
        index_uri : `str`
            URI of the repository index.

        Returns
        -------
        repo_index : `dict` [ `str` , `str` ]
            The index found at this URI.

        Raises
        ------
        FileNotFoundError
            Raised if reading the index reports that the URI does not exist.
        RuntimeError
            Raised if the index could not be read or validated for any other
            reason. The original exception is chained as the cause.

        Notes
        -----
        Does check the cache before reading the file. Only successfully
        validated indexes are stored in the cache; failures are never cached.
        """
        config = cls._cache.get(index_uri)
        if config is not None:
            return config

        try:
            repo_index = cls._validate_configuration(Config(index_uri))
        except FileNotFoundError as e:
            # More explicit error message.
            raise FileNotFoundError(f"Butler repository index file not found at {index_uri}.") from e
        except Exception as e:
            raise RuntimeError(
                f"Butler repository index file at {index_uri} could not be read: {type(e).__qualname__} {e}"
            ) from e

        # Use whatever the cache hands back rather than the local value.
        # NOTE(review): presumably ``set_or_get`` returns an entry stored
        # concurrently by another thread if one exists -- confirm against
        # ThreadSafeCache.
        repo_index = cls._cache.set_or_get(index_uri, repo_index)

        return repo_index

    @classmethod
    def _read_repository_index_from_environment(cls) -> dict[str, str]:
        """Look in environment for index location and read it.

        Returns
        -------
        repo_index : `dict` [ `str` , `str` ]
            The index found in the environment.

        Raises
        ------
        RuntimeError
            Raised if both or neither of the two environment variables are
            set, or if the index file could not be read.
        FileNotFoundError
            Raised if the index file named in the environment does not exist.
        ValueError
            Raised if the configuration given directly in the environment is
            not a mapping of strings to strings.

        Notes
        -----
        The most recent failure message is cleared on entry and, if any
        exception is raised, set to the text of that exception before it is
        re-raised. Other exceptions raised while parsing the direct
        configuration (for example YAML syntax errors) also propagate.
        """
        cls._most_recent_failure = ""
        try:
            index_uri = os.getenv(cls.index_env_var)
            direct_configuration = os.getenv(cls.repositories_env_var)

            # Unset and empty variables are treated alike by these checks.
            if index_uri and direct_configuration:
                raise RuntimeError(
                    f"Only one of the environment variables {cls.repositories_env_var} and"
                    f" {cls.index_env_var} should be set."
                )

            if direct_configuration:
                return cls._validate_configuration(yaml.safe_load(direct_configuration))

            if index_uri:
                return cls._read_repository_index(index_uri)

            raise RuntimeError(
                "No repository index defined. Neither of the environment variables"
                f" {cls.repositories_env_var} or {cls.index_env_var} was set."
            )
        except Exception as e:
            # Remember why the lookup failed so that get_failure_reason() can
            # report it, then let the caller decide how to handle the error.
            cls._most_recent_failure = str(e)
            raise

    @classmethod
    def get_known_repos(cls) -> set[str]:
        """Retrieve the set of known repository labels.

        Returns
        -------
        repos : `set` of `str`
            All the known labels. Can be empty if no index can be found.

        Notes
        -----
        Any failure to read the index is deliberately swallowed and reported
        as an empty set. The reason can be retrieved with
        `get_failure_reason`.
        """
        try:
            repo_index = cls._read_repository_index_from_environment()
        except Exception:
            return set()
        return set(repo_index)

    @classmethod
    def get_failure_reason(cls) -> str:
        """Return possible reason for failure to return repository index.

        Returns
        -------
        reason : `str`
            If there is a problem reading the repository index, this will
            contain a string with an explanation. Empty string if everything
            worked.

        Notes
        -----
        The value returned is only reliable if called immediately after a
        failure. The most recent failure reason is reset every time an attempt
        is made to request a label and so the reason can be out of date.
        """
        return cls._most_recent_failure

    @classmethod
    def get_repo_uri(cls, label: str, return_label: bool = False) -> ResourcePath:
        """Look up the label in a butler repository index.

        Parameters
        ----------
        label : `str`
            Label of the Butler repository to look up.
        return_label : `bool`, optional
            If ``label`` cannot be found in the repository index (either
            because index is not defined or ``label`` is not in the index) and
            ``return_label`` is `True` then return ``ResourcePath(label)``.
            If ``return_label`` is `False` (default) then an exception will be
            raised instead.

        Returns
        -------
        uri : `lsst.resources.ResourcePath`
            URI to the Butler repository associated with the given label or
            default value if it is provided.

        Raises
        ------
        KeyError
            Raised if the label is not found in the index and
            ``return_label`` is `False`.
        RuntimeError
            Raised if no index is defined in the environment, if both
            environment variables are set, or if the index file could not be
            read, and ``return_label`` is `False`.
        FileNotFoundError
            Raised if an index is defined in the environment but it
            can not be found, and ``return_label`` is `False`.
        ValueError
            Raised if the configuration given directly in the environment is
            not in the expected format and ``return_label`` is `False`.

        Notes
        -----
        When ``return_label`` is `True` every failure to read the index is
        suppressed and the label itself is returned as a URI.
        """
        try:
            repo_index = cls._read_repository_index_from_environment()
        except Exception:
            if return_label:
                return ResourcePath(label, forceAbsolute=False)
            raise

        repo_uri = repo_index.get(label)
        if repo_uri is None:
            if return_label:
                return ResourcePath(label, forceAbsolute=False)
            raise KeyError(f"Label '{label}' not known to repository index")
        return ResourcePath(repo_uri)

    @classmethod
    def _validate_configuration(cls, obj: Any) -> dict[str, str]:
        """Check that an object is a mapping of labels to URIs.

        Parameters
        ----------
        obj : `typing.Any`
            The parsed configuration to check.

        Returns
        -------
        repo_index : `dict` [ `str` , `str` ]
            The validated index.

        Raises
        ------
        ValueError
            Raised if the object fails validation as a `dict` of `str` to
            `str`. The underlying validation error is chained as the cause.
        """
        try:
            return cls._index_adapter.validate_python(obj)
        except ValidationError as e:
            raise ValueError("Repository index not in expected format") from e