Coverage for python/lsst/daf/butler/_butler_repo_index.py: 38%

75 statements  

« prev     ^ index     » next       coverage.py v7.4.0, created at 2024-01-25 10:50 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

30__all__ = ("ButlerRepoIndex",) 

31 

32import os 

33from typing import ClassVar 

34 

35from lsst.resources import ResourcePath 

36 

37from ._config import Config 

38from ._utilities.thread_safe_cache import ThreadSafeCache 

39 

40 

class ButlerRepoIndex:
    """Registry mapping short labels to butler repository locations.

    The registry is loaded from a configuration file whose URI is taken
    from the environment variable ``$DAF_BUTLER_REPOSITORY_INDEX``. The
    file holds a flat mapping of label to repository URI:

    .. code-block:: yaml

        label1: uri1
        label2: uri2

    YAML and JSON are both accepted. Once read, the content of an index
    file is cached and reused for later lookups of the same URI.
    """

    index_env_var: ClassVar[str] = "DAF_BUTLER_REPOSITORY_INDEX"
    """Name of the environment variable holding the URI of the index."""

    _cache: ClassVar[ThreadSafeCache[ResourcePath, Config]] = ThreadSafeCache()
    """Previously read indexes, keyed by the URI they were read from.
    Normally there is a single index and a static environment, but tests
    may switch between several."""

    _most_recent_failure: ClassVar[str] = ""
    """Message from the latest failed attempt to read an index. Cleared at
    the start of every read from the environment."""

    @classmethod
    def _read_repository_index(cls, index_uri: ResourcePath) -> Config:
        """Load the repository index stored at the given URI.

        Parameters
        ----------
        index_uri : `lsst.resources.ResourcePath`
            Location of the repository index.

        Returns
        -------
        repo_index : `Config`
            The index associated with this URI.

        Raises
        ------
        FileNotFoundError
            Raised if nothing exists at the URI.
        RuntimeError
            Raised if the index exists but could not be read for any other
            reason.

        Notes
        -----
        The cache is consulted first; the file is only read on a miss.
        """
        cached = cls._cache.get(index_uri)
        if cached is not None:
            return cached

        try:
            loaded = Config(index_uri)
        except FileNotFoundError as e:
            # Re-raise with a message that says what was being looked for.
            raise FileNotFoundError(f"Butler repository index file not found at {index_uri}.") from e
        except Exception as e:
            raise RuntimeError(
                f"Butler repository index file at {index_uri} could not be read: {type(e).__qualname__} {e}"
            ) from e

        # Hand back whatever the cache holds for this URI after the call.
        return cls._cache.set_or_get(index_uri, loaded)

    @classmethod
    def _get_index_uri(cls) -> ResourcePath:
        """Determine where the repository index lives.

        Returns
        -------
        index_uri : `lsst.resources.ResourcePath`
            Location of the repository index.

        Raises
        ------
        KeyError
            Raised if the environment does not define the index location.
        """
        location = os.environ.get(cls.index_env_var)
        if location is not None:
            return ResourcePath(location)
        raise KeyError(f"No repository index defined in environment variable {cls.index_env_var}")

    @classmethod
    def _read_repository_index_from_environment(cls) -> Config:
        """Locate the index through the environment and load it.

        Returns
        -------
        repo_index : `Config`
            The index referred to by the environment.

        Notes
        -----
        The stored failure reason is cleared on entry and replaced by the
        message of whichever exception is about to be propagated.
        """
        cls._most_recent_failure = ""

        # Only an undefined environment variable is recorded at this stage.
        try:
            index_uri = cls._get_index_uri()
        except KeyError as err:
            cls._most_recent_failure = str(err)
            raise

        # Any problem while reading is recorded before being propagated.
        try:
            return cls._read_repository_index(index_uri)
        except Exception as err:
            cls._most_recent_failure = str(err)
            raise

    @classmethod
    def get_known_repos(cls) -> set[str]:
        """Return every repository label present in the index.

        Returns
        -------
        repos : `set` of `str`
            The labels that are defined. Empty when the index cannot be
            located or read.
        """
        try:
            known = cls._read_repository_index_from_environment()
        except Exception:
            # Best effort: an unusable index simply means no known labels.
            return set()
        else:
            return set(known)

    @classmethod
    def get_failure_reason(cls) -> str:
        """Report why the repository index could not be obtained.

        Returns
        -------
        reason : `str`
            Explanation of the most recent problem encountered while
            reading the repository index, or an empty string if the last
            read succeeded.

        Notes
        -----
        Only trustworthy when called straight after a failure: each new
        attempt to read the index resets the stored reason, so a later
        call may report something unrelated to the failure of interest.
        """
        return cls._most_recent_failure

    @classmethod
    def get_repo_uri(cls, label: str, return_label: bool = False) -> ResourcePath:
        """Resolve a label to a butler repository URI using the index.

        Parameters
        ----------
        label : `str`
            Label of the Butler repository to resolve.
        return_label : `bool`, optional
            Controls what happens when ``label`` cannot be resolved, either
            because no index is available or because the index has no such
            entry. If `True`, ``ResourcePath(label)`` is returned. If
            `False` (default), an exception is raised.

        Returns
        -------
        uri : `lsst.resources.ResourcePath`
            URI of the Butler repository registered under the label, or
            the label itself converted to a URI when ``return_label`` is
            `True` and the lookup failed.

        Raises
        ------
        KeyError
            Raised if the label is absent from the index, or if no index
            is defined, and ``return_label`` is `False`.
        FileNotFoundError
            Raised if the environment defines an index that does not
            exist.
        """
        try:
            index = cls._read_repository_index_from_environment()
        except Exception:
            if not return_label:
                raise
            return ResourcePath(label, forceAbsolute=False)

        target = index.get(label)
        if target is not None:
            return ResourcePath(target)

        if return_label:
            return ResourcePath(label, forceAbsolute=False)

        # The index was read a moment ago, so this lookup is expected to
        # succeed; the fallback only guards against a changed environment.
        try:
            index_uri = str(cls._get_index_uri())
        except KeyError:
            index_uri = "<environment variable not defined>"
        raise KeyError(f"Label '{label}' not known to repository index at {index_uri}")