Coverage for python/lsst/daf/butler/_butlerRepoIndex.py: 38%

73 statements  

« prev     ^ index     » next       coverage.py v7.3.1, created at 2023-10-02 08:00 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

30__all__ = ("ButlerRepoIndex",) 

31 

32import os 

33from typing import ClassVar 

34 

35from lsst.resources import ResourcePath 

36 

37from .core import Config 

38 

39 

class ButlerRepoIndex:
    """Index of all known butler repositories.

    The index of butler repositories is found by looking for a
    configuration file at the URI pointed at by the environment
    variable ``$DAF_BUTLER_REPOSITORY_INDEX``. The configuration file
    is a simple dictionary lookup of the form:

    .. code-block:: yaml

       label1: uri1
       label2: uri2

    and can be in YAML or JSON format. The content of the file will be
    cached.

    Notes
    -----
    All state is held in class variables and every method is a class
    method, so the class is used directly and never needs instantiating.
    """

    index_env_var: ClassVar[str] = "DAF_BUTLER_REPOSITORY_INDEX"
    """The name of the environment variable to read to locate the index."""

    _cache: ClassVar[dict[ResourcePath, Config]] = {}
    """Cache of indexes. In most scenarios only one index will be found
    and the environment will not change. In tests this may not be true."""

    _most_recent_failure: ClassVar[str] = ""
    """Cache of the most recent failure when reading an index. Reset on
    every read."""

    @classmethod
    def _read_repository_index(cls, index_uri: ResourcePath) -> Config:
        """Read the repository index from the supplied URI.

        Parameters
        ----------
        index_uri : `lsst.resources.ResourcePath`
            URI of the repository index.

        Returns
        -------
        repo_index : `Config`
            The index found at this URI.

        Raises
        ------
        FileNotFoundError
            Raised if the URI does not exist.
        RuntimeError
            Raised if the index exists but could not be read for any
            other reason.

        Notes
        -----
        Does check the cache before reading the file. Only successful
        reads are cached, so a failing URI is retried on the next call.
        """
        if index_uri in cls._cache:
            return cls._cache[index_uri]

        try:
            repo_index = Config(index_uri)
        except FileNotFoundError as e:
            # More explicit error message.
            raise FileNotFoundError(f"Butler repository index file not found at {index_uri}.") from e
        except Exception as e:
            raise RuntimeError(
                f"Butler repository index file at {index_uri} could not be read: {type(e).__qualname__} {e}"
            ) from e
        cls._cache[index_uri] = repo_index

        return repo_index

    @classmethod
    def _get_index_uri(cls) -> ResourcePath:
        """Find the URI to the repository index.

        Returns
        -------
        index_uri : `lsst.resources.ResourcePath`
            URI to the repository index.

        Raises
        ------
        KeyError
            Raised if the location of the index could not be determined.
        """
        index_uri = os.environ.get(cls.index_env_var)
        if index_uri is None:
            raise KeyError(f"No repository index defined in environment variable {cls.index_env_var}")
        return ResourcePath(index_uri)

    @classmethod
    def _read_repository_index_from_environment(cls) -> Config:
        """Look in environment for index location and read it.

        Returns
        -------
        repo_index : `Config`
            The index found in the environment.

        Raises
        ------
        KeyError
            Raised if the environment variable naming the index is not set.
        FileNotFoundError
            Raised if the index URI does not exist.
        RuntimeError
            Raised if the index could not be read.

        Notes
        -----
        The most recent failure reason is cleared at the start of every
        call and set to the error message if the call fails.
        """
        cls._most_recent_failure = ""
        try:
            index_uri = cls._get_index_uri()
        except KeyError as e:
            # str(KeyError) is the repr() of its argument, which would wrap
            # the stored reason in quotes. Record the message itself.
            cls._most_recent_failure = str(e.args[0]) if e.args else str(e)
            raise
        try:
            repo_index = cls._read_repository_index(index_uri)
        except Exception as e:
            cls._most_recent_failure = str(e)
            raise
        return repo_index

    @classmethod
    def get_known_repos(cls) -> set[str]:
        """Retrieve the set of known repository labels.

        Returns
        -------
        repos : `set` of `str`
            All the known labels. Can be empty if no index can be found.

        Notes
        -----
        Any failure to locate or read the index is deliberately swallowed
        here; the explanation is available from `get_failure_reason`.
        """
        try:
            repo_index = cls._read_repository_index_from_environment()
        except Exception:
            return set()
        return set(repo_index)

    @classmethod
    def get_failure_reason(cls) -> str:
        """Return possible reason for failure to return repository index.

        Returns
        -------
        reason : `str`
            If there is a problem reading the repository index, this will
            contain a string with an explanation. Empty string if everything
            worked.

        Notes
        -----
        The value returned is only reliable if called immediately after a
        failure. The most recent failure reason is reset every time an attempt
        is made to request a label and so the reason can be out of date.
        """
        return cls._most_recent_failure

    @classmethod
    def get_repo_uri(cls, label: str, return_label: bool = False) -> ResourcePath:
        """Look up the label in a butler repository index.

        Parameters
        ----------
        label : `str`
            Label of the Butler repository to look up.
        return_label : `bool`, optional
            If ``label`` cannot be found in the repository index (either
            because index is not defined or ``label`` is not in the index) and
            ``return_label`` is `True` then return ``ResourcePath(label)``.
            If ``return_label`` is `False` (default) then an exception will be
            raised instead.

        Returns
        -------
        uri : `lsst.resources.ResourcePath`
            URI to the Butler repository associated with the given label or
            default value if it is provided.

        Raises
        ------
        KeyError
            Raised if the label is not found in the index, or if an index
            is not defined, and ``return_label`` is `False`.
        FileNotFoundError
            Raised if an index is defined in the environment but it
            can not be found, and ``return_label`` is `False`.
        RuntimeError
            Raised if an index is defined in the environment but it
            can not be read, and ``return_label`` is `False`.

        Notes
        -----
        When ``return_label`` is `True` every failure to obtain the index
        is suppressed and the label itself is returned as a URI without
        being forced to an absolute path.
        """
        try:
            repo_index = cls._read_repository_index_from_environment()
        except Exception:
            if return_label:
                return ResourcePath(label, forceAbsolute=False)
            raise

        repo_uri = repo_index.get(label)
        if repo_uri is None:
            if return_label:
                return ResourcePath(label, forceAbsolute=False)
            # This should not raise since it worked earlier.
            try:
                index_uri = str(cls._get_index_uri())
            except KeyError:
                index_uri = "<environment variable not defined>"
            raise KeyError(f"Label '{label}' not known to repository index at {index_uri}")
        return ResourcePath(repo_uri)