Coverage for python/lsst/daf/butler/_butlerRepoIndex.py: 29%

73 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-06-15 09:13 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ("ButlerRepoIndex",) 

25 

26import os 

27from typing import ClassVar 

28 

29from lsst.resources import ResourcePath 

30 

31from .core import Config 

32 

33 

class ButlerRepoIndex:
    """Index of all known butler repositories.

    The index of butler repositories is found by looking for a
    configuration file at the URI pointed at by the environment
    variable ``$DAF_BUTLER_REPOSITORY_INDEX``. The configuration file
    is a simple dictionary lookup of the form:

    .. code-block:: yaml

       label1: uri1
       label2: uri2

    and can be in YAML or JSON format. The content of the file will be
    cached.
    """

    index_env_var: ClassVar[str] = "DAF_BUTLER_REPOSITORY_INDEX"
    """The name of the environment variable to read to locate the index."""

    _cache: ClassVar[dict[ResourcePath, Config]] = {}
    """Cache of indexes, keyed by the URI they were read from. In most
    scenarios only one index will be found and the environment will not
    change. In tests this may not be true."""

    _most_recent_failure: ClassVar[str] = ""
    """Cache of the most recent failure when reading an index. Reset on
    every read."""

    @classmethod
    def _read_repository_index(cls, index_uri: ResourcePath) -> Config:
        """Read the repository index from the supplied URI.

        Parameters
        ----------
        index_uri : `lsst.resources.ResourcePath`
            URI of the repository index.

        Returns
        -------
        repo_index : `Config`
            The index found at this URI.

        Raises
        ------
        FileNotFoundError
            Raised if the URI does not exist.
        RuntimeError
            Raised if reading the index failed for any reason other than
            the file being missing.

        Notes
        -----
        The cache is consulted before reading the file, and only
        successful reads are stored in it.
        """
        if index_uri in cls._cache:
            return cls._cache[index_uri]

        try:
            repo_index = Config(index_uri)
        except FileNotFoundError as e:
            # More explicit error message.
            raise FileNotFoundError(f"Butler repository index file not found at {index_uri}.") from e
        except Exception as e:
            raise RuntimeError(
                f"Butler repository index file at {index_uri} could not be read: {type(e).__qualname__} {e}"
            ) from e
        cls._cache[index_uri] = repo_index

        return repo_index

    @classmethod
    def _get_index_uri(cls) -> ResourcePath:
        """Find the URI to the repository index.

        Returns
        -------
        index_uri : `lsst.resources.ResourcePath`
            URI to the repository index.

        Raises
        ------
        KeyError
            Raised if the location of the index could not be determined
            because the environment variable is not set.
        """
        index_uri = os.environ.get(cls.index_env_var)
        if index_uri is None:
            raise KeyError(f"No repository index defined in environment variable {cls.index_env_var}")
        return ResourcePath(index_uri)

    @staticmethod
    def _failure_message(exc: Exception) -> str:
        """Convert an exception into a human-readable failure reason.

        Parameters
        ----------
        exc : `Exception`
            The exception to describe.

        Returns
        -------
        message : `str`
            Text describing the failure.

        Notes
        -----
        ``str()`` of a `KeyError` holding a single argument is the ``repr``
        of that argument, so a plain message comes back wrapped in quotes.
        The argument is used directly in that case to avoid the stray
        quoting.
        """
        if isinstance(exc, KeyError) and len(exc.args) == 1:
            return str(exc.args[0])
        return str(exc)

    @classmethod
    def _read_repository_index_from_environment(cls) -> Config:
        """Look in environment for index location and read it.

        Returns
        -------
        repo_index : `Config`
            The index found in the environment.

        Raises
        ------
        KeyError
            Raised if the environment variable locating the index is not
            set.
        FileNotFoundError
            Raised if the index is defined but does not exist.
        RuntimeError
            Raised if the index exists but could not be read.

        Notes
        -----
        The stored failure reason is cleared on entry and set again if
        anything goes wrong, before the exception is re-raised unchanged.
        """
        cls._most_recent_failure = ""
        try:
            return cls._read_repository_index(cls._get_index_uri())
        except Exception as e:
            # Record every failure, including one raised while locating the
            # index, so that an empty reason really does mean success.
            cls._most_recent_failure = cls._failure_message(e)
            raise

    @classmethod
    def get_known_repos(cls) -> set[str]:
        """Retrieve the list of known repository labels.

        Returns
        -------
        repos : `set` of `str`
            All the known labels. Can be empty if no index can be found.

        Notes
        -----
        Failures are deliberately not propagated; the reason for an empty
        result can be retrieved with `get_failure_reason`.
        """
        try:
            repo_index = cls._read_repository_index_from_environment()
        except Exception:
            return set()
        return set(repo_index)

    @classmethod
    def get_failure_reason(cls) -> str:
        """Return possible reason for failure to return repository index.

        Returns
        -------
        reason : `str`
            If there is a problem reading the repository index, this will
            contain a string with an explanation. Empty string if everything
            worked.

        Notes
        -----
        The value returned is only reliable if called immediately after a
        failure. The most recent failure reason is reset every time an attempt
        is made to read the index and so the reason can be out of date.
        A label that is missing from an otherwise readable index is not
        recorded as a failure.
        """
        return cls._most_recent_failure

    @classmethod
    def get_repo_uri(cls, label: str, return_label: bool = False) -> ResourcePath:
        """Look up the label in a butler repository index.

        Parameters
        ----------
        label : `str`
            Label of the Butler repository to look up.
        return_label : `bool`, optional
            If ``label`` cannot be found in the repository index (either
            because index is not defined or ``label`` is not in the index) and
            ``return_label`` is `True` then return ``ResourcePath(label)``.
            If ``return_label`` is `False` (default) then an exception will be
            raised instead.

        Returns
        -------
        uri : `lsst.resources.ResourcePath`
            URI to the Butler repository associated with the given label, or
            a URI constructed from ``label`` itself if the lookup failed and
            ``return_label`` is `True`.

        Raises
        ------
        KeyError
            Raised if the label is not found in the index, or if an index
            is not defined, and ``return_label`` is `False`.
        FileNotFoundError
            Raised if an index is defined in the environment but it
            can not be found, and ``return_label`` is `False`.
        RuntimeError
            Raised if an index is defined in the environment but it
            could not be read, and ``return_label`` is `False`.
        """
        try:
            repo_index = cls._read_repository_index_from_environment()
        except Exception:
            if return_label:
                return ResourcePath(label, forceAbsolute=False)
            raise

        repo_uri = repo_index.get(label)
        if repo_uri is None:
            if return_label:
                return ResourcePath(label, forceAbsolute=False)
            # This should not raise since it worked earlier.
            try:
                index_uri = str(cls._get_index_uri())
            except KeyError:
                index_uri = "<environment variable not defined>"
            raise KeyError(f"Label '{label}' not known to repository index at {index_uri}")
        return ResourcePath(repo_uri)

222 raise KeyError(f"Label '{label}' not known to repository index at {index_uri}") 

223 return ResourcePath(repo_uri)