Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ("getHttpSession", "isWebdavEndpoint", "webdavCheckFileExists", 

25 "folderExists", "webdavDeleteFile", "refreshToken", 

26 "finalurl") 

27 

28import os 

29import requests 

30import logging 

31from requests.adapters import HTTPAdapter 

32from requests.packages.urllib3.util.retry import Retry 

33 

34from typing import ( 

35 Optional, 

36 Tuple, 

37 Union, 

38) 

39 

40from .location import ButlerURI, Location 

41 

42log = logging.getLogger(__name__) 

43 

44 

def getHttpSession() -> requests.Session:
    """Create a requests.Session pre-configured with environment variable data

    Returns
    -------
    session : `requests.Session`
        An http session used to execute requests.

    Raises
    ------
    KeyError
        Raised if LSST_BUTLER_WEBDAV_AUTH is not set, or if the variable
        required by the selected authentication method is not set.
    ValueError
        Raised if LSST_BUTLER_WEBDAV_AUTH is neither X509 nor TOKEN.

    Notes
    -----
    The following environment variables are read:
    - LSST_BUTLER_WEBDAV_AUTH (required): which authentication method to
      use. Possible values are X509 and TOKEN
    - (X509 only, required) LSST_BUTLER_WEBDAV_PROXY_CERT: path to proxy
      certificate used to authenticate requests
    - (TOKEN only, required) LSST_BUTLER_WEBDAV_TOKEN_FILE: file which
      contains the bearer token used to authenticate requests
    - LSST_BUTLER_WEBDAV_CA_BUNDLE: the directory where CA
      certificates are stored if you intend to use HTTPS to
      communicate with the endpoint. A warning is logged when it is
      not set.
    - (OPTIONAL) LSST_BUTLER_WEBDAV_EXPECT100: if set, we will add an
      "Expect: 100-Continue" header in all requests. This is required
      on certain endpoints where requests redirection is made.
    """
    # Retry up to three times, with back-off, on throttling and
    # server-side error status codes.
    retries = Retry(total=3, backoff_factor=1, status_forcelist=[429, 500, 502, 503, 504])

    session = requests.Session()
    session.mount("http://", HTTPAdapter(max_retries=retries))
    session.mount("https://", HTTPAdapter(max_retries=retries))

    log.debug("Creating new HTTP session...")

    try:
        env_auth_method = os.environ['LSST_BUTLER_WEBDAV_AUTH']
    except KeyError:
        # The original lookup failure adds nothing to the message below.
        raise KeyError("Environment variable LSST_BUTLER_WEBDAV_AUTH is not set, "
                       "please use values X509 or TOKEN") from None

    if env_auth_method == "X509":
        log.debug("... using x509 authentication.")
        try:
            proxy_cert = os.environ['LSST_BUTLER_WEBDAV_PROXY_CERT']
        except KeyError:
            raise KeyError("Environment variable LSST_BUTLER_WEBDAV_PROXY_CERT is not set") from None
        # The same file is supplied as both the certificate and the key.
        session.cert = (proxy_cert, proxy_cert)
    elif env_auth_method == "TOKEN":
        log.debug("... using bearer-token authentication.")
        refreshToken(session)
    else:
        raise ValueError("Environment variable LSST_BUTLER_WEBDAV_AUTH must be set to X509 or TOKEN")

    ca_bundle = os.environ.get('LSST_BUTLER_WEBDAV_CA_BUNDLE')
    if ca_bundle is None:
        log.warning("Environment variable LSST_BUTLER_WEBDAV_CA_BUNDLE is not set: "
                    "HTTPS requests will fail. If you intend to use HTTPS, please "
                    "export this variable.")
    else:
        # Only override ``verify`` when a bundle is configured. Assigning
        # None here would be falsy, and requests only performs certificate
        # checks for a truthy ``verify``, so the session would silently
        # accept any server certificate.
        session.verify = ca_bundle

    # This header is required for request redirection, in dCache for example
    if "LSST_BUTLER_WEBDAV_EXPECT100" in os.environ:
        log.debug("Expect: 100-Continue header enabled.")
        session.headers.update({'Expect': '100-continue'})

    log.debug("Session configured and ready.")

    return session

115 

116 

def isTokenAuth() -> bool:
    """Report whether bearer-token authentication is selected.

    Returns
    -------
    isTokenAuth : `bool`
        True if LSST_BUTLER_WEBDAV_AUTH is set to TOKEN, False for any
        other value.

    Raises
    ------
    KeyError
        Raised if LSST_BUTLER_WEBDAV_AUTH is not set.
    """
    try:
        auth_method = os.environ['LSST_BUTLER_WEBDAV_AUTH']
    except KeyError:
        raise KeyError("Environment variable LSST_BUTLER_WEBDAV_AUTH is not set, "
                       "please use values X509 or TOKEN")
    else:
        # The comparison already yields the boolean we want to return.
        return auth_method == "TOKEN"

134 

135 

def refreshToken(session: requests.Session) -> None:
    """Set or update the 'Authorization' header of the session,
    configure bearer token authentication, with the value fetched
    from LSST_BUTLER_WEBDAV_TOKEN_FILE

    Parameters
    ----------
    session : `requests.Session`
        Session on which bearer token authentication must be configured

    Raises
    ------
    KeyError
        Raised if LSST_BUTLER_WEBDAV_TOKEN_FILE is not set.
    FileNotFoundError
        Raised if LSST_BUTLER_WEBDAV_TOKEN_FILE does not point to an
        existing regular file.
    """
    # Keep the try body down to the single lookup that can raise KeyError.
    try:
        token_path = os.environ['LSST_BUTLER_WEBDAV_TOKEN_FILE']
    except KeyError:
        raise KeyError("Environment variable LSST_BUTLER_WEBDAV_TOKEN_FILE is not set") from None

    if not os.path.isfile(token_path):
        raise FileNotFoundError(f"No token file: {token_path}")

    # Read through a context manager so the file handle is always closed.
    with open(token_path, 'r') as token_file:
        # Newlines are dropped so the token forms a single header value.
        bearer_token = token_file.read().replace('\n', '')

    session.headers.update({'Authorization': 'Bearer ' + bearer_token})

155 

156 

def webdavCheckFileExists(path: Union[Location, ButlerURI, str],
                          session: Optional[requests.Session] = None) -> Tuple[bool, int]:
    """Probe a remote HTTP resource with a HEAD request.

    Parameters
    ----------
    path : `Location`, `ButlerURI` or `str`
        Resource to probe; it is converted to a URL with `_getFileURL`.
    session : `requests.Session`, optional
        Session used to send the request. A new one is created with
        `getHttpSession` when omitted.

    Returns
    -------
    exists : `bool`
        True if the HEAD request answered with status 200, False otherwise.
    size : `int`
        Value of the ``Content-Length`` response header when the resource
        exists, otherwise -1.
    """
    if session is None:
        session = getHttpSession()

    url = _getFileURL(path)
    log.debug("Checking if file exists: %s", url)

    response = session.head(url)
    if response.status_code != 200:
        # Any status other than 200 is reported as "does not exist".
        return (False, -1)
    return (True, int(response.headers['Content-Length']))

184 

185 

def webdavDeleteFile(path: Union[Location, ButlerURI, str],
                     session: Optional[requests.Session] = None) -> None:
    """Delete a remote HTTP resource.

    Parameters
    ----------
    path : `Location`, `ButlerURI` or `str`
        Resource to delete; it is converted to a URL with `_getFileURL`.
    session : `requests.Session`, optional
        Session used to send the request. A new one is created with
        `getHttpSession` when omitted.

    Raises
    ------
    FileNotFoundError
        Raised if the DELETE request answers with a status code other
        than 200, 202 or 204.
    """
    if session is None:
        session = getHttpSession()

    url = _getFileURL(path)
    log.debug("Removing file: %s", url)

    response = session.delete(url)
    if response.status_code in (200, 202, 204):
        return
    raise FileNotFoundError(f"Unable to delete resource {url}; status code: {response.status_code}")

207 

208 

def folderExists(path: Union[Location, ButlerURI, str],
                 session: Optional[requests.Session] = None) -> bool:
    """Probe a remote folder URL with a HEAD request.

    Parameters
    ----------
    path : `Location`, `ButlerURI` or `str`
        Folder to probe; it is converted to a URL with `_getFileURL`.
    session : `requests.Session`, optional
        Session used to send the request. A new one is created with
        `getHttpSession` when omitted.

    Returns
    -------
    exists : `bool`
        True if the HEAD request answered with status 200, False otherwise.
    """
    if session is None:
        session = getHttpSession()

    url = _getFileURL(path)
    log.debug("Checking if folder exists: %s", url)

    response = session.head(url)
    return response.status_code == 200

233 

234 

def isWebdavEndpoint(path: Union[Location, ButlerURI, str]) -> bool:
    """Detect whether a remote HTTP endpoint advertises Webdav support.

    An OPTIONS request is sent to the endpoint and its response headers
    are inspected.

    Parameters
    ----------
    path : `Location`, `ButlerURI` or `str`
        Endpoint to query; it is converted to a URL with `_getFileURL`.

    Returns
    -------
    isWebdav : `bool`
        True if the response carries a ``DAV`` header, False otherwise.

    Notes
    -----
    The value of LSST_BUTLER_WEBDAV_CA_BUNDLE, when set, is passed as the
    ``verify`` argument of the request. A warning is logged when it is
    not set.
    """
    ca_bundle = os.environ.get('LSST_BUTLER_WEBDAV_CA_BUNDLE')
    if ca_bundle is None:
        log.warning("Environment variable LSST_BUTLER_WEBDAV_CA_BUNDLE is not set: "
                    "HTTPS requests will fail. If you intend to use HTTPS, please "
                    "export this variable.")

    url = _getFileURL(path)

    log.debug("Detecting HTTP endpoint type...")
    response = requests.options(url, verify=ca_bundle)
    return 'DAV' in response.headers

260 

261 

def finalurl(r: requests.Response) -> str:
    """Return the URL to which requests should finally be sent.

    If the response is a 307 redirect, the redirect target is returned
    instead of the URL of the response itself. This is needed when using
    PUT operations, to avoid starting to send the data to the endpoint,
    before having to send it again once the 307 redirect response is
    received, and thus wasting bandwidth.

    Parameters
    ----------
    r : `requests.Response`
        An HTTP response received when requesting the endpoint

    Returns
    -------
    destination_url: `string`
        The ``Location`` header of the response if its status code is 307,
        otherwise the URL of the response.
    """
    requested_url = r.url
    if r.status_code != 307:
        # Not a temporary redirect: keep talking to the same URL.
        return requested_url

    redirect_target = r.headers['Location']
    log.debug("Request redirected to %s", redirect_target)
    return redirect_target

284 

285 

def _getFileURL(path: Union[Location, ButlerURI, str]) -> str:
    """Convert a resource reference into its URL string.

    Parameters
    ----------
    path : `Location`, `ButlerURI` or `str`
        Resource reference. For a `Location` its ``uri`` attribute is
        used; anything else is passed to the `ButlerURI` constructor.

    Returns
    -------
    filepath : `str`
        The result of calling ``geturl()`` on the resulting URI object.
    """
    uri = path.uri if isinstance(path, Location) else ButlerURI(path)
    return uri.geturl()