Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24import os 

25import os.path 

26import requests 

27import tempfile 

28import logging 

29import functools 

30 

31__all__ = ('ButlerHttpURI', ) 

32 

33from requests.adapters import HTTPAdapter 

34from requests.packages.urllib3.util.retry import Retry 

35 

36from typing import ( 

37 TYPE_CHECKING, 

38 Optional, 

39 Tuple, 

40 Union, 

41) 

42 

43from .utils import NoTransaction 

44from ._butlerUri import ButlerURI 

45 

46if TYPE_CHECKING: 46 ↛ 47line 46 didn't jump to line 47, because the condition on line 46 was never true

47 from ..datastore import DatastoreTransaction 

48 

49log = logging.getLogger(__name__) 

50 

51# Default timeout for all HTTP requests, in seconds 

52TIMEOUT = 20 

53 

54 

def getHttpSession() -> requests.Session:
    """Create a requests.Session pre-configured with environment variable data.

    Returns
    -------
    session : `requests.Session`
        An http session used to execute requests. Both ``http://`` and
        ``https://`` are mounted on an adapter that retries up to 3 times
        on status codes 429, 500, 502, 503 and 504.

    Raises
    ------
    KeyError
        Raised if X509 authentication is requested but
        LSST_BUTLER_WEBDAV_PROXY_CERT is not set, or if TOKEN authentication
        is requested but LSST_BUTLER_WEBDAV_TOKEN_FILE is not set.
    FileNotFoundError
        Raised if TOKEN authentication is requested and the token file
        does not exist.
    ValueError
        Raised if LSST_BUTLER_WEBDAV_AUTH is set to a value other than
        X509 or TOKEN.

    Notes
    -----
    The following environment variables are consulted:
    - (OPTIONAL) LSST_BUTLER_WEBDAV_CA_BUNDLE: the directory where CA
      certificates are stored if you intend to use HTTPS to
      communicate with the endpoint. If unset or empty, the session
      keeps the default certificate verification of ``requests``.
    - (OPTIONAL) LSST_BUTLER_WEBDAV_AUTH: which authentication method to
      use. Possible values are X509 and TOKEN. If unset, an
      unauthenticated session is returned.
    - (X509 only) LSST_BUTLER_WEBDAV_PROXY_CERT: path to proxy
      certificate used to authenticate requests
    - (TOKEN only) LSST_BUTLER_WEBDAV_TOKEN_FILE: file which
      contains the bearer token used to authenticate requests
    - (OPTIONAL) LSST_BUTLER_WEBDAV_EXPECT100: if set, we will add an
      "Expect: 100-Continue" header in all requests. This is required
      on certain endpoints where requests redirection is made.
    """
    retries = Retry(total=3, backoff_factor=1, status_forcelist=[429, 500, 502, 503, 504])

    session = requests.Session()
    session.mount("http://", HTTPAdapter(max_retries=retries))
    session.mount("https://", HTTPAdapter(max_retries=retries))

    log.debug("Creating new HTTP session...")

    ca_bundle = os.environ.get('LSST_BUTLER_WEBDAV_CA_BUNDLE')
    if ca_bundle:
        session.verify = ca_bundle
    else:
        # Leave session.verify at its default (True). Assigning None or an
        # empty string here would be treated by requests as a falsy
        # ``verify`` and silently turn certificate verification off.
        log.debug("Environment variable LSST_BUTLER_WEBDAV_CA_BUNDLE is not set: "
                  "If you would like to trust additional CAs, please consider "
                  "exporting this variable.")

    env_auth_method = os.environ.get('LSST_BUTLER_WEBDAV_AUTH')
    if env_auth_method is None:
        log.debug("Environment variable LSST_BUTLER_WEBDAV_AUTH is not set, "
                  "no authentication configured.")
        log.debug("Unauthenticated session configured and ready.")
        return session

    if env_auth_method == "X509":
        log.debug("... using x509 authentication.")
        try:
            proxy_cert = os.environ['LSST_BUTLER_WEBDAV_PROXY_CERT']
        except KeyError:
            raise KeyError("Environment variable LSST_BUTLER_WEBDAV_PROXY_CERT is not set") from None
        # The same proxy file is passed as both certificate and key.
        session.cert = (proxy_cert, proxy_cert)
    elif env_auth_method == "TOKEN":
        log.debug("... using bearer-token authentication.")
        refreshToken(session)
    else:
        raise ValueError("Environment variable LSST_BUTLER_WEBDAV_AUTH must be set to X509 or TOKEN")

    log.debug("Authenticated session configured and ready.")
    return session

119 

120 

def useExpect100() -> bool:
    """Report whether the "Expect: 100-Continue" header should be sent.

    Returns
    -------
    useExpect100 : `bool`
        True if LSST_BUTLER_WEBDAV_EXPECT100 is present in the environment
        (whatever its value), False otherwise.
    """
    # Some endpoints (dCache for example) need this header for request
    # redirection to work.
    enabled = "LSST_BUTLER_WEBDAV_EXPECT100" in os.environ
    if enabled:
        log.debug("Expect: 100-Continue header enabled.")
    return enabled

134 

135 

def isTokenAuth() -> bool:
    """Return the status of bearer-token authentication.

    Returns
    -------
    isTokenAuth : `bool`
        True if LSST_BUTLER_WEBDAV_AUTH is set to TOKEN, False otherwise
        (including when the variable is not set at all, which corresponds
        to an unauthenticated session).
    """
    # An unset variable means "no authentication", not an error: sessions
    # may legitimately be created without LSST_BUTLER_WEBDAV_AUTH.
    return os.environ.get('LSST_BUTLER_WEBDAV_AUTH') == "TOKEN"

153 

154 

def refreshToken(session: requests.Session) -> None:
    """Load the bearer token from disk and install it on the session.

    The file named by LSST_BUTLER_WEBDAV_TOKEN_FILE is read, its newline
    characters are removed, and the result is stored in the session's
    'Authorization' header as a bearer token, replacing any previous value.

    Parameters
    ----------
    session : `requests.Session`
        Session on which bearer token authentication must be configured.

    Raises
    ------
    KeyError
        Raised if LSST_BUTLER_WEBDAV_TOKEN_FILE is not set.
    FileNotFoundError
        Raised if the configured path is not an existing regular file.
    """
    try:
        token_file = os.environ['LSST_BUTLER_WEBDAV_TOKEN_FILE']
    except KeyError:
        raise KeyError("Environment variable LSST_BUTLER_WEBDAV_TOKEN_FILE is not set")

    if not os.path.isfile(token_file):
        raise FileNotFoundError(f"No token file: {token_file}")

    with open(token_file, "r") as token_stream:
        raw_text = token_stream.read()

    # Drop every newline so the header value is a single line.
    token_value = "".join(raw_text.split('\n'))
    session.headers.update({'Authorization': f"Bearer {token_value}"})

177 

178 

@functools.lru_cache
def isWebdavEndpoint(path: Union[ButlerURI, str]) -> bool:
    """Check whether the remote HTTP endpoint implements Webdav features.

    An OPTIONS request is sent to ``path`` and the endpoint is considered
    to implement Webdav if the response carries a 'DAV' header. Results
    are memoized per ``path`` value.

    Parameters
    ----------
    path : `ButlerURI` or `str`
        URL to the resource to be checked.
        Should preferably refer to the root since the status is shared
        by all paths in that server.

    Returns
    -------
    isWebdav : `bool`
        True if the endpoint implements Webdav, False if it doesn't.
    """
    ca_bundle = os.environ.get('LSST_BUTLER_WEBDAV_CA_BUNDLE')
    if ca_bundle is None:
        log.warning("Environment variable LSST_BUTLER_WEBDAV_CA_BUNDLE is not set: "
                    "some HTTPS requests will fail. If you intend to use HTTPS, please "
                    "export this variable.")

    log.debug("Detecting HTTP endpoint type for '%s'...", path)
    # Apply the same timeout as every other request in this module so that
    # an unresponsive server can not block the caller indefinitely.
    r = requests.options(str(path), verify=ca_bundle, timeout=TIMEOUT)
    return 'DAV' in r.headers

206 

207 

def finalurl(r: requests.Response) -> str:
    """Work out where requests for this resource should really be sent.

    If the given response is a 307 redirect, the target named in its
    'Location' header is returned; for any other status the URL of the
    response itself is returned. Knowing the destination up front lets a
    PUT send its payload once, directly to the final endpoint, rather
    than sending it, being redirected, and sending it again.

    Parameters
    ----------
    r : `requests.Response`
        An HTTP response received when requesting the endpoint

    Returns
    -------
    destination_url: `string`
        The final destination to which requests must be sent.
    """
    if r.status_code != 307:
        return r.url

    redirect_target = r.headers['Location']
    log.debug("Request redirected to %s", redirect_target)
    return redirect_target

232 

233 

class ButlerHttpURI(ButlerURI):
    """General HTTP(S) resource.

    All instances share a single `requests.Session`, created lazily on
    first access of `session`. Operations that need WebDAV verbs
    (``MKCOL``, ``COPY``, ``MOVE``) are only available when the endpoint
    advertises WebDAV support.
    """

    # Session shared by every instance; replaced by a configured session
    # the first time the ``session`` property is read.
    _session = requests.Session()
    _sessionInitialized = False
    # Per-instance memo of the WebDAV probe; None means "not checked yet".
    _is_webdav: Optional[bool] = None

    @property
    def session(self) -> requests.Session:
        """Client object to address remote resource."""
        if ButlerHttpURI._sessionInitialized:
            # The shared session may be unauthenticated (no auth method
            # configured); in that case there is no token to refresh and
            # a KeyError from the lookup must not break the access.
            try:
                token_auth = isTokenAuth()
            except KeyError:
                token_auth = False
            if token_auth:
                # Re-read the token file on each access so that a renewed
                # token is picked up.
                refreshToken(ButlerHttpURI._session)
            return ButlerHttpURI._session

        s = getHttpSession()
        ButlerHttpURI._session = s
        ButlerHttpURI._sessionInitialized = True
        return s

    @property
    def is_webdav_endpoint(self) -> bool:
        """Check if the current endpoint implements WebDAV features.

        This is stored per URI but cached by root so there is
        only one check per hostname.
        """
        if self._is_webdav is None:
            self._is_webdav = isWebdavEndpoint(self.root_uri())
        return self._is_webdav

    def exists(self) -> bool:
        """Check that a remote HTTP resource exists.

        Returns
        -------
        exists : `bool`
            True if a HEAD request on the URL returns status 200.
        """
        log.debug("Checking if resource exists: %s", self.geturl())
        r = self.session.head(self.geturl(), timeout=TIMEOUT)
        return r.status_code == 200

    def size(self) -> int:
        """Return the size of the remote resource in bytes.

        Returns
        -------
        size : `int`
            0 for directory-like URIs, otherwise the value of the
            'Content-Length' header of a HEAD response.

        Raises
        ------
        FileNotFoundError
            Raised if the HEAD request does not return status 200.
        """
        if self.dirLike:
            return 0
        r = self.session.head(self.geturl(), timeout=TIMEOUT)
        if r.status_code != 200:
            raise FileNotFoundError(f"Resource {self} does not exist")
        return int(r.headers['Content-Length'])

    def mkdir(self) -> None:
        """Create the directory resource if it does not already exist.

        Missing parent directories are created first.

        Raises
        ------
        NotImplementedError
            Raised if the endpoint does not implement WebDAV.
        ValueError
            Raised if this URI is file-like, or if the server refuses to
            create the directory.
        """
        # Only available on WebDAV backends
        if not self.is_webdav_endpoint:
            raise NotImplementedError("Endpoint does not implement WebDAV functionality")

        if not self.dirLike:
            raise ValueError(f"Can not create a 'directory' for file-like URI {self}")

        if self.exists():
            return

        # Create the parent first if it is missing. The URL comparison
        # guards against infinite recursion when the parent of the root is
        # the root itself; it is tested first because it needs no request.
        parent = self.parent()
        if parent.geturl() != self.geturl() and not parent.exists():
            parent.mkdir()

        log.debug("Creating new directory: %s", self.geturl())
        r = self.session.request("MKCOL", self.geturl(), timeout=TIMEOUT)
        if r.status_code == 405:
            log.debug("Can not create directory: %s may already exist: skipping.", self.geturl())
        elif r.status_code != 201:
            raise ValueError(f"Can not create directory {self}, status code: {r.status_code}")

    def remove(self) -> None:
        """Remove the resource.

        Raises
        ------
        FileNotFoundError
            Raised if the DELETE request does not return 200, 202 or 204.
        """
        log.debug("Removing resource: %s", self.geturl())
        r = self.session.delete(self.geturl(), timeout=TIMEOUT)
        if r.status_code not in [200, 202, 204]:
            raise FileNotFoundError(f"Unable to delete resource {self}; status code: {r.status_code}")

    def _as_local(self) -> Tuple[str, bool]:
        """Download object over HTTP and place in temporary directory.

        Returns
        -------
        path : `str`
            Path to local temporary file.
        temporary : `bool`
            Always returns `True`. This is always a temporary file.

        Raises
        ------
        FileNotFoundError
            Raised if the GET request does not return status 200.
        """
        log.debug("Downloading remote resource as local file: %s", self.geturl())
        # The context manager closes the streamed response on every path.
        with self.session.get(self.geturl(), stream=True, timeout=TIMEOUT) as r:
            if r.status_code != 200:
                raise FileNotFoundError(f"Unable to download resource {self}; status code: {r.status_code}")
            with tempfile.NamedTemporaryFile(suffix=self.getExtension(), delete=False) as tmpFile:
                # iter_content() defaults to 1-byte chunks; ask for large
                # chunks so the download is not written byte by byte.
                for chunk in r.iter_content(chunk_size=65536):
                    tmpFile.write(chunk)
        return tmpFile.name, True

    def read(self, size: int = -1) -> bytes:
        """Open the resource and return the contents in bytes.

        Parameters
        ----------
        size : `int`, optional
            The number of bytes to read. Negative or omitted indicates
            that all data should be read.

        Returns
        -------
        data : `bytes`
            The whole body, or for a positive ``size`` the first chunk of
            at most ``size`` bytes (empty if the body is empty).

        Raises
        ------
        FileNotFoundError
            Raised if the GET request does not return status 200.
        """
        log.debug("Reading from remote resource: %s", self.geturl())
        stream = size > 0
        # The context manager releases the connection even though only the
        # first chunk of a streamed body is consumed.
        with self.session.get(self.geturl(), stream=stream, timeout=TIMEOUT) as r:
            if r.status_code != 200:
                raise FileNotFoundError(f"Unable to read resource {self}; status code: {r.status_code}")
            if not stream:
                return r.content
            # Default to empty bytes so an empty body does not leak a
            # StopIteration to the caller.
            return next(r.iter_content(chunk_size=size), b"")

    def write(self, data: bytes, overwrite: bool = True) -> None:
        """Write the supplied bytes to the new resource.

        Parameters
        ----------
        data : `bytes`
            The bytes to write to the resource. The entire contents of the
            resource will be replaced.
        overwrite : `bool`, optional
            If `True` the resource will be overwritten if it exists. Otherwise
            the write will fail.

        Raises
        ------
        FileExistsError
            Raised if the resource exists and ``overwrite`` is `False`.
        ValueError
            Raised if the PUT request does not return 201, 202 or 204.
        """
        log.debug("Writing to remote resource: %s", self.geturl())
        if not overwrite and self.exists():
            raise FileExistsError(f"Remote resource {self} exists and overwrite has been disabled")
        # Probe with an empty PUT first so the payload goes straight to the
        # redirect target, if any.
        dest_url = finalurl(self._emptyPut())
        r = self.session.put(dest_url, data=data, timeout=TIMEOUT)
        if r.status_code not in [201, 202, 204]:
            raise ValueError(f"Can not write file {self}, status code: {r.status_code}")

    def transfer_from(self, src: ButlerURI, transfer: str = "copy",
                      overwrite: bool = False,
                      transaction: Optional[Union[DatastoreTransaction, NoTransaction]] = None) -> None:
        """Transfer the resource at ``src`` to this URI.

        Parameters
        ----------
        src : `ButlerURI`
            Source URI.
        transfer : `str`
            Mode to use for transferring the resource. Must be one of the
            modes listed in ``transferModes``; "auto" is replaced by
            ``transferDefault``.
        overwrite : `bool`, optional
            If `False` (the default) the transfer fails when the
            destination already exists.
        transaction : `DatastoreTransaction`, optional
            Currently unused.

        Raises
        ------
        ValueError
            Raised if the transfer mode is not supported or the server
            reports a failure.
        FileExistsError
            Raised if the destination exists and ``overwrite`` is `False`.
        NotImplementedError
            Raised if ``src`` is also an HTTP URI but the endpoint does
            not implement WebDAV.
        """
        # Fail early to prevent delays if remote resources are requested
        if transfer not in self.transferModes:
            raise ValueError(f"Transfer mode {transfer} not supported by URI scheme {self.scheme}")

        # Each exists() call costs a request; only pay for them when the
        # message will actually be emitted.
        if log.isEnabledFor(logging.DEBUG):
            log.debug("Transferring %s [exists: %s] -> %s [exists: %s] (transfer=%s)",
                      src, src.exists(), self, self.exists(), transfer)

        if not overwrite and self.exists():
            raise FileExistsError(f"Destination path {self} already exists.")

        if transfer == "auto":
            transfer = self.transferDefault

        # True once the server itself has relocated the source, in which
        # case there is nothing left for us to delete afterwards.
        moved_by_server = False

        if isinstance(src, type(self)):
            # Only available on WebDAV backends
            if not self.is_webdav_endpoint:
                raise NotImplementedError("Endpoint does not implement WebDAV functionality")

            if transfer == "move":
                log.debug("Running move via MOVE HTTP request.")
                r = self.session.request("MOVE", src.geturl(),
                                         headers={"Destination": self.geturl()},
                                         timeout=TIMEOUT)
                moved_by_server = True
            else:
                log.debug("Running copy via COPY HTTP request.")
                r = self.session.request("COPY", src.geturl(),
                                         headers={"Destination": self.geturl()},
                                         timeout=TIMEOUT)
        else:
            # Use local file and upload it
            with src.as_local() as local_uri:
                with open(local_uri.ospath, "rb") as f:
                    dest_url = finalurl(self._emptyPut())
                    r = self.session.put(dest_url, data=f, timeout=TIMEOUT)
            log.debug("Uploading URI %s to %s via local file", src, self)

        if r.status_code not in [201, 202, 204]:
            raise ValueError(f"Can not transfer file {self}, status code: {r.status_code}")

        # This was an explicit move requested from a resource that was
        # uploaded rather than moved by the server: remove the original.
        if transfer == "move" and not moved_by_server:
            # Transactions do not work here
            src.remove()

    def _emptyPut(self) -> requests.Response:
        """Send an empty PUT request to current URL.

        This is used to detect if redirection is enabled before sending actual
        data.

        Returns
        -------
        response : `requests.Response`
            HTTP Response from the endpoint.
        """
        headers = {"Content-Length": "0"}
        if useExpect100():
            headers["Expect"] = "100-continue"
        # Redirects are not followed so the caller can inspect a 307 and
        # its 'Location' header.
        return self.session.put(self.geturl(), data=None, headers=headers,
                                allow_redirects=False, timeout=TIMEOUT)