Coverage for python/lsst/resources/http.py: 16%
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of lsst-resources.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# Use of this source code is governed by a 3-clause BSD-style
10# license that can be found in the LICENSE file.
12from __future__ import annotations
14import functools
15import logging
16import os
17import os.path
18import tempfile
20import requests
22__all__ = ("HttpResourcePath",)
24from typing import TYPE_CHECKING, Optional, Tuple, Union
26from lsst.utils.timer import time_this
27from requests.adapters import HTTPAdapter
28from requests.packages.urllib3.util.retry import Retry
30from ._resourcePath import ResourcePath
32if TYPE_CHECKING: 32 ↛ 33line 32 didn't jump to line 33, because the condition on line 32 was never true
33 from .utils import TransactionProtocol
35log = logging.getLogger(__name__)
37# Default timeout for all HTTP requests, in seconds
38TIMEOUT = 20
def getHttpSession() -> requests.Session:
    """Create a requests.Session pre-configured with environment variable data.

    Returns
    -------
    session : `requests.Session`
        An http session used to execute requests.

    Raises
    ------
    KeyError
        Raised if X509 authentication is requested and
        LSST_BUTLER_WEBDAV_PROXY_CERT is not set.
    ValueError
        Raised if LSST_BUTLER_WEBDAV_AUTH is set to a value other than
        X509 or TOKEN.

    Notes
    -----
    The following environment variables are used:

    - (OPTIONAL) LSST_BUTLER_WEBDAV_CA_BUNDLE: the directory where CA
      certificates are stored if you intend to use HTTPS to
      communicate with the endpoint. If it is not set the session keeps
      its default certificate verification settings.
    - (OPTIONAL) LSST_BUTLER_WEBDAV_AUTH: which authentication method to
      use. Possible values are X509 and TOKEN. If it is not set an
      unauthenticated session is returned.
    - (X509 only) LSST_BUTLER_WEBDAV_PROXY_CERT: path to proxy
      certificate used to authenticate requests
    - (TOKEN only) LSST_BUTLER_WEBDAV_TOKEN_FILE: file which
      contains the bearer token used to authenticate requests
    - (OPTIONAL) LSST_BUTLER_WEBDAV_EXPECT100: if set, we will add an
      "Expect: 100-Continue" header in all requests. This is required
      on certain endpoints where requests redirection is made.
    """
    retries = Retry(total=3, backoff_factor=1, status_forcelist=[429, 500, 502, 503, 504])

    session = requests.Session()
    session.mount("http://", HTTPAdapter(max_retries=retries))
    session.mount("https://", HTTPAdapter(max_retries=retries))

    log.debug("Creating new HTTP session...")

    ca_bundle = os.environ.get("LSST_BUTLER_WEBDAV_CA_BUNDLE")
    if ca_bundle:
        session.verify = ca_bundle
    else:
        # Do not touch session.verify here: a falsy value (None, "") makes
        # requests skip TLS certificate verification, whereas the default
        # (True) verifies against the default CA store.
        log.debug(
            "Environment variable LSST_BUTLER_WEBDAV_CA_BUNDLE is not set: "
            "If you would like to trust additional CAs, please consider "
            "exporting this variable."
        )

    env_auth_method = os.environ.get("LSST_BUTLER_WEBDAV_AUTH")
    if env_auth_method is None:
        log.debug("Environment variable LSST_BUTLER_WEBDAV_AUTH is not set, no authentication configured.")
        log.debug("Unauthenticated session configured and ready.")
        return session

    if env_auth_method == "X509":
        log.debug("... using x509 authentication.")
        try:
            proxy_cert = os.environ["LSST_BUTLER_WEBDAV_PROXY_CERT"]
        except KeyError:
            # The original lookup failure adds nothing to the message below.
            raise KeyError("Environment variable LSST_BUTLER_WEBDAV_PROXY_CERT is not set") from None
        # The same file is passed as both client certificate and key.
        session.cert = (proxy_cert, proxy_cert)
    elif env_auth_method == "TOKEN":
        log.debug("... using bearer-token authentication.")
        refreshToken(session)
    else:
        raise ValueError("Environment variable LSST_BUTLER_WEBDAV_AUTH must be set to X509 or TOKEN")

    log.debug("Authenticated session configured and ready.")
    return session
def useExpect100() -> bool:
    """Report whether the "Expect: 100-Continue" header should be sent.

    Returns
    -------
    useExpect100 : `bool`
        `True` when the environment variable LSST_BUTLER_WEBDAV_EXPECT100
        is present (whatever its value), `False` when it is absent.
    """
    # This header is required for request redirection, in dCache for example
    enabled = "LSST_BUTLER_WEBDAV_EXPECT100" in os.environ
    if enabled:
        log.debug("Expect: 100-Continue header enabled.")
    return enabled
def isTokenAuth() -> bool:
    """Return the status of bearer-token authentication.

    Returns
    -------
    isTokenAuth : `bool`
        True if LSST_BUTLER_WEBDAV_AUTH is set to TOKEN, False otherwise
        (including when the variable is not set at all).

    Notes
    -----
    An unset LSST_BUTLER_WEBDAV_AUTH is a supported configuration: it
    yields an unauthenticated session in `getHttpSession`. It is therefore
    reported here as "not token authentication" instead of raising.
    """
    return os.environ.get("LSST_BUTLER_WEBDAV_AUTH") == "TOKEN"
def refreshToken(session: requests.Session) -> None:
    """Refresh the session token.

    Set or update the 'Authorization' header of the session,
    configure bearer token authentication, with the value fetched
    from LSST_BUTLER_WEBDAV_TOKEN_FILE

    Parameters
    ----------
    session : `requests.Session`
        Session on which bearer token authentication must be configured.

    Raises
    ------
    KeyError
        Raised if LSST_BUTLER_WEBDAV_TOKEN_FILE is not set.
    FileNotFoundError
        Raised if LSST_BUTLER_WEBDAV_TOKEN_FILE does not refer to an
        existing file.
    """
    # Keep the try body to the single lookup that can raise KeyError.
    try:
        token_path = os.environ["LSST_BUTLER_WEBDAV_TOKEN_FILE"]
    except KeyError:
        raise KeyError("Environment variable LSST_BUTLER_WEBDAV_TOKEN_FILE is not set") from None

    if not os.path.isfile(token_path):
        raise FileNotFoundError(f"No token file: {token_path}")

    with open(token_path, "r") as fh:
        # Drop newlines as before, then strip any remaining surrounding
        # whitespace (e.g. the "\r" of a CRLF line ending) so that it does
        # not end up inside the header value.
        bearer_token = fh.read().replace("\n", "").strip()

    session.headers.update({"Authorization": "Bearer " + bearer_token})
@functools.lru_cache
def isWebdavEndpoint(path: Union[ResourcePath, str]) -> bool:
    """Check whether the remote HTTP endpoint implements Webdav features.

    Parameters
    ----------
    path : `ResourcePath` or `str`
        URL to the resource to be checked.
        Should preferably refer to the root since the status is shared
        by all paths in that server.

    Returns
    -------
    isWebdav : `bool`
        True if the endpoint implements Webdav, False if it doesn't.

    Notes
    -----
    The answer is memoized per ``path`` by `functools.lru_cache`, so each
    distinct argument triggers at most one OPTIONS request while it stays
    in the cache. The request is sent with the module-level `requests`
    API and not with the authenticated session.
    """
    ca_bundle = os.environ.get("LSST_BUTLER_WEBDAV_CA_BUNDLE")
    if ca_bundle is None:
        log.warning(
            "Environment variable LSST_BUTLER_WEBDAV_CA_BUNDLE is not set: "
            "some HTTPS requests will fail. If you intend to use HTTPS, please "
            "export this variable."
        )

    log.debug("Detecting HTTP endpoint type for '%s'...", path)
    # Always bound the request: without a timeout an unresponsive server
    # would block this call forever.
    r = requests.options(str(path), verify=ca_bundle, timeout=TIMEOUT)
    return "DAV" in r.headers
def finalurl(r: requests.Response) -> str:
    """Calculate the final URL, including redirects.

    Check whether the remote HTTP endpoint redirects to a different
    endpoint, and return the final destination of the request.
    This is needed when using PUT operations, to avoid starting
    to send the data to the endpoint, before having to send it again once
    the redirect response is received, and thus wasting bandwidth.

    Parameters
    ----------
    r : `requests.Response`
        An HTTP response received when requesting the endpoint

    Returns
    -------
    destination_url: `string`
        The final destination to which requests must be sent. This is the
        URL of the response itself unless the response is a 307 or 308
        redirect, in which case it is the redirect target.

    Raises
    ------
    KeyError
        Raised if a 307 or 308 response carries no ``Location`` header.
    """
    # Imported locally so that this function is self-contained.
    from urllib.parse import urljoin

    destination_url = r.url
    # 307 and 308 are the redirects that keep the request method, which is
    # what matters for the PUT that follows.
    if r.status_code in (307, 308):
        # Location may be a relative reference; resolve it against the URL
        # that was requested. An absolute Location is returned unchanged.
        destination_url = urljoin(r.url, r.headers["Location"])
        log.debug("Request redirected to %s", destination_url)
    return destination_url
class HttpResourcePath(ResourcePath):
    """General HTTP(S) resource.

    Notes
    -----
    The `requests.Session` used to talk to the server is stored on the
    class and therefore shared by all instances. It is configured lazily,
    the first time the ``session`` property is read.
    """

    # Placeholder session; replaced by a configured one on first use.
    _session = requests.Session()
    # Whether _session has been configured by getHttpSession().
    _sessionInitialized = False
    # Per-instance memo of the WebDAV capability check (None = not checked).
    _is_webdav: Optional[bool] = None

    @property
    def session(self) -> requests.Session:
        """Client object to address remote resource."""
        cls = type(self)
        if cls._sessionInitialized:
            # Re-read the token file on every access so that a renewed
            # token is picked up by the existing session.
            if isTokenAuth():
                refreshToken(cls._session)
            return cls._session

        s = getHttpSession()
        cls._session = s
        cls._sessionInitialized = True
        return s

    @property
    def is_webdav_endpoint(self) -> bool:
        """Check if the current endpoint implements WebDAV features.

        This is stored per URI but cached by root so there is
        only one check per hostname.
        """
        if self._is_webdav is not None:
            return self._is_webdav

        self._is_webdav = isWebdavEndpoint(self.root_uri())
        return self._is_webdav

    def exists(self) -> bool:
        """Check that a remote HTTP resource exists.

        Returns
        -------
        exists : `bool`
            `True` if a HEAD request on the resource returns status 200.
        """
        log.debug("Checking if resource exists: %s", self.geturl())
        r = self.session.head(self.geturl(), timeout=TIMEOUT)

        return r.status_code == 200

    def size(self) -> int:
        """Return the size of the remote resource in bytes.

        Returns
        -------
        size : `int`
            0 for a directory-like URI, otherwise the value of the
            ``Content-Length`` header of a HEAD request.

        Raises
        ------
        FileNotFoundError
            Raised if the HEAD request does not return status 200.
        """
        if self.dirLike:
            return 0
        r = self.session.head(self.geturl(), timeout=TIMEOUT)
        if r.status_code == 200:
            return int(r.headers["Content-Length"])
        else:
            raise FileNotFoundError(f"Resource {self} does not exist")

    def mkdir(self) -> None:
        """Create the directory resource if it does not already exist.

        Missing parent directories are created first.

        Raises
        ------
        NotImplementedError
            Raised if the endpoint does not implement WebDAV.
        ValueError
            Raised if this URI is not directory-like, or if the server
            rejects the MKCOL request with a status other than 405.
        """
        # Only available on WebDAV backends
        if not self.is_webdav_endpoint:
            raise NotImplementedError("Endpoint does not implement WebDAV functionality")

        if not self.dirLike:
            raise ValueError(f"Can not create a 'directory' for file-like URI {self}")

        if not self.exists():
            # We need to test the absence of the parent directory,
            # but also if parent URL is different from self URL,
            # otherwise we could be stuck in a recursive loop
            # where self == parent.
            # The URL comparison comes first because it is free, whereas
            # exists() costs a request to the server.
            parent = self.parent()
            if parent.geturl() != self.geturl() and not parent.exists():
                parent.mkdir()
            log.debug("Creating new directory: %s", self.geturl())
            r = self.session.request("MKCOL", self.geturl(), timeout=TIMEOUT)
            if r.status_code != 201:
                if r.status_code == 405:
                    log.debug("Can not create directory: %s may already exist: skipping.", self.geturl())
                else:
                    raise ValueError(f"Can not create directory {self}, status code: {r.status_code}")

    def remove(self) -> None:
        """Remove the resource.

        Raises
        ------
        FileNotFoundError
            Raised if the DELETE request returns a status other than
            200, 202 or 204.
        """
        log.debug("Removing resource: %s", self.geturl())
        r = self.session.delete(self.geturl(), timeout=TIMEOUT)
        if r.status_code not in [200, 202, 204]:
            raise FileNotFoundError(f"Unable to delete resource {self}; status code: {r.status_code}")

    def _as_local(self) -> Tuple[str, bool]:
        """Download object over HTTP and place in temporary directory.

        Returns
        -------
        path : `str`
            Path to local temporary file.
        temporary : `bool`
            Always returns `True`. This is always a temporary file.

        Raises
        ------
        FileNotFoundError
            Raised if the GET request does not return status 200.
        """
        log.debug("Downloading remote resource as local file: %s", self.geturl())
        r = self.session.get(self.geturl(), stream=True, timeout=TIMEOUT)
        try:
            if r.status_code != 200:
                raise FileNotFoundError(f"Unable to download resource {self}; status code: {r.status_code}")
            with tempfile.NamedTemporaryFile(suffix=self.getExtension(), delete=False) as tmpFile:
                with time_this(log, msg="Downloading %s to local file", args=(self,)):
                    # iter_content() defaults to 1-byte chunks, which makes
                    # the download needlessly slow: ask for 1 MiB at a time.
                    for chunk in r.iter_content(chunk_size=1024 * 1024):
                        tmpFile.write(chunk)
        finally:
            # Release the streamed connection, including on the error path.
            r.close()
        return tmpFile.name, True

    def read(self, size: int = -1) -> bytes:
        """Open the resource and return the contents in bytes.

        Parameters
        ----------
        size : `int`, optional
            The number of bytes to read. Negative or omitted indicates
            that all data should be read.

        Returns
        -------
        data : `bytes`
            The bytes read from the resource.

        Raises
        ------
        FileNotFoundError
            Raised if the GET request does not return status 200.
        """
        log.debug("Reading from remote resource: %s", self.geturl())
        # Only stream when a positive number of bytes is requested;
        # otherwise the whole body is downloaded.
        stream = size > 0
        with time_this(log, msg="Read from remote resource %s", args=(self,)):
            r = self.session.get(self.geturl(), stream=stream, timeout=TIMEOUT)
        try:
            if r.status_code != 200:
                raise FileNotFoundError(f"Unable to read resource {self}; status code: {r.status_code}")
            if not stream:
                return r.content
            # The default guards against StopIteration when the body is
            # empty and there is no first chunk to return.
            return next(r.iter_content(chunk_size=size), b"")
        finally:
            if stream:
                # The body was only partially consumed: close the response
                # explicitly so that the connection is released.
                r.close()

    def write(self, data: bytes, overwrite: bool = True) -> None:
        """Write the supplied bytes to the new resource.

        Parameters
        ----------
        data : `bytes`
            The bytes to write to the resource. The entire contents of the
            resource will be replaced.
        overwrite : `bool`, optional
            If `True` the resource will be overwritten if it exists. Otherwise
            the write will fail.

        Raises
        ------
        FileExistsError
            Raised if the resource exists and ``overwrite`` is `False`.
        ValueError
            Raised if the PUT request returns a status other than
            201, 202 or 204.
        """
        log.debug("Writing to remote resource: %s", self.geturl())
        if not overwrite:
            if self.exists():
                raise FileExistsError(f"Remote resource {self} exists and overwrite has been disabled")
        # Probe with an empty PUT first so the data is sent only once, to
        # the URL the server redirects to (if any).
        dest_url = finalurl(self._emptyPut())
        with time_this(log, msg="Write data to remote %s", args=(self,)):
            r = self.session.put(dest_url, data=data, timeout=TIMEOUT)
        if r.status_code not in [201, 202, 204]:
            raise ValueError(f"Can not write file {self}, status code: {r.status_code}")

    def transfer_from(
        self,
        src: ResourcePath,
        transfer: str = "copy",
        overwrite: bool = False,
        transaction: Optional[TransactionProtocol] = None,
    ) -> None:
        """Transfer the current resource to a Webdav repository.

        Parameters
        ----------
        src : `ResourcePath`
            Source URI.
        transfer : `str`
            Mode to use for transferring the resource. Must be one of
            the modes listed in ``transferModes``.
        overwrite : `bool`, optional
            If `False` (the default) the transfer fails when the destination
            already exists. If `True` no existence check is made.
        transaction : `~lsst.resources.utils.TransactionProtocol`, optional
            Currently unused.

        Raises
        ------
        ValueError
            Raised if the transfer mode is not supported, or if the server
            returns a status other than 201, 202 or 204.
        FileExistsError
            Raised if the destination exists and ``overwrite`` is `False`.
        NotImplementedError
            Raised if the source is also an HTTP resource and the endpoint
            does not implement WebDAV.
        """
        # Fail early to prevent delays if remote resources are requested
        if transfer not in self.transferModes:
            raise ValueError(f"Transfer mode {transfer} not supported by URI scheme {self.scheme}")

        # Existence checks cost time so do not call this unless we know
        # that debugging is enabled.
        if log.isEnabledFor(logging.DEBUG):
            log.debug(
                "Transferring %s [exists: %s] -> %s [exists: %s] (transfer=%s)",
                src,
                src.exists(),
                self,
                self.exists(),
                transfer,
            )

        # Honor the overwrite flag: only an existing destination that the
        # caller did not agree to replace is an error.
        if not overwrite and self.exists():
            raise FileExistsError(f"Destination path {self} already exists.")

        if transfer == "auto":
            transfer = self.transferDefault

        # Set when the server itself moved the source, in which case there
        # is nothing left for us to delete afterwards.
        moved_by_server = False

        if isinstance(src, type(self)):
            # Only available on WebDAV backends
            if not self.is_webdav_endpoint:
                raise NotImplementedError("Endpoint does not implement WebDAV functionality")

            with time_this(log, msg="Transfer from %s to %s directly", args=(src, self)):
                if transfer == "move":
                    r = self.session.request(
                        "MOVE", src.geturl(), headers={"Destination": self.geturl()}, timeout=TIMEOUT
                    )
                    log.debug("Running move via MOVE HTTP request.")
                    moved_by_server = True
                else:
                    r = self.session.request(
                        "COPY", src.geturl(), headers={"Destination": self.geturl()}, timeout=TIMEOUT
                    )
                    log.debug("Running copy via COPY HTTP request.")
        else:
            # Use local file and upload it
            with src.as_local() as local_uri:
                with open(local_uri.ospath, "rb") as f:
                    dest_url = finalurl(self._emptyPut())
                    with time_this(log, msg="Transfer from %s to %s via local file", args=(src, self)):
                        r = self.session.put(dest_url, data=f, timeout=TIMEOUT)

        if r.status_code not in [201, 202, 204]:
            raise ValueError(f"Can not transfer file {self}, status code: {r.status_code}")

        # This was an explicit move requested from a remote resource
        # try to remove that resource. A WebDAV MOVE has already deleted
        # the source, so removing it again would fail.
        if transfer == "move" and not moved_by_server:
            # Transactions do not work here
            src.remove()

    def _emptyPut(self) -> requests.Response:
        """Send an empty PUT request to current URL.

        This is used to detect if redirection is enabled before sending actual
        data.

        Returns
        -------
        response : `requests.Response`
            HTTP Response from the endpoint.
        """
        headers = {"Content-Length": "0"}
        if useExpect100():
            headers["Expect"] = "100-continue"
        # Redirects are not followed so that the caller can inspect the
        # redirect response itself.
        return self.session.put(
            self.geturl(), data=None, headers=headers, allow_redirects=False, timeout=TIMEOUT
        )