Coverage for python/lsst/resources/http.py: 15%
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of lsst-resources.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# Use of this source code is governed by a 3-clause BSD-style
10# license that can be found in the LICENSE file.
12from __future__ import annotations
14import functools
15import logging
16import os
17import os.path
18import tempfile
20import requests
22__all__ = ("HttpResourcePath",)
24from typing import TYPE_CHECKING, Optional, Tuple, Union
26from lsst.utils.timer import time_this
27from requests.adapters import HTTPAdapter
28from urllib3.util.retry import Retry
30from ._resourcePath import ResourcePath
32if TYPE_CHECKING: 32 ↛ 33line 32 didn't jump to line 33, because the condition on line 32 was never true
33 from .utils import TransactionProtocol
# Logger shared by every function and class in this module.
log = logging.getLogger(__name__)

# Default timeout for all HTTP requests, in seconds
TIMEOUT = 20
def getHttpSession() -> requests.Session:
    """Create a requests.Session pre-configured with environment variable data.

    Returns
    -------
    session : `requests.Session`
        An http session used to execute requests.

    Raises
    ------
    KeyError
        Raised if X509 authentication is requested but
        LSST_BUTLER_WEBDAV_PROXY_CERT is not set, or if TOKEN authentication
        is requested but LSST_BUTLER_WEBDAV_TOKEN_FILE is not set.
    ValueError
        Raised if LSST_BUTLER_WEBDAV_AUTH is set to a value other than
        X509 or TOKEN.

    Notes
    -----
    The following environment variables are consulted:

    - (OPTIONAL) LSST_BUTLER_WEBDAV_CA_BUNDLE: the directory where CA
      certificates are stored if you intend to use HTTPS to
      communicate with the endpoint. When it is not set the default
      certificate verification of ``requests`` is left untouched.
    - (OPTIONAL) LSST_BUTLER_WEBDAV_AUTH: which authentication method to
      use. Possible values are X509 and TOKEN. When it is not set an
      unauthenticated session is returned.
    - (X509 only) LSST_BUTLER_WEBDAV_PROXY_CERT: path to proxy
      certificate used to authenticate requests
    - (TOKEN only) LSST_BUTLER_WEBDAV_TOKEN_FILE: file which
      contains the bearer token used to authenticate requests
    - (OPTIONAL) LSST_BUTLER_WEBDAV_EXPECT100: if set, we will add an
      "Expect: 100-Continue" header in all requests. This is required
      on certain endpoints where requests redirection is made.
    """
    retries = Retry(total=3, backoff_factor=1, status_forcelist=[429, 500, 502, 503, 504])

    session = requests.Session()
    session.mount("http://", HTTPAdapter(max_retries=retries))
    session.mount("https://", HTTPAdapter(max_retries=retries))

    log.debug("Creating new HTTP session...")

    ca_bundle = os.environ.get("LSST_BUTLER_WEBDAV_CA_BUNDLE")
    if ca_bundle:
        session.verify = ca_bundle
    else:
        # Do NOT assign None (or an empty string) to session.verify: a falsy
        # value makes the HTTPS adapter skip certificate verification.
        # Leaving the attribute alone keeps the library default (verify).
        log.debug(
            "Environment variable LSST_BUTLER_WEBDAV_CA_BUNDLE is not set: "
            "If you would like to trust additional CAs, please consider "
            "exporting this variable."
        )

    env_auth_method = os.environ.get("LSST_BUTLER_WEBDAV_AUTH")
    if env_auth_method is None:
        log.debug("Environment variable LSST_BUTLER_WEBDAV_AUTH is not set, no authentication configured.")
        log.debug("Unauthenticated session configured and ready.")
        return session

    if env_auth_method == "X509":
        log.debug("... using x509 authentication.")
        try:
            proxy_cert = os.environ["LSST_BUTLER_WEBDAV_PROXY_CERT"]
        except KeyError:
            # The internal lookup failure adds nothing to the message below.
            raise KeyError("Environment variable LSST_BUTLER_WEBDAV_PROXY_CERT is not set") from None
        # The proxy certificate file is used as both certificate and key.
        session.cert = (proxy_cert, proxy_cert)
    elif env_auth_method == "TOKEN":
        log.debug("... using bearer-token authentication.")
        refreshToken(session)
    else:
        raise ValueError("Environment variable LSST_BUTLER_WEBDAV_AUTH must be set to X509 or TOKEN")

    log.debug("Authenticated session configured and ready.")
    return session
def useExpect100() -> bool:
    """Tell whether the "Expect: 100-Continue" header must be sent.

    Returns
    -------
    useExpect100 : `bool`
        True if LSST_BUTLER_WEBDAV_EXPECT100 is set, False otherwise.
    """
    # Only the presence of the variable matters, not its value. The header
    # is required for request redirection, in dCache for example.
    enabled = "LSST_BUTLER_WEBDAV_EXPECT100" in os.environ
    if enabled:
        log.debug("Expect: 100-Continue header enabled.")
    return enabled
def isTokenAuth() -> bool:
    """Return the status of bearer-token authentication.

    Returns
    -------
    isTokenAuth : `bool`
        True if LSST_BUTLER_WEBDAV_AUTH is set to TOKEN, False otherwise
        (including when the variable is not set at all).

    Notes
    -----
    An unset LSST_BUTLER_WEBDAV_AUTH is a supported configuration: it
    yields an unauthenticated session. This check must therefore not fail
    in that case, otherwise reusing such a session would raise.
    """
    return os.environ.get("LSST_BUTLER_WEBDAV_AUTH") == "TOKEN"
def refreshToken(session: requests.Session) -> None:
    """Refresh the session token.

    Set or update the 'Authorization' header of the session,
    configure bearer token authentication, with the value fetched
    from LSST_BUTLER_WEBDAV_TOKEN_FILE

    Parameters
    ----------
    session : `requests.Session`
        Session on which bearer token authentication must be configured.

    Raises
    ------
    KeyError
        Raised if LSST_BUTLER_WEBDAV_TOKEN_FILE is not set.
    FileNotFoundError
        Raised if LSST_BUTLER_WEBDAV_TOKEN_FILE does not point to a file.
    """
    # Keep the try body to the single lookup that can raise KeyError.
    try:
        token_path = os.environ["LSST_BUTLER_WEBDAV_TOKEN_FILE"]
    except KeyError:
        raise KeyError("Environment variable LSST_BUTLER_WEBDAV_TOKEN_FILE is not set") from None

    if not os.path.isfile(token_path):
        raise FileNotFoundError(f"No token file: {token_path}")

    with open(token_path, "r") as fh:
        # Drop every whitespace character (newlines, but also stray spaces
        # or tabs) so that they never end up in the header value.
        bearer_token = "".join(fh.read().split())

    session.headers.update({"Authorization": "Bearer " + bearer_token})
@functools.lru_cache
def isWebdavEndpoint(path: Union[ResourcePath, str]) -> bool:
    """Check whether the remote HTTP endpoint implements Webdav features.

    The answer is cached per ``path`` value, so the server is queried at
    most once for a given argument.

    Parameters
    ----------
    path : `ResourcePath` or `str`
        URL to the resource to be checked.
        Should preferably refer to the root since the status is shared
        by all paths in that server.

    Returns
    -------
    isWebdav : `bool`
        True if the endpoint implements Webdav, False if it doesn't.
    """
    ca_bundle = os.environ.get("LSST_BUTLER_WEBDAV_CA_BUNDLE")
    if not ca_bundle:
        log.warning(
            "Environment variable LSST_BUTLER_WEBDAV_CA_BUNDLE is not set: "
            "some HTTPS requests will fail. If you intend to use HTTPS, please "
            "export this variable."
        )

    log.debug("Detecting HTTP endpoint type for '%s'...", path)
    # Fall back to the default verification (True) rather than passing a
    # falsy value, and bound the probe with the module-wide timeout so an
    # unresponsive server cannot block the caller forever.
    r = requests.options(str(path), verify=ca_bundle or True, timeout=TIMEOUT)
    # The response advertises a "DAV" header when WebDAV is supported.
    return "DAV" in r.headers
def finalurl(r: requests.Response) -> str:
    """Calculate the final URL, including redirects.

    Check whether the remote HTTP endpoint redirects to a different
    endpoint, and return the final destination of the request.
    This is needed when using PUT operations, to avoid starting
    to send the data to the endpoint, before having to send it again once
    the 307 redirect response is received, and thus wasting bandwidth.

    Parameters
    ----------
    r : `requests.Response`
        An HTTP response received when requesting the endpoint

    Returns
    -------
    destination_url : `str`
        The final destination to which requests must be sent. This is the
        URL of the response itself unless the response is a
        method-preserving redirect (307 or 308) carrying a ``Location``
        header.
    """
    from urllib.parse import urljoin

    destination_url = r.url
    # 307 and 308 are the redirects that keep the request method and body.
    if r.status_code in (307, 308):
        location = r.headers.get("Location")
        if location:
            # Location may be a relative reference: resolve it against the
            # URL that was requested. Absolute URLs are returned unchanged.
            destination_url = urljoin(r.url, location)
            log.debug("Request redirected to %s", destination_url)
    return destination_url
# Cached (path, block_size) pair: the local directory where temporary files
# are saved and the I/O block size to use with the underlying file system.
# Stays None until _get_temp_dir() is called for the first time.
_TMPDIR: Optional[Tuple[str, int]] = None


def _get_temp_dir() -> Tuple[str, int]:
    """Return the temporary directory path and block size.

    The result is computed on the first call and stored in _TMPDIR;
    subsequent calls return the stored value.
    """
    global _TMPDIR
    if not _TMPDIR:
        # Prefer the first of the environment variables
        # 'LSST_RESOURCES_TMPDIR' or 'TMPDIR' that names an existing
        # directory. Otherwise use the current working directory.
        candidates = (os.getenv(name) for name in ("LSST_RESOURCES_TMPDIR", "TMPDIR"))
        location = next(
            (path for path in candidates if path and os.path.isdir(path)),
            os.getcwd(),
        )

        # The block size is the larger of 256 typical blocks (4096 bytes
        # each) and 10 file system blocks: a compromise between the memory
        # spent on buffering and the number of system calls needed to read
        # from or write to temporary files.
        fs_block = os.statvfs(location).f_bsize
        _TMPDIR = (location, max(10 * fs_block, 256 * 4096))
    return _TMPDIR
class HttpResourcePath(ResourcePath):
    """General HTTP(S) resource.

    Plain HTTP operations (existence check, size, read, write, delete) are
    always available. Directory creation and server-side copy/move are
    only available when the endpoint advertises WebDAV support.
    """

    # Session shared by all instances. It starts as a bare session and is
    # replaced by a configured one the first time `session` is accessed.
    _session = requests.Session()
    # True once `_session` has been replaced by a configured session.
    _sessionInitialized = False
    # Result of the WebDAV capability check; None until first requested.
    _is_webdav: Optional[bool] = None

    @property
    def session(self) -> requests.Session:
        """Client object to address remote resource."""
        cls = type(self)
        if cls._sessionInitialized:
            # Re-read the token file on every access so that a renewed
            # token is picked up by the already configured session.
            if isTokenAuth():
                refreshToken(cls._session)
            return cls._session

        s = getHttpSession()
        cls._session = s
        cls._sessionInitialized = True
        return s

    @property
    def is_webdav_endpoint(self) -> bool:
        """Check if the current endpoint implements WebDAV features.

        This is stored per URI but cached by root so there is
        only one check per hostname.
        """
        if self._is_webdav is not None:
            return self._is_webdav

        self._is_webdav = isWebdavEndpoint(self.root_uri())
        return self._is_webdav

    def exists(self) -> bool:
        """Check that a remote HTTP resource exists.

        Returns
        -------
        exists : `bool`
            True if a HEAD request on the resource answers 200.
        """
        log.debug("Checking if resource exists: %s", self.geturl())
        r = self.session.head(self.geturl(), timeout=TIMEOUT)

        return r.status_code == 200

    def size(self) -> int:
        """Return the size of the remote resource in bytes.

        Directory-like resources report a size of 0.

        Raises
        ------
        FileNotFoundError
            Raised if a HEAD request on the resource does not answer 200.
        """
        if self.dirLike:
            return 0
        r = self.session.head(self.geturl(), timeout=TIMEOUT)
        if r.status_code != 200:
            raise FileNotFoundError(f"Resource {self} does not exist")
        return int(r.headers["Content-Length"])

    def mkdir(self) -> None:
        """Create the directory resource if it does not already exist.

        Missing parent directories are created first.

        Raises
        ------
        NotImplementedError
            Raised if the endpoint does not implement WebDAV.
        ValueError
            Raised if this URI is file-like, or if the server refuses to
            create the directory.
        """
        # Only available on WebDAV backends
        if not self.is_webdav_endpoint:
            raise NotImplementedError("Endpoint does not implement WebDAV functionality")

        if not self.dirLike:
            raise ValueError(f"Can not create a 'directory' for file-like URI {self}")

        if self.exists():
            return

        # Create the parent first when it is missing. The URL comparison
        # guards against endless recursion where self == parent; it is done
        # before the existence check because it needs no network request.
        parent = self.parent()
        if parent.geturl() != self.geturl() and not parent.exists():
            parent.mkdir()

        log.debug("Creating new directory: %s", self.geturl())
        r = self.session.request("MKCOL", self.geturl(), timeout=TIMEOUT)
        if r.status_code == 201:
            return
        if r.status_code == 405:
            log.debug("Can not create directory: %s may already exist: skipping.", self.geturl())
        else:
            raise ValueError(f"Can not create directory {self}, status code: {r.status_code}")

    def remove(self) -> None:
        """Remove the resource.

        Raises
        ------
        FileNotFoundError
            Raised if the server does not answer 200, 202 or 204.
        """
        log.debug("Removing resource: %s", self.geturl())
        r = self.session.delete(self.geturl(), timeout=TIMEOUT)
        if r.status_code not in [200, 202, 204]:
            raise FileNotFoundError(f"Unable to delete resource {self}; status code: {r.status_code}")

    def _as_local(self) -> Tuple[str, bool]:
        """Download object over HTTP and place in temporary directory.

        Returns
        -------
        path : `str`
            Path to local temporary file.
        temporary : `bool`
            Always returns `True`. This is always a temporary file.

        Raises
        ------
        FileNotFoundError
            Raised if the server does not answer 200.
        """
        # The response is streamed, so it must be closed explicitly to give
        # the connection back; the context manager does it on every path.
        with self.session.get(self.geturl(), stream=True, timeout=TIMEOUT) as r:
            if r.status_code != 200:
                raise FileNotFoundError(f"Unable to download resource {self}; status code: {r.status_code}")

            tmpdir, buffering = _get_temp_dir()
            with tempfile.NamedTemporaryFile(
                suffix=self.getExtension(), buffering=buffering, dir=tmpdir, delete=False
            ) as tmpFile:
                try:
                    with time_this(
                        log,
                        msg="Downloading %s [length=%s] to local file %s [chunk_size=%d]",
                        args=(self, r.headers.get("Content-Length"), tmpFile.name, buffering),
                    ):
                        for chunk in r.iter_content(chunk_size=buffering):
                            tmpFile.write(chunk)
                except BaseException:
                    # The file is created with delete=False, so a failed
                    # download would otherwise leave a partial file behind.
                    tmpFile.close()
                    try:
                        os.remove(tmpFile.name)
                    except OSError:
                        pass
                    raise
        return tmpFile.name, True

    def read(self, size: int = -1) -> bytes:
        """Open the resource and return the contents in bytes.

        Parameters
        ----------
        size : `int`, optional
            The number of bytes to read. Negative or omitted indicates
            that all data should be read.

        Returns
        -------
        data : `bytes`
            The bytes read. Empty if a positive ``size`` was requested and
            the resource has no content.

        Raises
        ------
        FileNotFoundError
            Raised if the server does not answer 200.
        """
        log.debug("Reading from remote resource: %s", self.geturl())
        # Only stream when a bounded number of bytes was requested.
        stream = size > 0
        with time_this(log, msg="Read from remote resource %s", args=(self,)):
            r = self.session.get(self.geturl(), stream=stream, timeout=TIMEOUT)
        # Close the response on every path: a streamed response that is
        # only partially consumed would otherwise keep its connection.
        with r:
            if r.status_code != 200:
                raise FileNotFoundError(f"Unable to read resource {self}; status code: {r.status_code}")
            if not stream:
                return r.content
            # An empty body yields no chunk at all; return empty bytes
            # instead of leaking StopIteration to the caller.
            return next(r.iter_content(chunk_size=size), b"")

    def write(self, data: bytes, overwrite: bool = True) -> None:
        """Write the supplied bytes to the new resource.

        Parameters
        ----------
        data : `bytes`
            The bytes to write to the resource. The entire contents of the
            resource will be replaced.
        overwrite : `bool`, optional
            If `True` the resource will be overwritten if it exists. Otherwise
            the write will fail.

        Raises
        ------
        FileExistsError
            Raised if the resource exists and ``overwrite`` is `False`.
        ValueError
            Raised if the server does not answer 201, 202 or 204.
        """
        log.debug("Writing to remote resource: %s", self.geturl())
        if not overwrite and self.exists():
            raise FileExistsError(f"Remote resource {self} exists and overwrite has been disabled")
        # Find out where the data must really go before sending it.
        dest_url = finalurl(self._emptyPut())
        with time_this(log, msg="Write data to remote %s", args=(self,)):
            r = self.session.put(dest_url, data=data, timeout=TIMEOUT)
        if r.status_code not in [201, 202, 204]:
            raise ValueError(f"Can not write file {self}, status code: {r.status_code}")

    def transfer_from(
        self,
        src: ResourcePath,
        transfer: str = "copy",
        overwrite: bool = False,
        transaction: Optional[TransactionProtocol] = None,
    ) -> None:
        """Transfer the current resource to a Webdav repository.

        Parameters
        ----------
        src : `ResourcePath`
            Source URI.
        transfer : `str`
            Mode to use for transferring the resource. Supports the following
            options: copy.
        overwrite : `bool`, optional
            If `True` an existing destination is replaced. If `False`
            (the default) the transfer fails when the destination exists.
        transaction : `~lsst.resources.utils.TransactionProtocol`, optional
            Currently unused.

        Raises
        ------
        ValueError
            Raised if the transfer mode is not supported or if the server
            does not answer 201, 202 or 204.
        FileExistsError
            Raised if the destination exists and ``overwrite`` is `False`.
        NotImplementedError
            Raised if the source is also an HTTP resource but the endpoint
            does not implement WebDAV.
        """
        # Fail early to prevent delays if remote resources are requested
        if transfer not in self.transferModes:
            raise ValueError(f"Transfer mode {transfer} not supported by URI scheme {self.scheme}")

        # Existence checks cost time so do not call this unless we know
        # that debugging is enabled.
        if log.isEnabledFor(logging.DEBUG):
            log.debug(
                "Transferring %s [exists: %s] -> %s [exists: %s] (transfer=%s)",
                src,
                src.exists(),
                self,
                self.exists(),
                transfer,
            )

        # An existing destination is only an error when the caller did not
        # ask for it to be replaced.
        if not overwrite and self.exists():
            raise FileExistsError(f"Destination path {self} already exists.")

        if transfer == "auto":
            transfer = self.transferDefault

        # Whether the source must be deleted by us once the data has been
        # transferred. A server-side MOVE removes the source by itself.
        remove_source = False

        if isinstance(src, type(self)):
            # Only available on WebDAV backends
            if not self.is_webdav_endpoint:
                raise NotImplementedError("Endpoint does not implement WebDAV functionality")

            method = "MOVE" if transfer == "move" else "COPY"
            with time_this(log, msg="Transfer from %s to %s directly", args=(src, self)):
                r = self.session.request(
                    method, src.geturl(), headers={"Destination": self.geturl()}, timeout=TIMEOUT
                )
                if method == "MOVE":
                    log.debug("Running move via MOVE HTTP request.")
                else:
                    log.debug("Running copy via COPY HTTP request.")
        else:
            # Use local file and upload it
            with src.as_local() as local_uri:
                with open(local_uri.ospath, "rb") as f:
                    dest_url = finalurl(self._emptyPut())
                    with time_this(log, msg="Transfer from %s to %s via local file", args=(src, self)):
                        r = self.session.put(dest_url, data=f, timeout=TIMEOUT)
            remove_source = transfer == "move"

        if r.status_code not in [201, 202, 204]:
            raise ValueError(f"Can not transfer file {self}, status code: {r.status_code}")

        # This was an explicit move requested from a resource that was
        # uploaded rather than moved by the server: remove that resource.
        if remove_source:
            # Transactions do not work here
            src.remove()

    def _emptyPut(self) -> requests.Response:
        """Send an empty PUT request to current URL.

        This is used to detect if redirection is enabled before sending actual
        data.

        Returns
        -------
        response : `requests.Response`
            HTTP Response from the endpoint.
        """
        headers = {"Content-Length": "0"}
        if useExpect100():
            headers["Expect"] = "100-continue"
        # Redirects are not followed so that the caller can inspect the
        # redirect response itself.
        return self.session.put(
            self.geturl(), data=None, headers=headers, allow_redirects=False, timeout=TIMEOUT
        )