Coverage for python/lsst/daf/butler/core/_butlerUri/http.py : 16%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24import os
25import os.path
26import requests
27import tempfile
28import logging
29import functools
31__all__ = ('ButlerHttpURI', )
33from requests.adapters import HTTPAdapter
34from requests.packages.urllib3.util.retry import Retry
36from typing import (
37 TYPE_CHECKING,
38 Optional,
39 Tuple,
40 Union,
41)
43from .utils import NoTransaction
44from ._butlerUri import ButlerURI
46if TYPE_CHECKING: 46 ↛ 47line 46 didn't jump to line 47, because the condition on line 46 was never true
47 from ..datastore import DatastoreTransaction
49log = logging.getLogger(__name__)
51# Default timeout for all HTTP requests, in seconds
52TIMEOUT = 20
def getHttpSession() -> requests.Session:
    """Create a requests.Session pre-configured with environment variable data.

    Returns
    -------
    session : `requests.Session`
        An http session used to execute requests.

    Raises
    ------
    KeyError
        Raised if LSST_BUTLER_WEBDAV_AUTH is ``X509`` and
        LSST_BUTLER_WEBDAV_PROXY_CERT is not set.
    ValueError
        Raised if LSST_BUTLER_WEBDAV_AUTH is set to something other than
        ``X509`` or ``TOKEN``.

    Notes
    -----
    The session is configured from the following environment variables:

    - LSST_BUTLER_WEBDAV_CA_BUNDLE: path to the CA certificates to trust
      when communicating with the endpoint over HTTPS. If unset (or
      empty) the default certificate verification of ``requests`` is
      left untouched.
    - LSST_BUTLER_WEBDAV_AUTH: which authentication method to use.
      Possible values are X509 and TOKEN. If unset, an unauthenticated
      session is returned.
    - (X509 only) LSST_BUTLER_WEBDAV_PROXY_CERT: path to proxy
      certificate used to authenticate requests
    - (TOKEN only) LSST_BUTLER_WEBDAV_TOKEN_FILE: file which
      contains the bearer token used to authenticate requests
    - (OPTIONAL) LSST_BUTLER_WEBDAV_EXPECT100: if set, we will add an
      "Expect: 100-Continue" header in all requests. This is required
      on certain endpoints where requests redirection is made.
    """
    retries = Retry(total=3, backoff_factor=1, status_forcelist=[429, 500, 502, 503, 504])

    session = requests.Session()
    session.mount("http://", HTTPAdapter(max_retries=retries))
    session.mount("https://", HTTPAdapter(max_retries=retries))

    log.debug("Creating new HTTP session...")

    ca_bundle = os.environ.get('LSST_BUTLER_WEBDAV_CA_BUNDLE')
    if ca_bundle:
        session.verify = ca_bundle
    else:
        # Do NOT assign None (or an empty string) to session.verify: any
        # falsy value makes requests skip certificate verification
        # entirely. Leaving the attribute alone keeps the default (True).
        log.debug("Environment variable LSST_BUTLER_WEBDAV_CA_BUNDLE is not set: "
                  "If you would like to trust additional CAs, please consider "
                  "exporting this variable.")

    env_auth_method = os.environ.get('LSST_BUTLER_WEBDAV_AUTH')
    if env_auth_method is None:
        log.debug("Environment variable LSST_BUTLER_WEBDAV_AUTH is not set, "
                  "no authentication configured.")
        log.debug("Unauthenticated session configured and ready.")
        return session

    if env_auth_method == "X509":
        log.debug("... using x509 authentication.")
        try:
            proxy_cert = os.environ['LSST_BUTLER_WEBDAV_PROXY_CERT']
        except KeyError:
            # "from None" hides the uninformative inner KeyError.
            raise KeyError("Environment variable LSST_BUTLER_WEBDAV_PROXY_CERT is not set") from None
        # The same file is passed as both the certificate and the key.
        session.cert = (proxy_cert, proxy_cert)
    elif env_auth_method == "TOKEN":
        log.debug("... using bearer-token authentication.")
        refreshToken(session)
    else:
        raise ValueError("Environment variable LSST_BUTLER_WEBDAV_AUTH must be set to X509 or TOKEN")

    log.debug("Authenticated session configured and ready.")
    return session
def useExpect100() -> bool:
    """Tell whether the "Expect: 100-Continue" header must be sent.

    Returns
    -------
    useExpect100 : `bool`
        True if LSST_BUTLER_WEBDAV_EXPECT100 is present in the
        environment (whatever its value), False otherwise.
    """
    # Some endpoints (dCache for example) need this header when they
    # redirect requests.
    enabled = "LSST_BUTLER_WEBDAV_EXPECT100" in os.environ
    if enabled:
        log.debug("Expect: 100-Continue header enabled.")
    return enabled
def isTokenAuth() -> bool:
    """Tell whether bearer-token authentication is selected.

    Returns
    -------
    isTokenAuth : `bool`
        True if LSST_BUTLER_WEBDAV_AUTH is set to TOKEN, False otherwise.

    Raises
    ------
    KeyError
        Raised if LSST_BUTLER_WEBDAV_AUTH is not set at all.
    """
    auth_method = os.environ.get('LSST_BUTLER_WEBDAV_AUTH')
    if auth_method is None:
        raise KeyError("Environment variable LSST_BUTLER_WEBDAV_AUTH is not set, "
                       "please use values X509 or TOKEN")
    return auth_method == "TOKEN"
def refreshToken(session: requests.Session) -> None:
    """Load the bearer token from disk and install it on the session.

    The token is read from the file named by
    LSST_BUTLER_WEBDAV_TOKEN_FILE, newlines are removed, and the result
    is stored in the 'Authorization' header of the session, replacing
    any previous value.

    Parameters
    ----------
    session : `requests.Session`
        Session on which bearer token authentication must be configured.

    Raises
    ------
    KeyError
        Raised if LSST_BUTLER_WEBDAV_TOKEN_FILE is not set.
    FileNotFoundError
        Raised if the configured path is not an existing file.
    """
    token_path = os.environ.get('LSST_BUTLER_WEBDAV_TOKEN_FILE')
    if token_path is None:
        raise KeyError("Environment variable LSST_BUTLER_WEBDAV_TOKEN_FILE is not set")
    if not os.path.isfile(token_path):
        raise FileNotFoundError(f"No token file: {token_path}")

    with open(token_path, "r") as token_file:
        raw_token = token_file.read()

    # Drop every newline character so the token fits on one header line.
    bearer_token = "".join(raw_token.split('\n'))
    session.headers.update({'Authorization': f'Bearer {bearer_token}'})
@functools.lru_cache
def isWebdavEndpoint(path: Union[ButlerURI, str]) -> bool:
    """Check whether the remote HTTP endpoint implements Webdav features.

    The result is memoized per ``path`` value by `functools.lru_cache`,
    so the remote server is only queried the first time a given path
    is checked.

    Parameters
    ----------
    path : `ButlerURI` or `str`
        URL to the resource to be checked.
        Should preferably refer to the root since the status is shared
        by all paths in that server.

    Returns
    -------
    isWebdav : `bool`
        True if the endpoint implements Webdav, False if it doesn't.
    """
    ca_bundle = os.environ.get('LSST_BUTLER_WEBDAV_CA_BUNDLE')
    if ca_bundle is None:
        log.warning("Environment variable LSST_BUTLER_WEBDAV_CA_BUNDLE is not set: "
                    "some HTTPS requests will fail. If you intend to use HTTPS, please "
                    "export this variable.")

    log.debug("Detecting HTTP endpoint type for '%s'...", path)
    # Apply the module-wide timeout like every other request in this
    # module: without it an unresponsive server blocks the caller forever.
    r = requests.options(str(path), verify=ca_bundle, timeout=TIMEOUT)
    # The endpoint is treated as WebDAV when its OPTIONS response
    # carries a "DAV" header.
    return 'DAV' in r.headers
def finalurl(r: requests.Response) -> str:
    """Work out where requests for this resource must really be sent.

    If the response is a 307 redirect, the target of the redirection is
    returned; otherwise the URL of the response itself is returned.
    Knowing the final destination before a PUT avoids sending the
    payload to the first endpoint only to send it again after the
    redirect, which would waste bandwidth.

    Parameters
    ----------
    r : `requests.Response`
        An HTTP response received when requesting the endpoint

    Returns
    -------
    destination_url: `string`
        The final destination to which requests must be sent.
    """
    if r.status_code != 307:
        return r.url

    redirect_target = r.headers['Location']
    log.debug("Request redirected to %s", redirect_target)
    return redirect_target
class ButlerHttpURI(ButlerURI):
    """General HTTP(S) resource."""

    # Session shared by all instances; it is replaced by a configured
    # session the first time the ``session`` property is accessed.
    _session = requests.Session()
    _sessionInitialized = False
    # Result of the WebDAV detection; None until it has been checked.
    _is_webdav: Optional[bool] = None

    @property
    def session(self) -> requests.Session:
        """Client object to address remote resource."""
        if ButlerHttpURI._sessionInitialized:
            # Re-read the token file on every access when bearer-token
            # authentication is in use.
            if isTokenAuth():
                refreshToken(ButlerHttpURI._session)
            return ButlerHttpURI._session

        s = getHttpSession()
        ButlerHttpURI._session = s
        ButlerHttpURI._sessionInitialized = True
        return s

    @property
    def is_webdav_endpoint(self) -> bool:
        """Check if the current endpoint implements WebDAV features.

        This is stored per URI but cached by root so there is
        only one check per hostname.
        """
        if self._is_webdav is not None:
            return self._is_webdav

        self._is_webdav = isWebdavEndpoint(self.root_uri())
        return self._is_webdav

    def exists(self) -> bool:
        """Check that a remote HTTP resource exists.

        Returns
        -------
        exists : `bool`
            True if a HEAD request on the resource answers with status
            200, False for any other status.
        """
        log.debug("Checking if resource exists: %s", self.geturl())
        r = self.session.head(self.geturl(), timeout=TIMEOUT)

        return r.status_code == 200

    def size(self) -> int:
        """Return the size of the remote resource in bytes.

        Returns
        -------
        size : `int`
            0 for directory-like URIs, otherwise the value of the
            Content-Length header of a HEAD request.

        Raises
        ------
        FileNotFoundError
            Raised if the HEAD request does not answer with status 200.
        """
        if self.dirLike:
            return 0
        r = self.session.head(self.geturl(), timeout=TIMEOUT)
        if r.status_code != 200:
            raise FileNotFoundError(f"Resource {self} does not exist")
        return int(r.headers['Content-Length'])

    def mkdir(self) -> None:
        """Create the directory resource if it does not already exist.

        Missing parent directories are created first.

        Raises
        ------
        NotImplementedError
            Raised if the endpoint does not implement WebDAV.
        ValueError
            Raised if this URI is file-like, or if the server refuses
            to create the directory.
        """
        # Only available on WebDAV backends
        if not self.is_webdav_endpoint:
            raise NotImplementedError("Endpoint does not implement WebDAV functionality")

        if not self.dirLike:
            raise ValueError(f"Can not create a 'directory' for file-like URI {self}")

        if self.exists():
            return

        # Only recurse when the parent URL differs from our own URL,
        # otherwise we could be stuck in a recursive loop where
        # self == parent. The cheap URL comparison comes first so no
        # request is sent when it fails.
        parent = self.parent()
        if parent.geturl() != self.geturl() and not parent.exists():
            parent.mkdir()

        log.debug("Creating new directory: %s", self.geturl())
        r = self.session.request("MKCOL", self.geturl(), timeout=TIMEOUT)
        if r.status_code == 201:
            return
        if r.status_code == 405:
            log.debug("Can not create directory: %s may already exist: skipping.", self.geturl())
        else:
            raise ValueError(f"Can not create directory {self}, status code: {r.status_code}")

    def remove(self) -> None:
        """Remove the resource.

        Raises
        ------
        FileNotFoundError
            Raised if the DELETE request does not answer with status
            200, 202 or 204.
        """
        log.debug("Removing resource: %s", self.geturl())
        r = self.session.delete(self.geturl(), timeout=TIMEOUT)
        if r.status_code not in [200, 202, 204]:
            raise FileNotFoundError(f"Unable to delete resource {self}; status code: {r.status_code}")

    def _as_local(self) -> Tuple[str, bool]:
        """Download object over HTTP and place in temporary directory.

        Returns
        -------
        path : `str`
            Path to local temporary file.
        temporary : `bool`
            Always returns `True`. This is always a temporary file.

        Raises
        ------
        FileNotFoundError
            Raised if the GET request does not answer with status 200.
        """
        # iter_content() defaults to one byte per chunk; ask for large
        # chunks so the download is not done byte by byte.
        chunk_size = 1024 * 1024

        log.debug("Downloading remote resource as local file: %s", self.geturl())
        r = self.session.get(self.geturl(), stream=True, timeout=TIMEOUT)
        try:
            if r.status_code != 200:
                raise FileNotFoundError(f"Unable to download resource {self}; status code: {r.status_code}")
            with tempfile.NamedTemporaryFile(suffix=self.getExtension(), delete=False) as tmpFile:
                for chunk in r.iter_content(chunk_size=chunk_size):
                    tmpFile.write(chunk)
        finally:
            # A streamed response keeps its connection until closed.
            r.close()
        return tmpFile.name, True

    def read(self, size: int = -1) -> bytes:
        """Open the resource and return the contents in bytes.

        Parameters
        ----------
        size : `int`, optional
            The number of bytes to read. Negative or omitted indicates
            that all data should be read.

        Returns
        -------
        data : `bytes`
            The whole content when ``size`` is not positive, otherwise
            the first chunk delivered for the requested chunk size
            (empty if the resource has no content).

        Raises
        ------
        FileNotFoundError
            Raised if the GET request does not answer with status 200.
        """
        log.debug("Reading from remote resource: %s", self.geturl())
        stream = size > 0
        r = self.session.get(self.geturl(), stream=stream, timeout=TIMEOUT)
        try:
            if r.status_code != 200:
                raise FileNotFoundError(f"Unable to read resource {self}; status code: {r.status_code}")
            if not stream:
                return r.content
            # The default guards against StopIteration on an empty body.
            return next(r.iter_content(chunk_size=size), b"")
        finally:
            # Release the connection, which a partially read streamed
            # response would otherwise keep.
            r.close()

    def write(self, data: bytes, overwrite: bool = True) -> None:
        """Write the supplied bytes to the new resource.

        Parameters
        ----------
        data : `bytes`
            The bytes to write to the resource. The entire contents of the
            resource will be replaced.
        overwrite : `bool`, optional
            If `True` the resource will be overwritten if it exists. Otherwise
            the write will fail.

        Raises
        ------
        FileExistsError
            Raised if the resource exists and ``overwrite`` is `False`.
        ValueError
            Raised if the PUT request does not answer with status 201,
            202 or 204.
        """
        log.debug("Writing to remote resource: %s", self.geturl())
        if not overwrite and self.exists():
            raise FileExistsError(f"Remote resource {self} exists and overwrite has been disabled")
        # Find out where the data must go before sending the payload.
        dest_url = finalurl(self._emptyPut())
        r = self.session.put(dest_url, data=data, timeout=TIMEOUT)
        if r.status_code not in [201, 202, 204]:
            raise ValueError(f"Can not write file {self}, status code: {r.status_code}")

    def transfer_from(self, src: ButlerURI, transfer: str = "copy",
                      overwrite: bool = False,
                      transaction: Optional[Union[DatastoreTransaction, NoTransaction]] = None) -> None:
        """Transfer the current resource to a Webdav repository.

        Parameters
        ----------
        src : `ButlerURI`
            Source URI.
        transfer : `str`
            Mode to use for transferring the resource. Must be one of
            the modes listed in ``transferModes``.
        overwrite : `bool`, optional
            If `False` (the default) the transfer fails when the
            destination already exists.
        transaction : `DatastoreTransaction`, optional
            Currently unused.

        Raises
        ------
        ValueError
            Raised if the transfer mode is not supported or if the
            server answers with an unexpected status code.
        FileExistsError
            Raised if the destination exists and ``overwrite`` is `False`.
        NotImplementedError
            Raised if both URIs are HTTP URIs but the endpoint does not
            implement WebDAV.
        """
        # Fail early to prevent delays if remote resources are requested
        if transfer not in self.transferModes:
            raise ValueError(f"Transfer mode {transfer} not supported by URI scheme {self.scheme}")

        # Building this message issues remote requests: only pay for
        # them when the message is really going to be emitted.
        if log.isEnabledFor(logging.DEBUG):
            log.debug(f"Transferring {src} [exists: {src.exists()}] -> "
                      f"{self} [exists: {self.exists()}] (transfer={transfer})")

        if not overwrite and self.exists():
            raise FileExistsError(f"Destination path {self} already exists.")

        if transfer == "auto":
            transfer = self.transferDefault

        # Whether the source still has to be deleted by us afterwards.
        remove_source = False

        if isinstance(src, type(self)):
            # Only available on WebDAV backends
            if not self.is_webdav_endpoint:
                raise NotImplementedError("Endpoint does not implement WebDAV functionality")

            if transfer == "move":
                # A successful MOVE removes the source on the server
                # side, so no explicit removal is needed afterwards.
                r = self.session.request("MOVE", src.geturl(),
                                         headers={"Destination": self.geturl()},
                                         timeout=TIMEOUT)
                log.debug("Running move via MOVE HTTP request.")
            else:
                r = self.session.request("COPY", src.geturl(),
                                         headers={"Destination": self.geturl()},
                                         timeout=TIMEOUT)
                log.debug("Running copy via COPY HTTP request.")
        else:
            # Use local file and upload it
            with src.as_local() as local_uri:
                with open(local_uri.ospath, "rb") as f:
                    dest_url = finalurl(self._emptyPut())
                    r = self.session.put(dest_url, data=f, timeout=TIMEOUT)
                log.debug("Uploading URI %s to %s via local file", src, self)
            remove_source = transfer == "move"

        if r.status_code not in [201, 202, 204]:
            raise ValueError(f"Can not transfer file {self}, status code: {r.status_code}")

        # This was an explicit move implemented as an upload:
        # try to remove the source resource.
        if remove_source:
            # Transactions do not work here
            src.remove()

    def _emptyPut(self) -> requests.Response:
        """Send an empty PUT request to current URL.

        This is used to detect if redirection is enabled before sending actual
        data.

        Returns
        -------
        response : `requests.Response`
            HTTP Response from the endpoint.
        """
        headers = {"Content-Length": "0"}
        if useExpect100():
            headers["Expect"] = "100-continue"
        # Redirects are not followed so the caller can inspect a 307 answer.
        return self.session.put(self.geturl(), data=None, headers=headers,
                                allow_redirects=False, timeout=TIMEOUT)