Coverage for python/lsst/daf/butler/core/_butlerUri/http.py : 14%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

import os
import os.path
import requests
import tempfile
import logging

__all__ = ('ButlerHttpURI', )

from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry

from typing import (
    TYPE_CHECKING,
    Optional,
    Tuple,
    Union,
)

from .utils import NoTransaction
from ._butlerUri import ButlerURI
from ..location import Location

# Imported for type annotations only; never executed at runtime.
if TYPE_CHECKING:
    from ..datastore import DatastoreTransaction

# Module-level logger shared by every helper and by ButlerHttpURI.
log = logging.getLogger(__name__)
def getHttpSession() -> requests.Session:
    """Build a `requests.Session` configured from environment variables.

    Returns
    -------
    session : `requests.Session`
        An http session used to execute requests. Retrying adapters are
        mounted for both ``http://`` and ``https://`` and the selected
        authentication method has been applied.

    Raises
    ------
    KeyError
        Raised if ``LSST_BUTLER_WEBDAV_AUTH`` is not set, or if X509
        authentication is selected and ``LSST_BUTLER_WEBDAV_PROXY_CERT``
        is not set.
    ValueError
        Raised if ``LSST_BUTLER_WEBDAV_AUTH`` is neither X509 nor TOKEN.

    Notes
    -----
    The following environment variables must be set:

    - LSST_BUTLER_WEBDAV_CA_BUNDLE: the directory where CA
      certificates are stored if you intend to use HTTPS to
      communicate with the endpoint.
    - LSST_BUTLER_WEBDAV_AUTH: which authentication method to use.
      Possible values are X509 and TOKEN
    - (X509 only) LSST_BUTLER_WEBDAV_PROXY_CERT: path to proxy
      certificate used to authenticate requests
    - (TOKEN only) LSST_BUTLER_WEBDAV_TOKEN_FILE: file which
      contains the bearer token used to authenticate requests
    - (OPTIONAL) LSST_BUTLER_WEBDAV_EXPECT100: if set, we will add an
      "Expect: 100-Continue" header in all requests. This is required
      on certain endpoints where requests redirection is made.

    For TOKEN authentication the header is installed by `refreshToken`,
    whose exceptions propagate unchanged.
    """
    # One retry policy shared by both transport adapters.
    retry_policy = Retry(total=3, backoff_factor=1, status_forcelist=[429, 500, 502, 503, 504])

    session = requests.Session()
    for scheme_prefix in ("http://", "https://"):
        session.mount(scheme_prefix, HTTPAdapter(max_retries=retry_policy))

    log.debug("Creating new HTTP session...")

    try:
        auth_method = os.environ['LSST_BUTLER_WEBDAV_AUTH']
    except KeyError:
        raise KeyError("Environment variable LSST_BUTLER_WEBDAV_AUTH is not set, "
                       "please use values X509 or TOKEN")

    # Reject unknown methods up front so only the two valid cases remain.
    if auth_method not in ("X509", "TOKEN"):
        raise ValueError("Environment variable LSST_BUTLER_WEBDAV_AUTH must be set to X509 or TOKEN")

    if auth_method == "TOKEN":
        log.debug("... using bearer-token authentication.")
        refreshToken(session)
    else:
        log.debug("... using x509 authentication.")
        try:
            cert_path = os.environ['LSST_BUTLER_WEBDAV_PROXY_CERT']
        except KeyError:
            raise KeyError("Environment variable LSST_BUTLER_WEBDAV_PROXY_CERT is not set")
        # The same file is passed as both certificate and key.
        session.cert = (cert_path, cert_path)

    ca_bundle = os.environ.get('LSST_BUTLER_WEBDAV_CA_BUNDLE')
    if ca_bundle is None:
        log.warning("Environment variable LSST_BUTLER_WEBDAV_CA_BUNDLE is not set: "
                    "HTTPS requests will fail. If you intend to use HTTPS, please "
                    "export this variable.")

    # NOTE(review): when the variable is unset this assigns None; requests may
    # treat a falsy ``verify`` as "do not verify" rather than failing -- confirm.
    session.verify = ca_bundle

    # This header is required for request redirection, in dCache for example
    if "LSST_BUTLER_WEBDAV_EXPECT100" in os.environ:
        log.debug("Expect: 100-Continue header enabled.")
        session.headers.update({'Expect': '100-continue'})

    log.debug("Session configured and ready.")

    return session
def isTokenAuth() -> bool:
    """Report whether bearer-token authentication is selected.

    Returns
    -------
    isTokenAuth : `bool`
        True if LSST_BUTLER_WEBDAV_AUTH is set to TOKEN, False otherwise.

    Raises
    ------
    KeyError
        Raised if LSST_BUTLER_WEBDAV_AUTH is not set.
    """
    try:
        auth_method = os.environ['LSST_BUTLER_WEBDAV_AUTH']
    except KeyError:
        raise KeyError("Environment variable LSST_BUTLER_WEBDAV_AUTH is not set, "
                       "please use values X509 or TOKEN")

    return auth_method == "TOKEN"
def refreshToken(session: requests.Session) -> None:
    """Set or update the 'Authorization' header of the session,
    configure bearer token authentication, with the value fetched
    from LSST_BUTLER_WEBDAV_TOKEN_FILE

    Parameters
    ----------
    session : `requests.Session`
        Session on which bearer token authentication must be configured

    Raises
    ------
    KeyError
        Raised if LSST_BUTLER_WEBDAV_TOKEN_FILE is not set.
    FileNotFoundError
        Raised if the path named by LSST_BUTLER_WEBDAV_TOKEN_FILE is not
        an existing regular file.
    """
    # Only the environment lookup can raise KeyError, so guard just that.
    try:
        token_path = os.environ['LSST_BUTLER_WEBDAV_TOKEN_FILE']
    except KeyError:
        raise KeyError("Environment variable LSST_BUTLER_WEBDAV_TOKEN_FILE is not set")

    if not os.path.isfile(token_path):
        raise FileNotFoundError(f"No token file: {token_path}")

    # Read through a context manager so the handle is closed deterministically;
    # this function can run on every session access.
    with open(token_path, 'r') as token_file:
        # Newlines are dropped so a trailing line break cannot end up in the header.
        bearer_token = token_file.read().replace('\n', '')

    session.headers.update({'Authorization': 'Bearer ' + bearer_token})
def webdavCheckFileExists(path: Union[Location, ButlerURI, str],
                          session: Optional[requests.Session] = None) -> Tuple[bool, int]:
    """Probe a remote HTTP resource with a HEAD request.

    Parameters
    ----------
    path : `Location`, `ButlerURI` or `str`
        Location or ButlerURI containing the bucket name and filepath.
    session : `requests.Session`, optional
        Session object to query. A new one is obtained from
        `getHttpSession` when omitted.

    Returns
    -------
    exists : `bool`
        True if the HEAD request returned status 200, False otherwise.
    size : `int`
        Value of the ``Content-Length`` response header in bytes when the
        resource exists, otherwise -1
    """
    http = getHttpSession() if session is None else session

    url = _getFileURL(path)

    log.debug("Checking if file exists: %s", url)

    response = http.head(url)
    if response.status_code != 200:
        return (False, -1)
    return (True, int(response.headers['Content-Length']))
def webdavDeleteFile(path: Union[Location, ButlerURI, str],
                     session: Optional[requests.Session] = None) -> None:
    """Issue an HTTP DELETE for a remote resource.

    Parameters
    ----------
    path : `Location`, `ButlerURI` or `str`
        Location or ButlerURI containing the bucket name and filepath.
    session : `requests.Session`, optional
        Session object to query. A new one is obtained from
        `getHttpSession` when omitted.

    Raises
    ------
    FileNotFoundError
        Raised if the response status is not one of 200, 202 or 204,
        i.e. the resource does not exist or the deletion failed.
    """
    http = getHttpSession() if session is None else session

    url = _getFileURL(path)

    log.debug("Removing file: %s", url)
    response = http.delete(url)
    status = response.status_code
    if status not in (200, 202, 204):
        raise FileNotFoundError(f"Unable to delete resource {url}; status code: {status}")
def folderExists(path: Union[Location, ButlerURI, str],
                 session: Optional[requests.Session] = None) -> bool:
    """Check whether the Webdav repository at a given URL answers a HEAD
    request with status 200.

    Parameters
    ----------
    path : `Location`, `ButlerURI` or `str`
        Location or ButlerURI containing the bucket name and filepath.
    session : `requests.Session`, optional
        Session object to query. A new one is obtained from
        `getHttpSession` when omitted.

    Returns
    -------
    exists : `bool`
        True if it exists, False if no folder is found.
    """
    http = getHttpSession() if session is None else session

    url = _getFileURL(path)

    log.debug("Checking if folder exists: %s", url)
    response = http.head(url)
    return response.status_code == 200
def isWebdavEndpoint(path: Union[Location, ButlerURI, str]) -> bool:
    """Detect Webdav support by sending an OPTIONS request to the endpoint.

    Parameters
    ----------
    path : `Location`, `ButlerURI` or `str`
        Location or ButlerURI containing the bucket name and filepath.

    Returns
    -------
    isWebdav : `bool`
        True if the response carries a ``DAV`` header, False if it doesn't.

    Notes
    -----
    The request is sent with the module-level `requests.options`, not
    through an authenticated session; only the CA bundle named by
    LSST_BUTLER_WEBDAV_CA_BUNDLE is passed along.
    """
    ca_bundle = os.environ.get('LSST_BUTLER_WEBDAV_CA_BUNDLE')
    if ca_bundle is None:
        log.warning("Environment variable LSST_BUTLER_WEBDAV_CA_BUNDLE is not set: "
                    "HTTPS requests will fail. If you intend to use HTTPS, please "
                    "export this variable.")
    url = _getFileURL(path)

    log.debug("Detecting HTTP endpoint type...")
    response = requests.options(url, verify=ca_bundle)
    return 'DAV' in response.headers
def finalurl(r: requests.Response) -> str:
    """Check whether the remote HTTP endpoint redirects to a different
    endpoint, and return the final destination of the request.

    This is needed when using PUT operations, to avoid starting
    to send the data to the endpoint, before having to send it again once
    the redirect response is received, and thus wasting bandwidth.

    Parameters
    ----------
    r : `requests.Response`
        An HTTP response received when requesting the endpoint

    Returns
    -------
    destination_url : `str`
        The final destination to which requests must be sent. This is the
        resolved ``Location`` header for a 307 or 308 response, and
        ``r.url`` in every other case (including a redirect that carries
        no ``Location`` header).
    """
    # Local import: keeps this block self-contained without touching the
    # module-level import list.
    from urllib.parse import urljoin

    destination_url = r.url
    # 307 and 308 are the redirects that preserve the request method, so a
    # PUT can be re-targeted at the advertised location.
    if r.status_code in (307, 308):
        location = r.headers.get('Location')
        if location:
            # Location may be relative; resolve it against the requested URL.
            destination_url = urljoin(r.url, location)
            log.debug("Request redirected to %s", destination_url)
    return destination_url
def _getFileURL(path: Union[Location, ButlerURI, str]) -> str:
    """Return the absolute URL of the resource as a string.

    Parameters
    ----------
    path : `Location`, `ButlerURI` or `str`
        Location or ButlerURI containing the bucket name and filepath.

    Returns
    -------
    filepath : `str`
        The fully qualified URL of the resource.
    """
    # A Location already carries its URI; anything else is wrapped in a
    # ButlerURI first.
    uri = path.uri if isinstance(path, Location) else ButlerURI(path)
    return uri.geturl()
class ButlerHttpURI(ButlerURI):
    """General HTTP(S) resource.

    Notes
    -----
    All instances share one `requests.Session`, stored on the class in
    ``_session`` and configured on first access of `session`.
    """

    # Placeholder session; replaced by a configured one on first use.
    _session = requests.Session()
    # Flipped to True once ``_session`` has been configured by getHttpSession.
    _sessionInitialized = False
    # Number of bytes requested per chunk when streaming a download to disk.
    _downloadChunkSize = 64 * 1024

    @property
    def session(self) -> requests.Session:
        """Client object to address remote resource.

        The shared session is configured on first access. On later
        accesses the bearer token is re-read when token authentication is
        in use, so an updated token file is picked up.
        """
        if ButlerHttpURI._sessionInitialized:
            if isTokenAuth():
                refreshToken(ButlerHttpURI._session)
            return ButlerHttpURI._session

        baseURL = self.scheme + "://" + self.netloc

        if isWebdavEndpoint(baseURL):
            log.debug("%s looks like a Webdav endpoint.", baseURL)

        # The session is built whether or not the endpoint advertises Webdav,
        # so ``s`` is always bound; the probe above only affects logging.
        s = getHttpSession()

        ButlerHttpURI._session = s
        ButlerHttpURI._sessionInitialized = True
        return s

    def exists(self) -> bool:
        """Check that a remote HTTP resource exists.

        Returns
        -------
        exists : `bool`
            True if a HEAD request on the URL returns status 200.
        """
        log.debug("Checking if resource exists: %s", self.geturl())
        r = self.session.head(self.geturl())

        return r.status_code == 200

    def size(self) -> int:
        """Return the size of the resource in bytes.

        Returns
        -------
        size : `int`
            0 for a dir-like URI, otherwise the value of the
            ``Content-Length`` header of a HEAD response.

        Raises
        ------
        FileNotFoundError
            Raised if the HEAD request does not return status 200.
        """
        if self.dirLike:
            return 0
        r = self.session.head(self.geturl())
        if r.status_code != 200:
            raise FileNotFoundError(f"Resource {self} does not exist")
        return int(r.headers['Content-Length'])

    def mkdir(self) -> None:
        """For a dir-like URI, create the directory resource if it does not
        already exist.

        Raises
        ------
        ValueError
            Raised if the URI is file-like, or if the MKCOL request returns
            a status other than 201 or 405.
        """
        if not self.dirLike:
            raise ValueError(f"Can not create a 'directory' for file-like URI {self}")

        if self.exists():
            return

        # We need to test the absence of the parent directory,
        # but also if parent URL is different from self URL,
        # otherwise we could be stuck in a recursive loop
        # where self == parent.
        # The URL comparison comes first so no request is sent for the
        # degenerate case.
        parent = self.parent()
        if parent.geturl() != self.geturl() and not parent.exists():
            parent.mkdir()

        log.debug("Creating new directory: %s", self.geturl())
        r = self.session.request("MKCOL", self.geturl())
        if r.status_code == 201:
            return
        if r.status_code == 405:
            log.debug("Can not create directory: %s may already exist: skipping.", self.geturl())
        else:
            raise ValueError(f"Can not create directory {self}, status code: {r.status_code}")

    def remove(self) -> None:
        """Remove the resource.

        Raises
        ------
        FileNotFoundError
            Raised if the DELETE request does not return 200, 202 or 204.
        """
        log.debug("Removing resource: %s", self.geturl())
        r = self.session.delete(self.geturl())
        if r.status_code not in (200, 202, 204):
            raise FileNotFoundError(f"Unable to delete resource {self}; status code: {r.status_code}")

    def as_local(self) -> Tuple[str, bool]:
        """Download object over HTTP and place in temporary directory.

        Returns
        -------
        path : `str`
            Path to local temporary file.
        temporary : `bool`
            Always returns `True`. This is always a temporary file.

        Raises
        ------
        FileNotFoundError
            Raised if the GET request does not return status 200.
        """
        log.debug("Downloading remote resource as local file: %s", self.geturl())
        r = self.session.get(self.geturl(), stream=True)
        try:
            if r.status_code != 200:
                raise FileNotFoundError(f"Unable to download resource {self}; status code: {r.status_code}")
            with tempfile.NamedTemporaryFile(suffix=self.getExtension(), delete=False) as tmpFile:
                # An explicit chunk size avoids iter_content's one-byte default.
                for chunk in r.iter_content(chunk_size=self._downloadChunkSize):
                    tmpFile.write(chunk)
        finally:
            # Release the streamed connection on success and on error alike.
            r.close()
        return tmpFile.name, True

    def read(self, size: int = -1) -> bytes:
        """Open the resource and return the contents in bytes.

        Parameters
        ----------
        size : `int`, optional
            The number of bytes to read. Negative or omitted indicates
            that all data should be read.

        Returns
        -------
        data : `bytes`
            The full content when ``size`` is not positive, otherwise at
            most ``size`` bytes from the start of the resource.

        Raises
        ------
        FileNotFoundError
            Raised if the GET request does not return status 200.
        """
        log.debug("Reading from remote resource: %s", self.geturl())
        # Stream only for partial reads, so the whole body is not downloaded.
        stream = size > 0
        r = self.session.get(self.geturl(), stream=stream)
        try:
            if r.status_code != 200:
                raise FileNotFoundError(f"Unable to read resource {self}; status code: {r.status_code}")
            if not stream:
                return r.content
            # Accumulate chunks until ``size`` bytes are available: a single
            # chunk may be shorter than requested, and an empty body yields
            # no chunk at all.
            buffer = bytearray()
            for chunk in r.iter_content(chunk_size=size):
                buffer += chunk
                if len(buffer) >= size:
                    break
            return bytes(buffer[:size])
        finally:
            r.close()

    def write(self, data: bytes, overwrite: bool = True) -> None:
        """Write the supplied bytes to the new resource.

        Parameters
        ----------
        data : `bytes`
            The bytes to write to the resource. The entire contents of the
            resource will be replaced.
        overwrite : `bool`, optional
            If `True` the resource will be overwritten if it exists. Otherwise
            the write will fail.

        Raises
        ------
        FileExistsError
            Raised if ``overwrite`` is `False` and the resource exists.
        ValueError
            Raised if the PUT request does not return 201, 202 or 204.
        """
        log.debug("Writing to remote resource: %s", self.geturl())
        if not overwrite and self.exists():
            raise FileExistsError(f"Remote resource {self} exists and overwrite has been disabled")
        # Probe with an empty PUT first so the payload goes straight to the
        # redirect target, if there is one.
        dest_url = finalurl(self._emptyPut())
        r = self.session.put(dest_url, data=data)
        if r.status_code not in (201, 202, 204):
            raise ValueError(f"Can not write file {self}, status code: {r.status_code}")

    def transfer_from(self, src: ButlerURI, transfer: str = "copy",
                      overwrite: bool = False,
                      transaction: Optional[Union[DatastoreTransaction, NoTransaction]] = None) -> None:
        """Transfer the current resource to a Webdav repository.

        Parameters
        ----------
        src : `ButlerURI`
            Source URI.
        transfer : `str`
            Mode to use for transferring the resource. Must be one of
            ``self.transferModes``; "auto" is replaced by
            ``self.transferDefault``.
        overwrite : `bool`, optional
            If `False` (the default) the transfer fails when the
            destination already exists.
        transaction : `DatastoreTransaction`, optional
            Currently unused.

        Raises
        ------
        ValueError
            Raised if the transfer mode is not supported, or if the request
            does not return 201, 202 or 204.
        FileExistsError
            Raised if the destination exists and ``overwrite`` is `False`.
        """
        # Fail early to prevent delays if remote resources are requested
        if transfer not in self.transferModes:
            raise ValueError(f"Transfer mode {transfer} not supported by URI scheme {self.scheme}")

        # Both exists() calls are network requests, so only pay for them when
        # the message will actually be emitted.
        if log.isEnabledFor(logging.DEBUG):
            log.debug("Transferring %s [exists: %s] -> %s [exists: %s] (transfer=%s)",
                      src, src.exists(), self, self.exists(), transfer)

        if not overwrite and self.exists():
            raise FileExistsError(f"Destination path {self} already exists.")

        if transfer == "auto":
            transfer = self.transferDefault

        if isinstance(src, type(self)):
            # Same kind of URI on both sides: let the server do the work.
            if transfer == "move":
                r = self.session.request("MOVE", src.geturl(), headers={"Destination": self.geturl()})
                log.debug("Running move via MOVE HTTP request.")
            else:
                r = self.session.request("COPY", src.geturl(), headers={"Destination": self.geturl()})
                log.debug("Running copy via COPY HTTP request.")
        else:
            # Use local file and upload it
            local_src, is_temporary = src.as_local()
            try:
                with open(local_src, "rb") as f:
                    dest_url = finalurl(self._emptyPut())
                    r = self.session.put(dest_url, data=f)
            finally:
                # Clean up the local copy even if the upload raised.
                if is_temporary:
                    os.remove(local_src)
            log.debug("Running transfer from a local copy of the file.")

        if r.status_code not in (201, 202, 204):
            raise ValueError(f"Can not transfer file {self}, status code: {r.status_code}")

    def _emptyPut(self) -> requests.Response:
        """Send an empty PUT request to current URL. This is used to detect
        if redirection is enabled before sending actual data.

        Returns
        -------
        response : `requests.Response`
            HTTP Response from the endpoint. Redirects are not followed, so
            a redirect status is returned as-is.
        """
        return self.session.put(self.geturl(), data=None,
                                headers={"Content-Length": "0"}, allow_redirects=False)