Coverage for python/lsst/daf/butler/core/_butlerUri/http.py : 14%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24import os
25import os.path
26import requests
27import tempfile
28import logging
30__all__ = ('ButlerHttpURI', )
32from requests.adapters import HTTPAdapter
33from requests.packages.urllib3.util.retry import Retry
35from typing import (
36 TYPE_CHECKING,
37 Optional,
38 Tuple,
39 Union,
40)
42from .utils import NoTransaction
43from ._butlerUri import ButlerURI
44from ..location import Location
46if TYPE_CHECKING: 46 ↛ 47line 46 didn't jump to line 47, because the condition on line 46 was never true
47 from ..datastore import DatastoreTransaction
49log = logging.getLogger(__name__)
def getHttpSession() -> requests.Session:
    """Create a `requests.Session` pre-configured from environment variables.

    Returns
    -------
    session : `requests.Session`
        An http session used to execute requests.

    Raises
    ------
    KeyError
        Raised if ``LSST_BUTLER_WEBDAV_AUTH`` is not set, or if X509
        authentication is requested and ``LSST_BUTLER_WEBDAV_PROXY_CERT``
        is not set. Errors raised by `refreshToken` propagate when TOKEN
        authentication is requested.
    ValueError
        Raised if ``LSST_BUTLER_WEBDAV_AUTH`` is neither X509 nor TOKEN.

    Notes
    -----
    The following environment variables are read:

    - LSST_BUTLER_WEBDAV_CA_BUNDLE: the directory where CA
      certificates are stored if you intend to use HTTPS to
      communicate with the endpoint. If it is not set a warning is
      logged and the certificate verification settings of the new
      session are left untouched.
    - LSST_BUTLER_WEBDAV_AUTH: which authentication method to use.
      Possible values are X509 and TOKEN
    - (X509 only) LSST_BUTLER_WEBDAV_PROXY_CERT: path to proxy
      certificate used to authenticate requests
    - (TOKEN only) LSST_BUTLER_WEBDAV_TOKEN_FILE: file which
      contains the bearer token used to authenticate requests
    - (OPTIONAL) LSST_BUTLER_WEBDAV_EXPECT100: if set, we will add an
      "Expect: 100-Continue" header in all requests. This is required
      on certain endpoints where requests redirection is made.
    """
    # Retry up to three times, with backoff, on the listed HTTP status codes.
    retries = Retry(total=3, backoff_factor=1, status_forcelist=[429, 500, 502, 503, 504])

    session = requests.Session()
    session.mount("http://", HTTPAdapter(max_retries=retries))
    session.mount("https://", HTTPAdapter(max_retries=retries))

    log.debug("Creating new HTTP session...")

    try:
        env_auth_method = os.environ['LSST_BUTLER_WEBDAV_AUTH']
    except KeyError:
        # The bare lookup failure adds nothing to the explicit message.
        raise KeyError("Environment variable LSST_BUTLER_WEBDAV_AUTH is not set, "
                       "please use values X509 or TOKEN") from None

    if env_auth_method == "X509":
        log.debug("... using x509 authentication.")
        try:
            proxy_cert = os.environ['LSST_BUTLER_WEBDAV_PROXY_CERT']
        except KeyError:
            raise KeyError("Environment variable LSST_BUTLER_WEBDAV_PROXY_CERT is not set") from None
        # The same proxy file is passed as both certificate and key.
        session.cert = (proxy_cert, proxy_cert)
    elif env_auth_method == "TOKEN":
        log.debug("... using bearer-token authentication.")
        refreshToken(session)
    else:
        raise ValueError("Environment variable LSST_BUTLER_WEBDAV_AUTH must be set to X509 or TOKEN")

    ca_bundle = os.environ.get('LSST_BUTLER_WEBDAV_CA_BUNDLE')
    if ca_bundle is None:
        log.warning("Environment variable LSST_BUTLER_WEBDAV_CA_BUNDLE is not set: "
                    "HTTPS requests will fail. If you intend to use HTTPS, please "
                    "export this variable.")
    else:
        # Only override the verification setting when a bundle is configured.
        # Assigning None to ``session.verify`` would make requests skip
        # certificate verification altogether instead of failing.
        session.verify = ca_bundle

    # This header is required for request redirection, in dCache for example
    if "LSST_BUTLER_WEBDAV_EXPECT100" in os.environ:
        log.debug("Expect: 100-Continue header enabled.")
        session.headers.update({'Expect': '100-continue'})

    log.debug("Session configured and ready.")

    return session
def isTokenAuth() -> bool:
    """Report whether bearer-token authentication is selected.

    Returns
    -------
    isTokenAuth : `bool`
        True if LSST_BUTLER_WEBDAV_AUTH is set to TOKEN, False otherwise.

    Raises
    ------
    KeyError
        Raised if LSST_BUTLER_WEBDAV_AUTH is not set at all.
    """
    auth_method = os.environ.get('LSST_BUTLER_WEBDAV_AUTH')
    if auth_method is None:
        raise KeyError("Environment variable LSST_BUTLER_WEBDAV_AUTH is not set, "
                       "please use values X509 or TOKEN")
    return auth_method == "TOKEN"
def refreshToken(session: requests.Session) -> None:
    """Set or update the 'Authorization' header of the session with the
    bearer token stored in the file named by LSST_BUTLER_WEBDAV_TOKEN_FILE.

    Parameters
    ----------
    session : `requests.Session`
        Session on which bearer token authentication must be configured

    Raises
    ------
    KeyError
        Raised if LSST_BUTLER_WEBDAV_TOKEN_FILE is not set.
    FileNotFoundError
        Raised if the variable does not point to an existing file.
    """
    try:
        token_file = os.environ['LSST_BUTLER_WEBDAV_TOKEN_FILE']
    except KeyError:
        raise KeyError("Environment variable LSST_BUTLER_WEBDAV_TOKEN_FILE is not set")

    if not os.path.isfile(token_file):
        raise FileNotFoundError(f"No token file: {token_file}")

    with open(token_file, "r") as token_fh:
        raw_token = token_fh.read()

    # Newlines are dropped so the token forms a single header value.
    auth_value = 'Bearer ' + raw_token.replace('\n', '')
    session.headers.update({'Authorization': auth_value})
def webdavCheckFileExists(path: Union[Location, ButlerURI, str],
                          session: Optional[requests.Session] = None) -> Tuple[bool, int]:
    """Check that a remote HTTP resource exists.

    Parameters
    ----------
    path : `Location`, `ButlerURI` or `str`
        Location or ButlerURI containing the bucket name and filepath.
    session : `requests.Session`, optional
        Session object to query. A new one is obtained from
        `getHttpSession` when omitted.

    Returns
    -------
    exists : `bool`
        True if a HEAD request on the resource answers 200, False otherwise.
    size : `int`
        Value of the Content-Length header, in bytes, if the resource
        exists, otherwise -1
    """
    http = getHttpSession() if session is None else session

    url = _getFileURL(path)
    log.debug("Checking if file exists: %s", url)

    response = http.head(url)
    if response.status_code != 200:
        return (False, -1)
    return (True, int(response.headers['Content-Length']))
def webdavDeleteFile(path: Union[Location, ButlerURI, str],
                     session: Optional[requests.Session] = None) -> None:
    """Remove a remote HTTP resource.

    Parameters
    ----------
    path : `Location`, `ButlerURI` or `str`
        Location or ButlerURI containing the bucket name and filepath.
    session : `requests.Session`, optional
        Session object to query. A new one is obtained from
        `getHttpSession` when omitted.

    Raises
    ------
    FileNotFoundError
        Raised if the DELETE request answers with any status code other
        than 200, 202 or 204.
    """
    http = getHttpSession() if session is None else session

    url = _getFileURL(path)
    log.debug("Removing file: %s", url)

    response = http.delete(url)
    deleted = response.status_code in [200, 202, 204]
    if not deleted:
        raise FileNotFoundError(f"Unable to delete resource {url}; status code: {response.status_code}")
def folderExists(path: Union[Location, ButlerURI, str],
                 session: Optional[requests.Session] = None) -> bool:
    """Check if the Webdav repository at a given URL actually exists.

    Parameters
    ----------
    path : `Location`, `ButlerURI` or `str`
        Location or ButlerURI containing the bucket name and filepath.
    session : `requests.Session`, optional
        Session object to query. A new one is obtained from
        `getHttpSession` when omitted.

    Returns
    -------
    exists : `bool`
        True if a HEAD request on the URL answers 200, False otherwise.
    """
    http = getHttpSession() if session is None else session

    url = _getFileURL(path)
    log.debug("Checking if folder exists: %s", url)

    response = http.head(url)
    return response.status_code == 200
def isWebdavEndpoint(path: Union[Location, ButlerURI, str]) -> bool:
    """Check whether the remote HTTP endpoint implements Webdav features.

    An OPTIONS request is sent to the URL, outside of any shared session,
    and the reply is inspected for a ``DAV`` header.

    Parameters
    ----------
    path : `Location`, `ButlerURI` or `str`
        Location or ButlerURI containing the bucket name and filepath.

    Returns
    -------
    isWebdav : `bool`
        True if the endpoint implements Webdav, False if it doesn't.
    """
    bundle = os.environ.get('LSST_BUTLER_WEBDAV_CA_BUNDLE')
    if bundle is None:
        log.warning("Environment variable LSST_BUTLER_WEBDAV_CA_BUNDLE is not set: "
                    "HTTPS requests will fail. If you intend to use HTTPS, please "
                    "export this variable.")

    url = _getFileURL(path)

    log.debug("Detecting HTTP endpoint type...")
    response = requests.options(url, verify=bundle)
    return 'DAV' in response.headers
def finalurl(r: requests.Response) -> str:
    """Return the URL to which the data of a request must actually be sent.

    If the response is a 307 redirect, the target announced in its
    ``Location`` header is returned, otherwise the URL of the response
    itself. This is needed when using PUT operations, to avoid starting
    to send the data to the endpoint, before having to send it again once
    the 307 redirect response is received, and thus wasting bandwidth.

    Parameters
    ----------
    r : `requests.Response`
        An HTTP response received when requesting the endpoint

    Returns
    -------
    destination_url: `string`
        The final destination to which requests must be sent.
    """
    if r.status_code != 307:
        return r.url

    redirect_target = r.headers['Location']
    log.debug("Request redirected to %s", redirect_target)
    return redirect_target
def _getFileURL(path: Union[Location, ButlerURI, str]) -> str:
    """Return the absolute URL of the resource as a string.

    Parameters
    ----------
    path : `Location`, `ButlerURI` or `str`
        Location or ButlerURI containing the bucket name and filepath.

    Returns
    -------
    filepath : `str`
        The fully qualified URL of the resource.
    """
    # A Location already carries its URI; anything else is handed to
    # the ButlerURI constructor.
    uri = path.uri if isinstance(path, Location) else ButlerURI(path)
    return uri.geturl()
class ButlerHttpURI(ButlerURI):
    """General HTTP(S) resource."""

    # The session is stored on the class, so a single configured session is
    # shared by every instance once it has been initialized.
    _session = requests.Session()
    _sessionInitialized = False

    @property
    def session(self) -> requests.Session:
        """Client object to address remote resource.

        On first access the endpoint is probed and a configured session is
        created and cached on the class. On later accesses the cached session
        is returned, after re-reading the bearer token when token
        authentication is in use.

        Raises
        ------
        RuntimeError
            Raised if the endpoint does not look like a Webdav endpoint.
        """
        if ButlerHttpURI._sessionInitialized:
            if isTokenAuth():
                refreshToken(ButlerHttpURI._session)
            return ButlerHttpURI._session

        baseURL = self.scheme + "://" + self.netloc

        if isWebdavEndpoint(baseURL):
            log.debug("%s looks like a Webdav endpoint.", baseURL)
            s = getHttpSession()
        else:
            raise RuntimeError(f"Only Webdav endpoints are supported; got base URL '{baseURL}'.")

        ButlerHttpURI._session = s
        ButlerHttpURI._sessionInitialized = True
        return s

    def exists(self) -> bool:
        """Check that a remote HTTP resource exists.

        Returns
        -------
        exists : `bool`
            True if a HEAD request on the URL answers 200, False otherwise.
        """
        log.debug("Checking if resource exists: %s", self.geturl())
        r = self.session.head(self.geturl())

        return r.status_code == 200

    def size(self) -> int:
        """Return the size of the resource in bytes.

        Returns
        -------
        size : `int`
            0 for a dir-like URI, otherwise the value of the Content-Length
            header returned by a HEAD request.

        Raises
        ------
        FileNotFoundError
            Raised if the HEAD request does not answer 200.
        """
        if self.dirLike:
            return 0
        r = self.session.head(self.geturl())
        if r.status_code == 200:
            return int(r.headers['Content-Length'])
        else:
            raise FileNotFoundError(f"Resource {self} does not exist")

    def mkdir(self) -> None:
        """For a dir-like URI, create the directory resource if it does not
        already exist.

        Raises
        ------
        ValueError
            Raised if the URI is file-like, or if the MKCOL request fails
            with a status code other than 405.
        """
        if not self.dirLike:
            raise ValueError(f"Can not create a 'directory' for file-like URI {self}")

        if not self.exists():
            # We need to test the absence of the parent directory,
            # but also if parent URL is different from self URL,
            # otherwise we could be stuck in a recursive loop
            # where self == parent
            parent = self.parent()
            if not parent.exists() and parent.geturl() != self.geturl():
                parent.mkdir()
            log.debug("Creating new directory: %s", self.geturl())
            r = self.session.request("MKCOL", self.geturl())
            if r.status_code != 201:
                if r.status_code == 405:
                    log.debug("Can not create directory: %s may already exist: skipping.", self.geturl())
                else:
                    raise ValueError(f"Can not create directory {self}, status code: {r.status_code}")

    def remove(self) -> None:
        """Remove the resource.

        Raises
        ------
        FileNotFoundError
            Raised if the DELETE request answers with any status code other
            than 200, 202 or 204.
        """
        log.debug("Removing resource: %s", self.geturl())
        r = self.session.delete(self.geturl())
        if r.status_code not in [200, 202, 204]:
            raise FileNotFoundError(f"Unable to delete resource {self}; status code: {r.status_code}")

    def _as_local(self) -> Tuple[str, bool]:
        """Download object over HTTP and place in temporary directory.

        Returns
        -------
        path : `str`
            Path to local temporary file.
        temporary : `bool`
            Always returns `True`. This is always a temporary file.

        Raises
        ------
        FileNotFoundError
            Raised if the GET request does not answer 200.
        """
        log.debug("Downloading remote resource as local file: %s", self.geturl())
        r = self.session.get(self.geturl(), stream=True)
        try:
            if r.status_code != 200:
                raise FileNotFoundError(f"Unable to download resource {self}; status code: {r.status_code}")
            with tempfile.NamedTemporaryFile(suffix=self.getExtension(), delete=False) as tmpFile:
                # An explicit chunk size is required: the default of
                # iter_content() hands the body over one byte at a time.
                for chunk in r.iter_content(chunk_size=65536):
                    tmpFile.write(chunk)
        finally:
            # A streamed response keeps its connection until it is closed
            # or fully read, so release it on the error paths as well.
            r.close()
        return tmpFile.name, True

    def read(self, size: int = -1) -> bytes:
        """Open the resource and return the contents in bytes.

        Parameters
        ----------
        size : `int`, optional
            The number of bytes to read. Negative or omitted indicates
            that all data should be read.

        Returns
        -------
        data : `bytes`
            The whole content, or the first chunk of at most ``size`` bytes
            when a positive ``size`` is given. Empty if the resource has no
            content.

        Raises
        ------
        FileNotFoundError
            Raised if the GET request does not answer 200.
        """
        log.debug("Reading from remote resource: %s", self.geturl())
        stream = size > 0
        r = self.session.get(self.geturl(), stream=stream)
        try:
            if r.status_code != 200:
                raise FileNotFoundError(f"Unable to read resource {self}; status code: {r.status_code}")
            if not stream:
                return r.content
            # An empty body yields no chunk at all; return empty bytes
            # rather than letting StopIteration escape.
            return next(r.iter_content(chunk_size=size), b"")
        finally:
            # Only part of a streamed body is consumed, so the connection
            # has to be released explicitly.
            r.close()

    def write(self, data: bytes, overwrite: bool = True) -> None:
        """Write the supplied bytes to the new resource.

        Parameters
        ----------
        data : `bytes`
            The bytes to write to the resource. The entire contents of the
            resource will be replaced.
        overwrite : `bool`, optional
            If `True` the resource will be overwritten if it exists. Otherwise
            the write will fail.

        Raises
        ------
        FileExistsError
            Raised if the resource exists and ``overwrite`` is `False`.
        ValueError
            Raised if the PUT request answers with any status code other
            than 201, 202 or 204.
        """
        log.debug("Writing to remote resource: %s", self.geturl())
        if not overwrite:
            if self.exists():
                raise FileExistsError(f"Remote resource {self} exists and overwrite has been disabled")
        # Probe with an empty PUT first so the data is sent only once, to
        # the redirected location if there is one.
        dest_url = finalurl(self._emptyPut())
        r = self.session.put(dest_url, data=data)
        if r.status_code not in [201, 202, 204]:
            raise ValueError(f"Can not write file {self}, status code: {r.status_code}")

    def transfer_from(self, src: ButlerURI, transfer: str = "copy",
                      overwrite: bool = False,
                      transaction: Optional[Union[DatastoreTransaction, NoTransaction]] = None) -> None:
        """Transfer the current resource to a Webdav repository.

        Parameters
        ----------
        src : `ButlerURI`
            Source URI.
        transfer : `str`
            Mode to use for transferring the resource. Supports the following
            options: copy.
        overwrite : `bool`, optional
            If `False` (the default) the transfer fails when the destination
            already exists. If `True` the existence check is skipped.
        transaction : `DatastoreTransaction`, optional
            Currently unused.

        Raises
        ------
        ValueError
            Raised if the transfer mode is not supported or if the request
            answers with any status code other than 201, 202 or 204.
        FileExistsError
            Raised if the destination exists and ``overwrite`` is `False`.
        """
        # Fail early to prevent delays if remote resources are requested
        if transfer not in self.transferModes:
            raise ValueError(f"Transfer mode {transfer} not supported by URI scheme {self.scheme}")

        # Each exists() call is an HTTP request; only issue them for the
        # message when it is actually going to be emitted.
        if log.isEnabledFor(logging.DEBUG):
            log.debug(f"Transferring {src} [exists: {src.exists()}] -> "
                      f"{self} [exists: {self.exists()}] (transfer={transfer})")

        if not overwrite and self.exists():
            raise FileExistsError(f"Destination path {self} already exists.")

        if transfer == "auto":
            transfer = self.transferDefault

        # Set when the server has already taken care of removing the source.
        moved_by_server = False
        if isinstance(src, type(self)):
            if transfer == "move":
                r = self.session.request("MOVE", src.geturl(), headers={"Destination": self.geturl()})
                log.debug("Running move via MOVE HTTP request.")
                moved_by_server = True
            else:
                r = self.session.request("COPY", src.geturl(), headers={"Destination": self.geturl()})
                log.debug("Running copy via COPY HTTP request.")
        else:
            # Use local file and upload it
            with src.as_local() as local_uri:
                with open(local_uri.ospath, "rb") as f:
                    dest_url = finalurl(self._emptyPut())
                    r = self.session.put(dest_url, data=f)
            log.debug("Uploading URI %s to %s via local file", src, self)

        if r.status_code not in [201, 202, 204]:
            raise ValueError(f"Can not transfer file {self}, status code: {r.status_code}")

        # This was an explicit move requested from a remote resource
        # try to remove that resource. After a successful MOVE request the
        # source is already gone and removing it again would fail.
        if transfer == "move" and not moved_by_server:
            # Transactions do not work here
            src.remove()

    def _emptyPut(self) -> requests.Response:
        """Send an empty PUT request to current URL. This is used to detect
        if redirection is enabled before sending actual data.

        Returns
        -------
        response : `requests.Response`
            HTTP Response from the endpoint. Redirects are not followed, so
            a redirect answer is returned as is.
        """
        return self.session.put(self.geturl(), data=None,
                                headers={"Content-Length": "0"}, allow_redirects=False)