Coverage for python/lsst/daf/butler/core/webdavutils.py : 15%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ("getHttpSession", "isWebdavEndpoint", "webdavCheckFileExists",
25 "folderExists", "webdavDeleteFile", "refreshToken",
26 "finalurl")
28import os
29import requests
30import logging
31from requests.adapters import HTTPAdapter
32from requests.packages.urllib3.util.retry import Retry
34from typing import (
35 Optional,
36 Tuple,
37 Union,
38)
40from .location import ButlerURI, Location
42log = logging.getLogger(__name__)
def getHttpSession() -> requests.Session:
    """Create a requests.Session pre-configured with environment variable data.

    Returns
    -------
    session : `requests.Session`
        An http session used to execute requests.

    Raises
    ------
    KeyError
        Raised if LSST_BUTLER_WEBDAV_AUTH is not set, or if it is set to
        X509 and LSST_BUTLER_WEBDAV_PROXY_CERT is not set. Errors raised
        by `refreshToken` (TOKEN mode) propagate unchanged.
    ValueError
        Raised if LSST_BUTLER_WEBDAV_AUTH is neither X509 nor TOKEN.

    Notes
    -----
    The following environment variables are read:

    - LSST_BUTLER_WEBDAV_AUTH (required): which authentication method to
      use. Possible values are X509 and TOKEN.
    - (X509 only, required) LSST_BUTLER_WEBDAV_PROXY_CERT: path to proxy
      certificate used to authenticate requests.
    - (TOKEN only) LSST_BUTLER_WEBDAV_TOKEN_FILE: file which
      contains the bearer token used to authenticate requests.
    - LSST_BUTLER_WEBDAV_CA_BUNDLE: the directory where CA
      certificates are stored if you intend to use HTTPS to
      communicate with the endpoint. If it is not set, a warning is
      logged and ``session.verify`` is set to `None`.
    - (OPTIONAL) LSST_BUTLER_WEBDAV_EXPECT100: if set, we will add an
      "Expect: 100-Continue" header in all requests. This is required
      on certain endpoints where requests redirection is made.
    """
    # Retry up to three times on throttling and server-side error codes.
    retries = Retry(total=3, backoff_factor=1, status_forcelist=[429, 500, 502, 503, 504])

    session = requests.Session()
    session.mount("http://", HTTPAdapter(max_retries=retries))
    session.mount("https://", HTTPAdapter(max_retries=retries))

    log.debug("Creating new HTTP session...")

    try:
        env_auth_method = os.environ['LSST_BUTLER_WEBDAV_AUTH']
    except KeyError:
        # "from None" hides the bare lookup error; the message below
        # already names the missing variable.
        raise KeyError("Environment variable LSST_BUTLER_WEBDAV_AUTH is not set, "
                       "please use values X509 or TOKEN") from None

    if env_auth_method == "X509":
        log.debug("... using x509 authentication.")
        try:
            proxy_cert = os.environ['LSST_BUTLER_WEBDAV_PROXY_CERT']
        except KeyError:
            raise KeyError("Environment variable LSST_BUTLER_WEBDAV_PROXY_CERT is not set") from None
        # The same proxy file is passed as both elements of the cert tuple.
        session.cert = (proxy_cert, proxy_cert)
    elif env_auth_method == "TOKEN":
        log.debug("... using bearer-token authentication.")
        refreshToken(session)
    else:
        raise ValueError("Environment variable LSST_BUTLER_WEBDAV_AUTH must be set to X509 or TOKEN")

    ca_bundle = os.environ.get('LSST_BUTLER_WEBDAV_CA_BUNDLE')
    if ca_bundle is None:
        log.warning("Environment variable LSST_BUTLER_WEBDAV_CA_BUNDLE is not set: "
                    "HTTPS requests will fail. If you intend to use HTTPS, please "
                    "export this variable.")

    session.verify = ca_bundle

    # This header is required for request redirection, in dCache for example
    if "LSST_BUTLER_WEBDAV_EXPECT100" in os.environ:
        log.debug("Expect: 100-Continue header enabled.")
        session.headers.update({'Expect': '100-continue'})

    log.debug("Session configured and ready.")

    return session
def isTokenAuth() -> bool:
    """Return the status of bearer-token authentication.

    Returns
    -------
    isTokenAuth : `bool`
        True if LSST_BUTLER_WEBDAV_AUTH is set to TOKEN, False otherwise.

    Raises
    ------
    KeyError
        Raised if the environment variable LSST_BUTLER_WEBDAV_AUTH is
        not set.
    """
    try:
        env_auth_method = os.environ['LSST_BUTLER_WEBDAV_AUTH']
    except KeyError:
        # "from None" hides the bare lookup error; the message below
        # already names the missing variable.
        raise KeyError("Environment variable LSST_BUTLER_WEBDAV_AUTH is not set, "
                       "please use values X509 or TOKEN") from None

    return env_auth_method == "TOKEN"
def refreshToken(session: requests.Session) -> None:
    """Set or update the 'Authorization' header of the session,
    configure bearer token authentication, with the value fetched
    from LSST_BUTLER_WEBDAV_TOKEN_FILE

    Parameters
    ----------
    session : `requests.Session`
        Session on which bearer token authentication must be configured

    Raises
    ------
    KeyError
        Raised if the environment variable LSST_BUTLER_WEBDAV_TOKEN_FILE
        is not set.
    FileNotFoundError
        Raised if the path held by LSST_BUTLER_WEBDAV_TOKEN_FILE is not an
        existing regular file.
    """
    try:
        token_path = os.environ['LSST_BUTLER_WEBDAV_TOKEN_FILE']
    except KeyError:
        raise KeyError("Environment variable LSST_BUTLER_WEBDAV_TOKEN_FILE is not set") from None

    if not os.path.isfile(token_path):
        raise FileNotFoundError(f"No token file: {token_path}")

    # Use a context manager so the file handle is closed deterministically.
    with open(token_path, 'r') as token_file:
        # Newlines are removed so the token forms a single header value.
        bearer_token = token_file.read().replace('\n', '')

    session.headers.update({'Authorization': 'Bearer ' + bearer_token})
def webdavCheckFileExists(path: Union[Location, ButlerURI, str],
                          session: Optional[requests.Session] = None) -> Tuple[bool, int]:
    """Check that a remote HTTP resource exists.

    Parameters
    ----------
    path : `Location`, `ButlerURI` or `str`
        Location or ButlerURI containing the bucket name and filepath.
    session : `requests.Session`, optional
        Session object to query. If not provided, a new session is
        created with `getHttpSession`.

    Returns
    -------
    exists : `bool`
        True if resource exists (the HEAD request returned status 200),
        False otherwise.
    size : `int`
        Size of the resource in bytes as reported by the Content-Length
        header; -1 if the resource does not exist or if the response does
        not carry a Content-Length header.
    """
    if session is None:
        session = getHttpSession()

    filepath = _getFileURL(path)

    log.debug("Checking if file exists: %s", filepath)

    r = session.head(filepath)
    if r.status_code != 200:
        return (False, -1)

    # A server is not required to send Content-Length in a HEAD response;
    # report an unknown size (-1) instead of failing with KeyError.
    return (True, int(r.headers.get('Content-Length', -1)))
def webdavDeleteFile(path: Union[Location, ButlerURI, str],
                     session: Optional[requests.Session] = None) -> None:
    """Remove a remote HTTP resource.

    Parameters
    ----------
    path : `Location`, `ButlerURI` or `str`
        Location or ButlerURI containing the bucket name and filepath.
    session : `requests.Session`, optional
        Session object to query. If not provided, a new session is
        created with `getHttpSession`.

    Raises
    ------
    FileNotFoundError
        Raised if the DELETE request returns a status code other than
        200, 202 or 204, i.e. if the resource does not exist or the
        deletion failed.
    """
    if session is None:
        session = getHttpSession()

    url = _getFileURL(path)
    log.debug("Removing file: %s", url)

    response = session.delete(url)
    # Status codes treated as a successful deletion.
    if response.status_code in (200, 202, 204):
        return
    raise FileNotFoundError(f"Unable to delete resource {url}; status code: {response.status_code}")
def folderExists(path: Union[Location, ButlerURI, str],
                 session: Optional[requests.Session] = None) -> bool:
    """Check if the Webdav repository at a given URL actually exists.

    Parameters
    ----------
    path : `Location`, `ButlerURI` or `str`
        Location or ButlerURI containing the bucket name and filepath.
    session : `requests.Session`, optional
        Session object to query. If not provided, a new session is
        created with `getHttpSession`.

    Returns
    -------
    exists : `bool`
        True if a HEAD request on the URL returns status 200, False
        otherwise.
    """
    if session is None:
        session = getHttpSession()

    url = _getFileURL(path)
    log.debug("Checking if folder exists: %s", url)

    response = session.head(url)
    return response.status_code == 200
def isWebdavEndpoint(path: Union[Location, ButlerURI, str]) -> bool:
    """Check whether the remote HTTP endpoint implements Webdav features.

    Parameters
    ----------
    path : `Location`, `ButlerURI` or `str`
        Location or ButlerURI containing the bucket name and filepath.

    Returns
    -------
    isWebdav : `bool`
        True if the response to an OPTIONS request contains a ``DAV``
        header, False if it doesn't.

    Notes
    -----
    The request is sent with a plain ``requests.options`` call rather than
    through a configured session; only LSST_BUTLER_WEBDAV_CA_BUNDLE is
    read from the environment and passed as ``verify``.
    """
    ca_bundle = os.environ.get('LSST_BUTLER_WEBDAV_CA_BUNDLE')
    if ca_bundle is None:
        log.warning("Environment variable LSST_BUTLER_WEBDAV_CA_BUNDLE is not set: "
                    "HTTPS requests will fail. If you intend to use HTTPS, please "
                    "export this variable.")

    url = _getFileURL(path)

    log.debug("Detecting HTTP endpoint type...")
    response = requests.options(url, verify=ca_bundle)
    return 'DAV' in response.headers
def finalurl(r: requests.Response) -> str:
    """Return the final destination of a request, following a 307 redirect.

    This is needed when using PUT operations, to avoid starting
    to send the data to the endpoint, before having to send it again once
    the 307 redirect response is received, and thus wasting bandwidth.

    Parameters
    ----------
    r : `requests.Response`
        An HTTP response received when requesting the endpoint

    Returns
    -------
    destination_url: `string`
        The final destination to which requests must be sent: the value
        of the ``Location`` header if the response status is 307,
        otherwise the URL of the response itself.
    """
    if r.status_code != 307:
        return r.url

    redirect_target = r.headers['Location']
    log.debug("Request redirected to %s", redirect_target)
    return redirect_target
def _getFileURL(path: Union[Location, ButlerURI, str]) -> str:
    """Return the absolute URL of the resource as a string.

    Parameters
    ----------
    path : `Location`, `ButlerURI` or `str`
        Location or ButlerURI containing the bucket name and filepath.

    Returns
    -------
    filepath : `str`
        The fully qualified URL of the resource.
    """
    # Anything that is not a Location is first wrapped in a ButlerURI.
    if not isinstance(path, Location):
        return ButlerURI(path).geturl()
    # A Location exposes its URI through the ``uri`` attribute.
    return path.uri.geturl()