Coverage for python/lsst/resources/s3utils.py: 23%
111 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-15 02:25 -0700
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-15 02:25 -0700
1# This file is part of lsst-resources.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# Use of this source code is governed by a 3-clause BSD-style
10# license that can be found in the LICENSE file.
12from __future__ import annotations
14from typing import TYPE_CHECKING
16if TYPE_CHECKING:
17 from unittest import TestCase
# Public names of this module, i.e. what ``from <module> import *`` provides.
__all__ = (
    # Helper functions defined in this module.
    "clean_test_environment",
    "getS3Client",
    "s3CheckFileExists",
    "bucketExists",
    "setAwsEnvCredentials",
    "unsetAwsEnvCredentials",
    # Retry support: the (possibly stubbed) backoff module and its settings.
    "backoff",
    "all_retryable_errors",
    "max_retry_time",
    "retryable_io_errors",
    "retryable_client_errors",
    # Exported even though the leading underscore marks it as private.
    "_TooManyRequestsException",
)
34import functools
35import os
36import re
37from http.client import HTTPException, ImproperConnectionState
38from types import ModuleType
39from typing import Any, Callable, Optional, Tuple, Union, cast
41from botocore.exceptions import ClientError
42from botocore.handlers import validate_bucket_name
43from urllib3.exceptions import HTTPError, RequestError
45try:
46 import boto3
47except ImportError:
48 boto3 = None
50try:
51 import botocore
52except ImportError:
53 botocore = None
56from ._resourcePath import ResourcePath
57from .location import Location
# Optional dependency: https://pypi.org/project/backoff/
try:
    import backoff
except ImportError:

    class Backoff:
        """No-op stand-in used when the ``backoff`` package is not installed.

        Both methods return their first positional argument untouched and
        ignore everything else. As a consequence
        ``backoff.on_exception(backoff.expo, ...)`` evaluates to ``expo``,
        which in turn returns the function it is applied to, so decorated
        functions are left exactly as written (no retries happen).
        """

        @staticmethod
        def expo(func: Callable, *_args: Any, **_kwargs: Any) -> Callable:
            """Return ``func`` unchanged."""
            return func

        @staticmethod
        def on_exception(func: Callable, *_args: Any, **_kwargs: Any) -> Callable:
            """Return ``func`` unchanged."""
            return func

    # Bind the stub to the module's name so the rest of the code can refer
    # to ``backoff`` whether or not the real package was importable.
    backoff = cast(ModuleType, Backoff)
class _TooManyRequestsException(Exception):
    """Private marker exception for retrying on a 429 response.

    botocore does not handle a 429 error itself and reports it as a generic
    ``ClientError``, so this dedicated type gives retry logic something
    specific to react to.
    """
# Settings for the "backoff" retry decorators. These retries are
# belt-and-suspenders on top of the retries built into Boto3, and exist to
# account for semantic differences in errors between S3-like providers.
retryable_io_errors = (
    ImproperConnectionState,  # http.client
    HTTPException,  # http.client
    RequestError,  # urllib3.exceptions
    HTTPError,  # urllib3.exceptions
    TimeoutError,  # built-in
    ConnectionError,  # built-in
    _TooManyRequestsException,  # private, defined in this module
)

# A client error can include NoSuchKey, so retrying may not be the right
# thing to do. This needs more consideration before it is used.
retryable_client_errors = (
    ClientError,  # botocore.exceptions
    PermissionError,  # built-in
)

# Single name for "everything worth retrying". For now the client errors
# are deliberately left out.
all_retryable_errors = retryable_io_errors

# Upper bound for retrying (presumably seconds -- confirm against the
# decorators that consume it).
max_retry_time = 60
def clean_test_environment(testcase: TestCase) -> None:
    """Blank out S3_ENDPOINT_URL for the duration of a test.

    Parameters
    ----------
    testcase : `unittest.TestCase`
        The running test; its ``addCleanup`` hook is used to put the
        original value back once the test has finished.

    Notes
    -----
    Nothing is changed, and no cleanup is registered, when the variable is
    unset or already empty.
    """
    saved = os.environ.get("S3_ENDPOINT_URL")
    if saved:
        # The variable is set to an empty string rather than removed.
        os.environ["S3_ENDPOINT_URL"] = ""

        def _restore() -> None:
            os.environ["S3_ENDPOINT_URL"] = saved

        testcase.addCleanup(_restore)
def getS3Client() -> boto3.client:
    """Return an S3 client for AWS (the default) or a configured endpoint.

    Returns
    -------
    s3client : `botocore.client.S3`
        A client of the S3 service.

    Raises
    ------
    ModuleNotFoundError
        Raised if ``boto3`` or ``botocore`` could not be imported.

    Notes
    -----
    The endpoint URL is read from the environment variable
    S3_ENDPOINT_URL. If it is unset or empty the default AWS endpoint is
    used.

    If the environment variable LSST_DISABLE_BUCKET_VALIDATION exists
    and has a value that is not empty, "0", "f", "n", or "false"
    (case-insensitive), then bucket name validation is disabled. This
    disabling allows Ceph multi-tenancy colon separators to appear in
    bucket names.
    """
    if boto3 is None:
        raise ModuleNotFoundError("Could not find boto3. Are you sure it is installed?")
    if botocore is None:
        raise ModuleNotFoundError("Could not find botocore. Are you sure it is installed?")

    # An unset variable and an empty string both mean "use the default".
    endpoint = os.environ.get("S3_ENDPOINT_URL") or None

    flag = os.environ.get("LSST_DISABLE_BUCKET_VALIDATION", "0")
    # The pattern matches exactly the values that leave validation enabled.
    keep_validation = re.search(r"^(0|f|n|false)?$", flag, re.I) is not None

    return _get_s3_client(endpoint, not keep_validation)
@functools.lru_cache()
def _get_s3_client(endpoint: Optional[str], skip_validation: bool) -> boto3.client:
    """Create the S3 client for an endpoint, caching it per argument pair.

    Parameters
    ----------
    endpoint : `str` or `None`
        Endpoint URL handed to ``boto3.client`` as ``endpoint_url``.
        `None` is what `getS3Client` passes when no endpoint is configured.
    skip_validation : `bool`
        If `True`, the ``validate_bucket_name`` handler is unregistered
        from the client's ``before-parameter-build.s3`` event.

    Returns
    -------
    s3client : `botocore.client.S3`
        The client; repeated calls with the same arguments return the
        cached object.
    """
    config = botocore.config.Config(read_timeout=180, retries={"mode": "adaptive", "max_attempts": 10})

    client = boto3.client("s3", endpoint_url=endpoint, config=config)
    if skip_validation:
        client.meta.events.unregister("before-parameter-build.s3", validate_bucket_name)
    return client
def s3CheckFileExists(
    path: Union[Location, ResourcePath, str],
    bucket: Optional[str] = None,
    client: Optional[boto3.client] = None,
) -> Tuple[bool, int]:
    """Report whether a key exists in a bucket, and how large it is.

    Parameters
    ----------
    path : `Location`, `ResourcePath` or `str`
        Location or ResourcePath containing the bucket name and filepath.
    bucket : `str`, optional
        Name of the bucket in which to look. If provided together with a
        `str` path, the path is taken to be relative to this bucket. For
        `Location` and `ResourcePath` inputs the bucket always comes from
        ``path`` itself.
    client : `boto3.client`, optional
        S3 Client object to query. If not supplied, one is obtained from
        `getS3Client` and boto3 resolves the credentials in the order
        described in its manual_.

    Returns
    -------
    exists : `bool`
        True if key exists, False otherwise.
    size : `int`
        Size of the key in bytes if it exists, otherwise -1.

    Raises
    ------
    ModuleNotFoundError
        Raised if ``boto3`` could not be imported.
    TypeError
        Raised if ``path`` is not one of the supported types.
    PermissionError
        Raised if the HEAD request is answered with a 403 status.

    Notes
    -----
    S3 Paths are sensitive to leading and trailing path separators.

    .. _manual: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/\
    configuration.html#configuring-credentials
    """
    if boto3 is None:
        raise ModuleNotFoundError("Could not find boto3. Are you sure it is installed?")

    if client is None:
        client = getS3Client()

    if isinstance(path, str) and bucket is not None:
        # Plain string plus explicit bucket: the string is already the key.
        filepath = path
    else:
        if isinstance(path, str):
            source = ResourcePath(path)
        elif isinstance(path, (ResourcePath, Location)):
            source = path
        else:
            raise TypeError(f"Unsupported path type: {path!r}.")
        bucket = source.netloc
        filepath = source.relativeToPathRoot

    try:
        header = client.head_object(Bucket=bucket, Key=filepath)
    except client.exceptions.ClientError as err:
        status = err.response["ResponseMetadata"]["HTTPStatusCode"]
        # A "resource unreachable" answer means the key does not exist.
        if status == 404:
            return (False, -1)
        # head_object only returns 404 for a missing object when the user
        # has s3:ListBucket permission; without it a 403 is returned. In
        # practical terms that generally means the file does not exist, but
        # it could also mean the user lacks s3:GetObject permission:
        # https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectHEAD.html
        # The two cases cannot be told apart with certainty.
        if status == 403:
            raise PermissionError(
                "Forbidden HEAD operation error occured. "
                "Verify s3:ListBucket and s3:GetObject "
                "permissions are granted for your IAM user. "
            ) from err
        raise
    return (True, header["ContentLength"])
def bucketExists(bucketName: str, client: Optional[boto3.client] = None) -> bool:
    """Tell whether an S3 bucket of the given name exists.

    Parameters
    ----------
    bucketName : `str`
        Name of the S3 Bucket.
    client : `boto3.client`, optional
        S3 Client object to query. If not supplied, one is obtained from
        `getS3Client` and boto3 resolves the credentials in the order
        described in its manual_.

    Returns
    -------
    exists : `bool`
        True if it exists, False if the bucket location lookup reports
        that there is no such bucket.

    Raises
    ------
    ModuleNotFoundError
        Raised if ``boto3`` could not be imported.

    .. _manual: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/\
    configuration.html#configuring-credentials
    """
    if boto3 is None:
        raise ModuleNotFoundError("Could not find boto3. Are you sure it is installed?")

    s3 = getS3Client() if client is None else client
    try:
        s3.get_bucket_location(Bucket=bucketName)
    except s3.exceptions.NoSuchBucket:
        return False
    return True
def setAwsEnvCredentials(
    accessKeyId: str = "dummyAccessKeyId", secretAccessKey: str = "dummySecretAccessKey"
) -> bool:
    """Populate the AWS credential environment variables if incomplete.

    Parameters
    ----------
    accessKeyId : `str`
        Value given to AWS_ACCESS_KEY_ID environmental variable. Defaults to
        `dummyAccessKeyId`.
    secretAccessKey : `str`
        Value given to AWS_SECRET_ACCESS_KEY environmental variable. Defaults
        to `dummySecretAccessKey`.

    Returns
    -------
    setEnvCredentials : `bool`
        True when environmental variables were set, False otherwise.

    Notes
    -----
    If either AWS_ACCESS_KEY_ID or AWS_SECRET_ACCESS_KEY is missing, both
    are overwritten so that the pair stays consistent. If both are already
    present nothing is touched.
    """
    has_key_id = "AWS_ACCESS_KEY_ID" in os.environ
    has_secret = "AWS_SECRET_ACCESS_KEY" in os.environ
    if has_key_id and has_secret:
        return False

    os.environ["AWS_ACCESS_KEY_ID"] = accessKeyId
    os.environ["AWS_SECRET_ACCESS_KEY"] = secretAccessKey
    return True
def unsetAwsEnvCredentials() -> None:
    """Remove the AWS credential variables from the environment.

    Deletes AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY; a variable that is
    not currently set is simply skipped.
    """
    for variable in ("AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"):
        os.environ.pop(variable, None)