Coverage for python/lsst/resources/s3utils.py: 23%
104 statements
« prev ^ index » next coverage.py v6.5.0, created at 2023-04-11 02:04 -0700
« prev ^ index » next coverage.py v6.5.0, created at 2023-04-11 02:04 -0700
1# This file is part of lsst-resources.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# Use of this source code is governed by a 3-clause BSD-style
10# license that can be found in the LICENSE file.
12from __future__ import annotations
14from typing import TYPE_CHECKING
16if TYPE_CHECKING:
17 from unittest import TestCase
19__all__ = (
20 "clean_test_environment",
21 "getS3Client",
22 "s3CheckFileExists",
23 "bucketExists",
24 "setAwsEnvCredentials",
25 "unsetAwsEnvCredentials",
26 "backoff",
27 "all_retryable_errors",
28 "max_retry_time",
29 "retryable_io_errors",
30 "retryable_client_errors",
31 "_TooManyRequestsException",
32)
34import functools
35import os
36from http.client import HTTPException, ImproperConnectionState
37from types import ModuleType
38from typing import Any, Callable, Optional, Tuple, Union, cast
40from botocore.exceptions import ClientError
41from urllib3.exceptions import HTTPError, RequestError
43try:
44 import boto3
45except ImportError:
46 boto3 = None
48try:
49 import botocore
50except ImportError:
51 botocore = None
54from ._resourcePath import ResourcePath
55from .location import Location
# Retry decorators are provided by https://pypi.org/project/backoff/ when that
# package is installed; otherwise a do-nothing stand-in is used.
try:
    import backoff
except ImportError:

    class Backoff:
        """No-op substitute exposing the two ``backoff`` names used here.

        Each helper returns its first positional argument unchanged and
        ignores everything else. When applied in the decorator-factory form
        ``backoff.on_exception(backoff.expo, ...)(func)``, ``on_exception``
        returns ``expo`` and ``expo`` then returns ``func`` itself, so the
        decorated function is left untouched.
        """

        @staticmethod
        def expo(func: Callable, *args: Any, **kwargs: Any) -> Callable:
            """Return ``func`` unchanged; extra arguments are ignored."""
            return func

        @staticmethod
        def on_exception(func: Callable, *args: Any, **kwargs: Any) -> Callable:
            """Return ``func`` unchanged; extra arguments are ignored."""
            return func

    # Present the stand-in class under the module's name for type checkers.
    backoff = cast(ModuleType, Backoff)
class _TooManyRequestsException(Exception):
    """Private marker exception that can be raised to retry on HTTP 429.

    botocore does not handle a 429 error itself and instead issues a
    generic ``ClientError``.
    """
# Configuration for the "backoff" retry decorators. These retries are a
# belt-and-suspenders addition to the retries built into Boto3, to account
# for semantic differences in errors between S3-like providers.
retryable_io_errors = (
    # From http.client.
    ImproperConnectionState,
    HTTPException,
    # From urllib3.exceptions.
    RequestError,
    HTTPError,
    # Python built-ins.
    TimeoutError,
    ConnectionError,
    # Defined in this module.
    _TooManyRequestsException,
)

# A ClientError can also be a NoSuchKey, for which a retry may not be the
# right thing. More consideration is needed before this tuple is used.
retryable_client_errors = (
    # From botocore.exceptions.
    ClientError,
    # Python built-ins.
    PermissionError,
)

# One convenient bundle of everything to retry on. For now the client
# errors are deliberately not part of it.
all_retryable_errors = retryable_io_errors
# Upper bound for retrying (presumably seconds -- confirm against the retry
# decorators that consume it).
max_retry_time = 60
def clean_test_environment(testcase: TestCase) -> None:
    """Blank out S3_ENDPOINT_URL for a test and restore it afterwards.

    Parameters
    ----------
    testcase : `unittest.TestCase`
        Reference to the test being run; a cleanup function restoring the
        original value is registered on it.

    Notes
    -----
    If the variable is unset or already empty nothing is modified and no
    cleanup function is registered.
    """
    saved = os.environ.get("S3_ENDPOINT_URL")
    if saved:
        # The variable is set to the empty string rather than removed.
        os.environ["S3_ENDPOINT_URL"] = ""

        def restore() -> None:
            os.environ["S3_ENDPOINT_URL"] = saved

        testcase.addCleanup(restore)
def getS3Client() -> boto3.client:
    """Return an S3 client for AWS (default) or a configured endpoint.

    Returns
    -------
    s3client : `botocore.client.S3`
        A client of the S3 service.

    Raises
    ------
    ModuleNotFoundError
        Raised if boto3 or botocore could not be imported.

    Notes
    -----
    The endpoint URL is read from the environment variable
    S3_ENDPOINT_URL. If it is unset or empty, the default AWS endpoint is
    used.
    """
    if boto3 is None:
        raise ModuleNotFoundError("Could not find boto3. Are you sure it is installed?")
    if botocore is None:
        raise ModuleNotFoundError("Could not find botocore. Are you sure it is installed?")

    # Collapse both "unset" and "" into None so they mean the default endpoint.
    endpoint_url = os.environ.get("S3_ENDPOINT_URL") or None
    return _get_s3_client(endpoint_url)
@functools.lru_cache()
def _get_s3_client(endpoint: Optional[str]) -> boto3.client:
    """Create the S3 client for an endpoint, caching one per endpoint.

    Parameters
    ----------
    endpoint : `str` or `None`
        Endpoint URL handed to ``boto3.client`` as ``endpoint_url``.
        `getS3Client` passes `None` when no endpoint is configured.

    Returns
    -------
    s3client : `botocore.client.S3`
        Client configured with a 180 read timeout and "adaptive" retry mode
        with up to 10 attempts.

    Notes
    -----
    The result is memoized with `functools.lru_cache`, so repeated calls
    with the same ``endpoint`` return the same client object.
    """
    config = botocore.config.Config(read_timeout=180, retries={"mode": "adaptive", "max_attempts": 10})

    return boto3.client("s3", endpoint_url=endpoint, config=config)
def s3CheckFileExists(
    path: Union[Location, ResourcePath, str],
    bucket: Optional[str] = None,
    client: Optional[boto3.client] = None,
) -> Tuple[bool, int]:
    """Report whether a key exists in a bucket, and its size.

    Parameters
    ----------
    path : `Location`, `ResourcePath` or `str`
        Location or ResourcePath containing the bucket name and filepath.
        A `str` is used directly as the key when ``bucket`` is given and
        is otherwise parsed as a `ResourcePath`.
    bucket : `str`, optional
        Name of the bucket in which to look. Only consulted when ``path``
        is a `str`; for `Location` and `ResourcePath` the bucket is taken
        from the path itself.
    client : `boto3.client`, optional
        S3 Client object to query. If not supplied one is obtained from
        `getS3Client` and boto3 resolves the credentials in the order
        described in its manual_.

    Returns
    -------
    exists : `bool`
        True if key exists, False otherwise.
    size : `int`
        Size of the key in bytes if it exists, otherwise -1.

    Raises
    ------
    ModuleNotFoundError
        Raised if boto3 could not be imported.
    TypeError
        Raised if ``path`` is not one of the supported types.
    PermissionError
        Raised if the HEAD request is answered with HTTP status 403.

    Notes
    -----
    S3 Paths are sensitive to leading and trailing path separators.

    .. _manual: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/\
    configuration.html#configuring-credentials
    """
    if boto3 is None:
        raise ModuleNotFoundError("Could not find boto3. Are you sure it is installed?")

    s3 = getS3Client() if client is None else client

    if isinstance(path, str):
        if bucket is None:
            parsed = ResourcePath(path)
            bucket, key = parsed.netloc, parsed.relativeToPathRoot
        else:
            key = path
    elif isinstance(path, (ResourcePath, Location)):
        bucket, key = path.netloc, path.relativeToPathRoot
    else:
        raise TypeError(f"Unsupported path type: {path!r}.")

    try:
        header = s3.head_object(Bucket=bucket, Key=key)
    except s3.exceptions.ClientError as err:
        status = err.response["ResponseMetadata"]["HTTPStatusCode"]
        # A 404 (resource unreachable) means the key does not exist.
        if status == 404:
            return (False, -1)
        # head_object only answers 404 for a missing object when the user
        # has the s3:ListBucket permission; without it a 403 is returned.
        # In practice that usually means the file does not exist, but it
        # could equally mean the user lacks s3:GetObject permission:
        # https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectHEAD.html
        # The two cases cannot be told apart with certainty.
        if status == 403:
            raise PermissionError(
                "Forbidden HEAD operation error occured. "
                "Verify s3:ListBucket and s3:GetObject "
                "permissions are granted for your IAM user. "
            ) from err
        raise
    return (True, header["ContentLength"])
def bucketExists(bucketName: str, client: Optional[boto3.client] = None) -> bool:
    """Tell whether an S3 bucket with the given name exists.

    Parameters
    ----------
    bucketName : `str`
        Name of the S3 Bucket.
    client : `boto3.client`, optional
        S3 Client object to query. If not supplied one is obtained from
        `getS3Client` and boto3 resolves the credentials in the order
        described in its manual_.

    Returns
    -------
    exists : `bool`
        True if it exists, False if the bucket location query reports
        that there is no such bucket.

    Raises
    ------
    ModuleNotFoundError
        Raised if boto3 could not be imported.

    .. _manual: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/\
    configuration.html#configuring-credentials
    """
    if boto3 is None:
        raise ModuleNotFoundError("Could not find boto3. Are you sure it is installed?")

    s3 = client if client is not None else getS3Client()
    try:
        s3.get_bucket_location(Bucket=bucketName)
    except s3.exceptions.NoSuchBucket:
        return False
    return True
def setAwsEnvCredentials(
    accessKeyId: str = "dummyAccessKeyId", secretAccessKey: str = "dummySecretAccessKey"
) -> bool:
    """Populate the AWS credential environment variables if needed.

    Parameters
    ----------
    accessKeyId : `str`
        Value given to the AWS_ACCESS_KEY_ID environment variable.
        Defaults to `dummyAccessKeyId`.
    secretAccessKey : `str`
        Value given to the AWS_SECRET_ACCESS_KEY environment variable.
        Defaults to `dummySecretAccessKey`.

    Returns
    -------
    setEnvCredentials : `bool`
        True when the environment variables were set, False otherwise.

    Notes
    -----
    If either AWS_ACCESS_KEY_ID or AWS_SECRET_ACCESS_KEY is missing, both
    are overwritten so that the pair stays consistent. If both are already
    present nothing is changed.
    """
    credential_vars = ("AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY")
    if all(name in os.environ for name in credential_vars):
        return False

    os.environ["AWS_ACCESS_KEY_ID"] = accessKeyId
    os.environ["AWS_SECRET_ACCESS_KEY"] = secretAccessKey
    return True
def unsetAwsEnvCredentials() -> None:
    """Remove the AWS credential environment variables.

    Deletes AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY from the
    environment; a variable that is not set is silently skipped.
    """
    for name in ("AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"):
        os.environ.pop(name, None)