Coverage for python/lsst/resources/s3utils.py: 24%
94 statements
« prev ^ index » next coverage.py v6.5.0, created at 2023-02-01 02:02 -0800
« prev ^ index » next coverage.py v6.5.0, created at 2023-02-01 02:02 -0800
1# This file is part of lsst-resources.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# Use of this source code is governed by a 3-clause BSD-style
10# license that can be found in the LICENSE file.
12from __future__ import annotations
# Names exported by ``from <this module> import *``. Because
# ``_TooManyRequestsException`` is listed here explicitly, it is exported
# by a star-import despite its leading underscore.
__all__ = (
    "getS3Client",
    "s3CheckFileExists",
    "bucketExists",
    "setAwsEnvCredentials",
    "unsetAwsEnvCredentials",
    "backoff",
    "all_retryable_errors",
    "max_retry_time",
    "retryable_io_errors",
    "retryable_client_errors",
    "_TooManyRequestsException",
)
28import functools
29import os
30from http.client import HTTPException, ImproperConnectionState
31from types import ModuleType
32from typing import Any, Callable, Optional, Tuple, Union, cast
34from botocore.exceptions import ClientError
35from urllib3.exceptions import HTTPError, RequestError
37try:
38 import boto3
39except ImportError:
40 boto3 = None
42try:
43 import botocore
44except ImportError:
45 botocore = None
48from ._resourcePath import ResourcePath
49from .location import Location
# The "backoff" package (https://pypi.org/project/backoff/) is optional.
try:
    import backoff
except ImportError:

    class Backoff:
        """No-op stand-in used when the ``backoff`` package is missing.

        Both methods hand back their first argument untouched, so
        ``@backoff.on_exception(backoff.expo, ...)`` evaluates to ``expo``,
        which in turn returns the decorated function unchanged. The result
        is that decorated functions run without any retry behavior.
        """

        @staticmethod
        def on_exception(func: Callable, *args: Any, **kwargs: Any) -> Callable:
            # Ignore every retry setting and return the first argument.
            return func

        @staticmethod
        def expo(func: Callable, *args: Any, **kwargs: Any) -> Callable:
            # Ignore every wait-generator setting and return the first argument.
            return func

    # Expose the shim under the module name so callers need no special case.
    backoff = cast(ModuleType, Backoff)
class _TooManyRequestsException(Exception):
    """Private exception available for signalling an HTTP 429 for retry.

    botocore does not handle a 429 response itself and instead issues a
    generic ``ClientError``, so this dedicated type can be raised in its
    place where a retry is wanted.
    """
# Settings for the "backoff" retry decorators. These retries are
# belt-and-suspenders alongside the retries built into Boto3, to account
# for semantic differences in errors between S3-like providers.
retryable_io_errors = (
    # http.client
    ImproperConnectionState,
    HTTPException,
    # urllib3.exceptions
    RequestError,
    HTTPError,
    # built-ins
    TimeoutError,
    ConnectionError,
    # private
    _TooManyRequestsException,
)

# A client error can include NoSuchKey, so retrying may not be the right
# thing to do. This needs more consideration before it is used.
retryable_client_errors = (
    # botocore.exceptions
    ClientError,
    # built-ins
    PermissionError,
)


# Combine all errors into an easy package. For now client errors
# are not included, so this is simply an alias of the I/O error tuple.
all_retryable_errors = retryable_io_errors
# NOTE(review): presumably the maximum total retry time in seconds handed
# to the backoff decorators -- confirm against the call sites.
max_retry_time = 60
def getS3Client() -> boto3.client:
    """Return an S3 client for AWS or for a custom S3 endpoint.

    Returns
    -------
    s3client : `botocore.client.S3`
        A client of the S3 service.

    Raises
    ------
    ModuleNotFoundError
        Raised if ``boto3`` or ``botocore`` could not be imported.

    Notes
    -----
    The endpoint URL is read from the ``S3_ENDPOINT_URL`` environment
    variable. If it is unset or empty, the default AWS endpoint is used.
    """
    if boto3 is None:
        raise ModuleNotFoundError("Could not find boto3. Are you sure it is installed?")
    if botocore is None:
        raise ModuleNotFoundError("Could not find botocore. Are you sure it is installed?")

    # An unset variable and an empty string both collapse to None.
    endpoint = os.environ.get("S3_ENDPOINT_URL") or None

    return _get_s3_client(endpoint)
@functools.lru_cache()
def _get_s3_client(endpoint: Optional[str]) -> boto3.client:
    """Create the S3 client for an endpoint, caching one client per endpoint.

    Parameters
    ----------
    endpoint : `str` or `None`
        URL of the S3 endpoint to connect to. `None` is passed straight
        through to ``boto3.client`` as ``endpoint_url``.

    Returns
    -------
    s3client : `botocore.client.S3`
        A client of the S3 service, configured with a 180 second read
        timeout and adaptive retries (up to 10 attempts).
    """
    config = botocore.config.Config(read_timeout=180, retries={"mode": "adaptive", "max_attempts": 10})

    return boto3.client("s3", endpoint_url=endpoint, config=config)
def s3CheckFileExists(
    path: Union[Location, ResourcePath, str],
    bucket: Optional[str] = None,
    client: Optional[boto3.client] = None,
) -> Tuple[bool, int]:
    """Report whether a key exists in an S3 bucket, and its size.

    Parameters
    ----------
    path : `Location`, `ResourcePath` or `str`
        Location or ResourcePath containing the bucket name and filepath.
    bucket : `str`, optional
        Name of the bucket in which to look. If provided and ``path`` is a
        `str`, ``path`` is used as the key relative to this bucket. For
        `Location` and `ResourcePath` inputs the bucket is always taken
        from ``path``.
    client : `boto3.client`, optional
        S3 Client object to query, if not supplied boto3 will try to resolve
        the credentials in the order described in its manual_.

    Returns
    -------
    exists : `bool`
        True if key exists, False otherwise.
    size : `int`
        Size of the key, if key exists, in bytes, otherwise -1.

    Raises
    ------
    ModuleNotFoundError
        Raised if ``boto3`` could not be imported.
    TypeError
        Raised if ``path`` is not one of the supported types.
    PermissionError
        Raised if the HEAD request is answered with HTTP status 403.

    Notes
    -----
    S3 Paths are sensitive to leading and trailing path separators.

    .. _manual: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/\
    configuration.html#configuring-credentials
    """
    if boto3 is None:
        raise ModuleNotFoundError("Could not find boto3. Are you sure it is installed?")

    if client is None:
        client = getS3Client()

    # Work out which bucket and key to query.
    if isinstance(path, str):
        if bucket is None:
            uri = ResourcePath(path)
            bucket, filepath = uri.netloc, uri.relativeToPathRoot
        else:
            filepath = path
    elif isinstance(path, (ResourcePath, Location)):
        bucket, filepath = path.netloc, path.relativeToPathRoot
    else:
        raise TypeError(f"Unsupported path type: {path!r}.")

    try:
        head = client.head_object(Bucket=bucket, Key=filepath)
    except client.exceptions.ClientError as err:
        status = err.response["ResponseMetadata"]["HTTPStatusCode"]
        # A 404 means the key does not exist.
        if status == 404:
            return (False, -1)
        # head_object only answers 404 for a missing object when the user
        # has s3:ListBucket permission; without it a 403 is returned. In
        # practice this usually means the file does not exist, but it could
        # also mean the user lacks s3:GetObject permission:
        # https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectHEAD.html
        # It does not seem possible to tell the two cases apart with
        # certainty.
        if status == 403:
            raise PermissionError(
                "Forbidden HEAD operation error occured. "
                "Verify s3:ListBucket and s3:GetObject "
                "permissions are granted for your IAM user. "
            ) from err
        raise
    else:
        return (True, head["ContentLength"])
def bucketExists(bucketName: str, client: Optional[boto3.client] = None) -> bool:
    """Report whether an S3 bucket with the given name exists.

    Parameters
    ----------
    bucketName : `str`
        Name of the S3 Bucket
    client : `boto3.client`, optional
        S3 Client object to query, if not supplied boto3 will try to resolve
        the credentials in the order described in its manual_.

    Returns
    -------
    exists : `bool`
        True if it exists, False if no Bucket with specified parameters is
        found.

    Raises
    ------
    ModuleNotFoundError
        Raised if ``boto3`` could not be imported.

    .. _manual: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/\
    configuration.html#configuring-credentials
    """
    if boto3 is None:
        raise ModuleNotFoundError("Could not find boto3. Are you sure it is installed?")

    if client is None:
        client = getS3Client()

    # Only the "no such bucket" error is treated as non-existence; any
    # other failure propagates to the caller.
    try:
        client.get_bucket_location(Bucket=bucketName)
    except client.exceptions.NoSuchBucket:
        return False
    return True
def setAwsEnvCredentials(
    accessKeyId: str = "dummyAccessKeyId", secretAccessKey: str = "dummySecretAccessKey"
) -> bool:
    """Populate the AWS credential environment variables if incomplete.

    Parameters
    ----------
    accessKeyId : `str`
        Value given to AWS_ACCESS_KEY_ID environmental variable. Defaults to
        `dummyAccessKeyId`.
    secretAccessKey : `str`
        Value given to AWS_SECRET_ACCESS_KEY environmental variable. Defaults
        to `dummySecretAccessKey`.

    Returns
    -------
    setEnvCredentials : `bool`
        True when environmental variables were set, False otherwise.

    Notes
    -----
    If either AWS_ACCESS_KEY_ID or AWS_SECRET_ACCESS_KEY is missing, both
    values are overwritten to ensure that the values are consistent.
    """
    required = ("AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY")
    # Leave the environment alone when a complete pair is already present.
    if all(name in os.environ for name in required):
        return False

    os.environ["AWS_ACCESS_KEY_ID"] = accessKeyId
    os.environ["AWS_SECRET_ACCESS_KEY"] = secretAccessKey
    return True
def unsetAwsEnvCredentials() -> None:
    """Remove the AWS credential environment variables.

    Deletes AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY from the
    environment. A variable that is not currently set is simply skipped.
    """
    for name in ("AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"):
        os.environ.pop(name, None)