Coverage for python/lsst/resources/s3utils.py: 23% (111 statements)
# This file is part of lsst-resources.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# Use of this source code is governed by a 3-clause BSD-style
# license that can be found in the LICENSE file.

from __future__ import annotations

__all__ = (
    "clean_test_environment",
    "getS3Client",
    "s3CheckFileExists",
    "bucketExists",
    "setAwsEnvCredentials",
    "unsetAwsEnvCredentials",
    "backoff",
    "all_retryable_errors",
    "max_retry_time",
    "retryable_io_errors",
    "retryable_client_errors",
    "_TooManyRequestsError",
)

import functools
import os
import re
from collections.abc import Callable
from http.client import HTTPException, ImproperConnectionState
from types import ModuleType
from typing import TYPE_CHECKING, Any, cast

from botocore.exceptions import ClientError
from botocore.handlers import validate_bucket_name
from urllib3.exceptions import HTTPError, RequestError

if TYPE_CHECKING:
    from unittest import TestCase

try:
    import boto3
except ImportError:
    boto3 = None

try:
    import botocore
except ImportError:
    botocore = None

from ._resourcePath import ResourcePath
from .location import Location

# https://pypi.org/project/backoff/
try:
    import backoff
except ImportError:

    class Backoff:
        """Mock implementation of the backoff class."""

        @staticmethod
        def expo(func: Callable, *args: Any, **kwargs: Any) -> Callable:
            return func

        @staticmethod
        def on_exception(func: Callable, *args: Any, **kwargs: Any) -> Callable:
            return func

    backoff = cast(ModuleType, Backoff)

class _TooManyRequestsError(Exception):
    """Private exception that can be used for 429 retry.

    botocore refuses to handle the 429 error itself and instead raises a
    generic ClientError.
    """

    pass

# Settings for the "backoff" retry decorators. These retries are
# belt-and-suspenders along with the retries built into Boto3, to account
# for semantic differences in errors between S3-like providers.
retryable_io_errors = (
    # http.client
    ImproperConnectionState,
    HTTPException,
    # urllib3.exceptions
    RequestError,
    HTTPError,
    # built-ins
    TimeoutError,
    ConnectionError,
    # private
    _TooManyRequestsError,
)

# ClientError can include NoSuchKey, so retrying may not be the right
# thing. This may require more consideration if it is to be used.
retryable_client_errors = (
    # botocore.exceptions
    ClientError,
    # built-ins
    PermissionError,
)

# Combine all errors into an easy package. For now client errors
# are not included.
all_retryable_errors = retryable_io_errors
max_retry_time = 60
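
# Illustrative usage sketch (not part of the original module): how a caller
# might combine these settings with the "backoff" decorator to retry a flaky
# S3 read. The helper name and the get_object call below are assumptions for
# the example only.
@backoff.on_exception(backoff.expo, all_retryable_errors, max_time=max_retry_time)
def _example_download_with_retry(client: boto3.client, bucket: str, key: str) -> bytes:
    # Any error in all_retryable_errors triggers an exponential-backoff
    # retry for up to max_retry_time seconds.
    return client.get_object(Bucket=bucket, Key=key)["Body"].read()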

def clean_test_environment(testcase: TestCase) -> None:
    """Clear S3_ENDPOINT_URL then restore it at the end of a test.

    Parameters
    ----------
    testcase : `unittest.TestCase`
        Reference to the test being run; used to add a cleanup function.
    """
    endpoint = os.environ.get("S3_ENDPOINT_URL")

    if not endpoint:
        return
    os.environ["S3_ENDPOINT_URL"] = ""

    def cleanup() -> None:
        if endpoint is not None:
            os.environ["S3_ENDPOINT_URL"] = endpoint

    testcase.addCleanup(cleanup)
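
# Illustrative usage sketch (not part of the original module): a test's setUp
# might call clean_test_environment so that a live S3_ENDPOINT_URL cannot leak
# into the test and is restored when the test's cleanups run.
def _example_setup(testcase: TestCase) -> None:
    clean_test_environment(testcase)
    # From here until the testcase's cleanups run, S3_ENDPOINT_URL is "".
    assert not os.environ.get("S3_ENDPOINT_URL")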

def getS3Client() -> boto3.client:
    """Create an S3 client with AWS (default) or the specified endpoint.

    Returns
    -------
    s3client : `botocore.client.S3`
        A client of the S3 service.

    Notes
    -----
    The endpoint URL is taken from the environment variable S3_ENDPOINT_URL.
    If none is specified, the default AWS one is used.

    If the environment variable LSST_DISABLE_BUCKET_VALIDATION exists
    and has a value that is not empty, "0", "f", "n", or "false"
    (case-insensitive), then bucket name validation is disabled. This
    disabling allows Ceph multi-tenancy colon separators to appear in
    bucket names.
    """
    if boto3 is None:
        raise ModuleNotFoundError("Could not find boto3. Are you sure it is installed?")
    if botocore is None:
        raise ModuleNotFoundError("Could not find botocore. Are you sure it is installed?")

    endpoint = os.environ.get("S3_ENDPOINT_URL", None)
    if not endpoint:
        endpoint = None  # Handle ""
    disable_value = os.environ.get("LSST_DISABLE_BUCKET_VALIDATION", "0")
    skip_validation = not re.search(r"^(0|f|n|false)?$", disable_value, re.I)

    return _get_s3_client(endpoint, skip_validation)

@functools.lru_cache
def _get_s3_client(endpoint: str | None, skip_validation: bool) -> boto3.client:
    # Helper function to cache the client for this endpoint.
    config = botocore.config.Config(read_timeout=180, retries={"mode": "adaptive", "max_attempts": 10})

    client = boto3.client("s3", endpoint_url=endpoint, config=config)
    if skip_validation:
        client.meta.events.unregister("before-parameter-build.s3", validate_bucket_name)
    return client
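
# Illustrative usage sketch (not part of the original module): pointing
# getS3Client() at a non-AWS endpoint, e.g. a local MinIO or Ceph gateway.
# The endpoint URL here is an assumption for the example only.
def _example_client_for_local_endpoint() -> boto3.client:
    os.environ["S3_ENDPOINT_URL"] = "http://localhost:9000"
    # Any value other than empty/"0"/"f"/"n"/"false" disables bucket name
    # validation, allowing Ceph-style "tenant:bucket" names.
    os.environ["LSST_DISABLE_BUCKET_VALIDATION"] = "1"
    return getS3Client()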

def s3CheckFileExists(
    path: Location | ResourcePath | str,
    bucket: str | None = None,
    client: boto3.client | None = None,
) -> tuple[bool, int]:
    """Return whether the file exists in the bucket, along with its size.

    Parameters
    ----------
    path : `Location`, `ResourcePath` or `str`
        Location or ResourcePath containing the bucket name and filepath.
    bucket : `str`, optional
        Name of the bucket in which to look. If provided, path will be
        assumed to be relative to the given bucket.
    client : `boto3.client`, optional
        S3 client object to query; if not supplied, boto3 will try to
        resolve the credentials in the order described in its manual_.

    Returns
    -------
    exists : `bool`
        True if key exists, False otherwise.
    size : `int`
        Size of the key in bytes if the key exists, otherwise -1.

    Notes
    -----
    S3 paths are sensitive to leading and trailing path separators.

    .. _manual: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/\
configuration.html#configuring-credentials
    """
    if boto3 is None:
        raise ModuleNotFoundError("Could not find boto3. Are you sure it is installed?")

    if client is None:
        client = getS3Client()

    if isinstance(path, str):
        if bucket is not None:
            filepath = path
        else:
            uri = ResourcePath(path)
            bucket = uri.netloc
            filepath = uri.relativeToPathRoot
    elif isinstance(path, ResourcePath | Location):
        bucket = path.netloc
        filepath = path.relativeToPathRoot
    else:
        raise TypeError(f"Unsupported path type: {path!r}.")

    try:
        obj = client.head_object(Bucket=bucket, Key=filepath)
        return (True, obj["ContentLength"])
    except client.exceptions.ClientError as err:
        # A "resource unreachable" error means the key does not exist.
        if err.response["ResponseMetadata"]["HTTPStatusCode"] == 404:
            return (False, -1)
        # head_object returns 404 for a missing object only when the user
        # has s3:ListBucket permission; without list permission a 403 is
        # returned instead. In practical terms this generally means that the
        # file does not exist, but it could also mean the user lacks
        # s3:GetObject permission:
        # https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectHEAD.html
        # It is not possible to discern which case it is with certainty.
        if err.response["ResponseMetadata"]["HTTPStatusCode"] == 403:
            raise PermissionError(
                "Forbidden HEAD operation error occurred. "
                "Verify s3:ListBucket and s3:GetObject "
                "permissions are granted for your IAM user. "
            ) from err
        raise
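
# Illustrative usage sketch (not part of the original module): the three ways
# a path can be passed to s3CheckFileExists. The bucket and key names are
# assumptions for the example only.
def _example_check_file_exists() -> None:
    # Full S3 URI: the bucket is parsed out of the netloc.
    exists, size = s3CheckFileExists("s3://some-bucket/dir/file.txt")
    # Key relative to an explicitly named bucket.
    exists, size = s3CheckFileExists("dir/file.txt", bucket="some-bucket")
    # A ResourcePath (or Location) carries both bucket and key.
    exists, size = s3CheckFileExists(ResourcePath("s3://some-bucket/dir/file.txt"))
    print(exists, size)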

def bucketExists(bucketName: str, client: boto3.client | None = None) -> bool:
    """Check if the S3 bucket with the given name actually exists.

    Parameters
    ----------
    bucketName : `str`
        Name of the S3 bucket.
    client : `boto3.client`, optional
        S3 client object to query; if not supplied, boto3 will try to
        resolve the credentials in the order described in its manual_.

    Returns
    -------
    exists : `bool`
        True if it exists, False if no bucket with the specified name is
        found.

    .. _manual: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/\
configuration.html#configuring-credentials
    """
    if boto3 is None:
        raise ModuleNotFoundError("Could not find boto3. Are you sure it is installed?")

    if client is None:
        client = getS3Client()
    try:
        client.get_bucket_location(Bucket=bucketName)
        return True
    except client.exceptions.NoSuchBucket:
        return False
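
# Illustrative usage sketch (not part of the original module): guarding an
# operation on the bucket existing first. The bucket name is an assumption
# for the example only.
def _example_require_bucket(client: boto3.client | None = None, bucketName: str = "some-bucket") -> None:
    if not bucketExists(bucketName, client):
        raise FileNotFoundError(f"Bucket {bucketName!r} does not exist.")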

def setAwsEnvCredentials(
    accessKeyId: str = "dummyAccessKeyId", secretAccessKey: str = "dummySecretAccessKey"
) -> bool:
    """Set AWS credential environment variables.

    Parameters
    ----------
    accessKeyId : `str`
        Value given to the AWS_ACCESS_KEY_ID environment variable. Defaults
        to `dummyAccessKeyId`.
    secretAccessKey : `str`
        Value given to the AWS_SECRET_ACCESS_KEY environment variable.
        Defaults to `dummySecretAccessKey`.

    Returns
    -------
    setEnvCredentials : `bool`
        True when the environment variables were set, False otherwise.

    Notes
    -----
    If either AWS_ACCESS_KEY_ID or AWS_SECRET_ACCESS_KEY is not set, both
    values are overwritten to ensure that they are consistent.
    """
    if "AWS_ACCESS_KEY_ID" not in os.environ or "AWS_SECRET_ACCESS_KEY" not in os.environ:
        os.environ["AWS_ACCESS_KEY_ID"] = accessKeyId
        os.environ["AWS_SECRET_ACCESS_KEY"] = secretAccessKey
        return True
    return False

def unsetAwsEnvCredentials() -> None:
    """Unset AWS credential environment variables.

    Unsets the AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environment
    variables.
    """
    if "AWS_ACCESS_KEY_ID" in os.environ:
        del os.environ["AWS_ACCESS_KEY_ID"]
    if "AWS_SECRET_ACCESS_KEY" in os.environ:
        del os.environ["AWS_SECRET_ACCESS_KEY"]
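
# Illustrative usage sketch (not part of the original module): the typical
# pairing of the credential helpers around test code that needs an S3 client
# but must not depend on real credentials.
def _example_with_dummy_credentials() -> None:
    usingDummyCredentials = setAwsEnvCredentials()
    try:
        getS3Client()
    finally:
        # Only remove the variables if this call was the one that set them.
        if usingDummyCredentials:
            unsetAwsEnvCredentials()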