Coverage for python/lsst/resources/s3utils.py: 28% (127 statements)

# This file is part of lsst-resources.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# Use of this source code is governed by a 3-clause BSD-style
# license that can be found in the LICENSE file.

from __future__ import annotations

__all__ = (
    "clean_test_environment",
    "getS3Client",
    "s3CheckFileExists",
    "bucketExists",
    "setAwsEnvCredentials",
    "unsetAwsEnvCredentials",
    "backoff",
    "all_retryable_errors",
    "max_retry_time",
    "retryable_io_errors",
    "retryable_client_errors",
    "_TooManyRequestsError",
    "clean_test_environment_for_s3",
)

import functools
import os
import re
from collections.abc import Callable, Iterator
from contextlib import contextmanager
from http.client import HTTPException, ImproperConnectionState
from types import ModuleType
from typing import TYPE_CHECKING, Any, cast
from unittest.mock import patch

from botocore.exceptions import ClientError
from botocore.handlers import validate_bucket_name
from deprecated.sphinx import deprecated
from urllib3.exceptions import HTTPError, RequestError

if TYPE_CHECKING:
    from unittest import TestCase

try:
    import boto3
except ImportError:
    boto3 = None

try:
    import botocore
except ImportError:
    botocore = None

from ._resourcePath import ResourcePath
from .location import Location

# https://pypi.org/project/backoff/
try:
    import backoff
except ImportError:

    class Backoff:
        """Mock implementation of the backoff module interface."""

        @staticmethod
        def expo(func: Callable, *args: Any, **kwargs: Any) -> Callable:
            return func

        @staticmethod
        def on_exception(func: Callable, *args: Any, **kwargs: Any) -> Callable:
            return func

    backoff = cast(ModuleType, Backoff)
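
# Note: with the stand-in above, backoff.expo and backoff.on_exception simply
# return the wrapped callable, so any retry decorators built from them become
# no-ops (a single attempt, no retries) when the optional backoff package is
# not installed.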


class _TooManyRequestsError(Exception):
    """Private exception that can be used for 429 retry.

    botocore refuses to deal with the 429 error itself and instead issues a
    generic ClientError.
    """

    pass


# Settings for the "backoff" retry decorators. These retries are
# belt-and-suspenders on top of the retries built into Boto3, to account for
# semantic differences in errors between S3-like providers.
retryable_io_errors = (
    # http.client
    ImproperConnectionState,
    HTTPException,
    # urllib3.exceptions
    RequestError,
    HTTPError,
    # built-ins
    TimeoutError,
    ConnectionError,
    # private
    _TooManyRequestsError,
)

# Client error can include NoSuchKey, so retrying may not be the right
# thing. This may require more consideration if it is to be used.
retryable_client_errors = (
    # botocore.exceptions
    ClientError,
    # built-ins
    PermissionError,
)

# Combine all errors into an easy package. For now client errors
# are not included.
all_retryable_errors = retryable_io_errors
max_retry_time = 60
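
# Example (illustrative sketch, not part of this module): the tuples and
# max_retry_time above are intended to be passed to the "backoff" decorators,
# roughly like this; the decorated function here is hypothetical.
#
#     @backoff.on_exception(backoff.expo, all_retryable_errors, max_time=max_retry_time)
#     def _upload_with_retries(client: boto3.client, bucket: str, key: str, body: bytes) -> None:
#         client.put_object(Bucket=bucket, Key=key, Body=body)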


@deprecated(
    reason="This has been replaced by a new function, clean_test_environment_for_s3()."
    " Will be removed after v26.2023.5000",
    version="26.2023.5000",
    category=FutureWarning,
)
def clean_test_environment(testcase: TestCase) -> None:
    """Clear S3_ENDPOINT_URL then restore it at the end of a test.

    Parameters
    ----------
    testcase : `unittest.TestCase`
        Reference to the test being run; used to add a cleanup function.
    """
    endpoint = os.environ.get("S3_ENDPOINT_URL")

    if not endpoint:
        return
    os.environ["S3_ENDPOINT_URL"] = ""

    def cleanup() -> None:
        if endpoint is not None:
            os.environ["S3_ENDPOINT_URL"] = endpoint

    testcase.addCleanup(cleanup)


@contextmanager
def clean_test_environment_for_s3() -> Iterator[None]:
    """Reset S3 environment to ensure that unit tests with a mock S3 can't
    accidentally reference real infrastructure.
    """
    with patch.dict(
        os.environ,
        {
            "AWS_ACCESS_KEY_ID": "test-access-key",
            "AWS_SECRET_ACCESS_KEY": "test-secret-access-key",
        },
    ) as patched_environ:
        for var in (
            "S3_ENDPOINT_URL",
            "AWS_SECURITY_TOKEN",
            "AWS_SESSION_TOKEN",
            "AWS_PROFILE",
            "AWS_SHARED_CREDENTIALS_FILE",
            "AWS_CONFIG_FILE",
        ):
            patched_environ.pop(var, None)
        # Clear the cached boto3 S3 client instances.
        # This helps us avoid a potential situation where the client could be
        # instantiated before moto mocks are installed, which would prevent
        # the mocks from taking effect.
        _get_s3_client.cache_clear()
        yield
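
# Example (illustrative sketch): a unit test would typically nest the moto
# mock inside this context manager so no real AWS configuration leaks into
# the test; the mock_s3 helper reflects older moto releases and the bucket
# name is an assumption.
#
#     from moto import mock_s3
#
#     def test_something(self):
#         with clean_test_environment_for_s3(), mock_s3():
#             client = getS3Client()
#             client.create_bucket(Bucket="test-bucket")
#             ...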


def getS3Client() -> boto3.client:
    """Create an S3 client with AWS (default) or the specified endpoint.

    Returns
    -------
    s3client : `botocore.client.S3`
        A client of the S3 service.

    Notes
    -----
    The endpoint URL is taken from the environment variable S3_ENDPOINT_URL.
    If none is specified, the default AWS one is used.

    If the environment variable LSST_DISABLE_BUCKET_VALIDATION exists
    and has a value that is not empty, "0", "f", "n", or "false"
    (case-insensitive), then bucket name validation is disabled. This
    disabling allows Ceph multi-tenancy colon separators to appear in
    bucket names.
    """
    if boto3 is None:
        raise ModuleNotFoundError("Could not find boto3. Are you sure it is installed?")
    if botocore is None:
        raise ModuleNotFoundError("Could not find botocore. Are you sure it is installed?")

    endpoint = os.environ.get("S3_ENDPOINT_URL", None)
    if not endpoint:
        endpoint = None  # Handle ""
    disable_value = os.environ.get("LSST_DISABLE_BUCKET_VALIDATION", "0")
    skip_validation = not re.search(r"^(0|f|n|false)?$", disable_value, re.I)

    return _get_s3_client(endpoint, skip_validation)
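
# Example (illustrative sketch): pointing the client at a non-AWS endpoint and
# allowing Ceph-style bucket names; the endpoint URL is an assumption.
#
#     os.environ["S3_ENDPOINT_URL"] = "https://s3.example.org"
#     os.environ["LSST_DISABLE_BUCKET_VALIDATION"] = "1"
#     client = getS3Client()  # cached per (endpoint, validation) combination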


@functools.lru_cache
def _get_s3_client(endpoint: str | None, skip_validation: bool) -> boto3.client:
    # Helper function to cache the client for this endpoint.
    config = botocore.config.Config(read_timeout=180, retries={"mode": "adaptive", "max_attempts": 10})

    client = boto3.client("s3", endpoint_url=endpoint, config=config)
    if skip_validation:
        client.meta.events.unregister("before-parameter-build.s3", validate_bucket_name)
    return client


def s3CheckFileExists(
    path: Location | ResourcePath | str,
    bucket: str | None = None,
    client: boto3.client | None = None,
) -> tuple[bool, int]:
    """Return whether the file exists in the bucket, along with its size.

    Parameters
    ----------
    path : `Location`, `ResourcePath` or `str`
        Location or ResourcePath containing the bucket name and filepath.
    bucket : `str`, optional
        Name of the bucket in which to look. If provided, ``path`` is assumed
        to be relative to the given bucket.
    client : `boto3.client`, optional
        S3 Client object to query; if not supplied, boto3 will try to resolve
        the credentials in the order described in its manual_.

    Returns
    -------
    exists : `bool`
        True if key exists, False otherwise.
    size : `int`
        Size of the key, in bytes, if the key exists; otherwise -1.

    Notes
    -----
    S3 Paths are sensitive to leading and trailing path separators.

    .. _manual: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/\
configuration.html#configuring-credentials
    """
    if boto3 is None:
        raise ModuleNotFoundError("Could not find boto3. Are you sure it is installed?")

    if client is None:
        client = getS3Client()

    if isinstance(path, str):
        if bucket is not None:
            filepath = path
        else:
            uri = ResourcePath(path)
            bucket = uri.netloc
            filepath = uri.relativeToPathRoot
    elif isinstance(path, ResourcePath | Location):
        bucket = path.netloc
        filepath = path.relativeToPathRoot
    else:
        raise TypeError(f"Unsupported path type: {path!r}.")

    try:
        obj = client.head_object(Bucket=bucket, Key=filepath)
        return (True, obj["ContentLength"])
    except client.exceptions.ClientError as err:
        # A "resource unreachable" error means the key does not exist.
        errcode = err.response["ResponseMetadata"]["HTTPStatusCode"]
        if errcode == 404:
            return (False, -1)
        # head_object returns 404 for a missing object only when the user has
        # s3:ListBucket permission; without list permission a 403 is returned
        # instead. In practical terms this generally means that the file does
        # not exist, but it could also mean the user lacks s3:GetObject
        # permission:
        # https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectHEAD.html
        # It is not possible to discern which case it is with certainty.
        if errcode == 403:
            raise PermissionError(
                "Forbidden HEAD operation error occurred. "
                "Verify s3:ListBucket and s3:GetObject "
                "permissions are granted for your IAM user. "
            ) from err
        if errcode == 429:
            # boto3, incorrectly, does not automatically retry on 429, so
            # instead we raise an explicit retry exception for backoff.
            raise _TooManyRequestsError(str(err)) from err
        raise
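
# Example (illustrative sketch): checking an object given a full S3 URI; the
# URI is an assumption.
#
#     exists, size = s3CheckFileExists("s3://some-bucket/path/to/file.fits")
#     if exists:
#         print(f"Object is {size} bytes")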


def bucketExists(bucketName: str, client: boto3.client | None = None) -> bool:
    """Check if the S3 bucket with the given name actually exists.

    Parameters
    ----------
    bucketName : `str`
        Name of the S3 Bucket.
    client : `boto3.client`, optional
        S3 Client object to query; if not supplied, a client is created by
        calling `getS3Client` and boto3 resolves the credentials itself.

    Returns
    -------
    exists : `bool`
        True if it exists, False if no Bucket with specified parameters is
        found.
    """
    if boto3 is None:
        raise ModuleNotFoundError("Could not find boto3. Are you sure it is installed?")

    if client is None:
        client = getS3Client()
    try:
        client.get_bucket_location(Bucket=bucketName)
        return True
    except client.exceptions.NoSuchBucket:
        return False
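
# Example (illustrative sketch): the bucket name is an assumption.
#
#     if not bucketExists("my-test-bucket"):
#         getS3Client().create_bucket(Bucket="my-test-bucket")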


@deprecated(
    reason="This function could accidentally leave real credentials in the environment during testing."
    " A new function, clean_test_environment_for_s3(), can be used to set up mock credentials."
    " Will be removed after v26.2023.5000",
    version="26.2023.5000",
    category=FutureWarning,
)
def setAwsEnvCredentials(
    accessKeyId: str = "dummyAccessKeyId", secretAccessKey: str = "dummySecretAccessKey"
) -> bool:
    """Set AWS credentials environmental variables.

    Parameters
    ----------
    accessKeyId : `str`
        Value given to the AWS_ACCESS_KEY_ID environmental variable. Defaults
        to `dummyAccessKeyId`.
    secretAccessKey : `str`
        Value given to the AWS_SECRET_ACCESS_KEY environmental variable.
        Defaults to `dummySecretAccessKey`.

    Returns
    -------
    setEnvCredentials : `bool`
        True when environmental variables were set, False otherwise.

    Notes
    -----
    If either AWS_ACCESS_KEY_ID or AWS_SECRET_ACCESS_KEY is not set, both
    values are overwritten to ensure that the values are consistent.
    """
    if "AWS_ACCESS_KEY_ID" not in os.environ or "AWS_SECRET_ACCESS_KEY" not in os.environ:
        os.environ["AWS_ACCESS_KEY_ID"] = accessKeyId
        os.environ["AWS_SECRET_ACCESS_KEY"] = secretAccessKey
        return True
    return False


@deprecated(
    reason="This has been replaced by a new function, clean_test_environment_for_s3()."
    " Will be removed after v26.2023.5000",
    version="26.2023.5000",
    category=FutureWarning,
)
def unsetAwsEnvCredentials() -> None:
    """Unset AWS credential environment variables.

    Unsets the AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environmental
    variables.
    """
    if "AWS_ACCESS_KEY_ID" in os.environ:
        del os.environ["AWS_ACCESS_KEY_ID"]
    if "AWS_SECRET_ACCESS_KEY" in os.environ:
        del os.environ["AWS_SECRET_ACCESS_KEY"]