Coverage for python/lsst/resources/s3utils.py: 17%
73 statements
« prev ^ index » next coverage.py v6.4.2, created at 2022-07-13 10:02 +0000
« prev ^ index » next coverage.py v6.4.2, created at 2022-07-13 10:02 +0000
1# This file is part of lsst-resources.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# Use of this source code is governed by a 3-clause BSD-style
10# license that can be found in the LICENSE file.
12from __future__ import annotations
14__all__ = (
15 "getS3Client",
16 "s3CheckFileExists",
17 "bucketExists",
18 "setAwsEnvCredentials",
19 "unsetAwsEnvCredentials",
20)
22import functools
23import os
24from typing import Optional, Tuple, Union
26try:
27 import boto3
28except ImportError:
29 boto3 = None
31try:
32 import botocore
33except ImportError:
34 botocore = None
36from ._resourcePath import ResourcePath
37from .location import Location
def getS3Client() -> boto3.client:
    """Return an S3 client for the configured endpoint.

    Returns
    -------
    s3client : `botocore.client.S3`
        A client of the S3 service.

    Raises
    ------
    ModuleNotFoundError
        Raised if either ``boto3`` or ``botocore`` could not be imported.

    Notes
    -----
    The endpoint URL is read from the ``S3_ENDPOINT_URL`` environment
    variable. An unset or empty variable means no explicit endpoint is
    passed on, so the default AWS one is used.
    """
    if boto3 is None:
        raise ModuleNotFoundError("Could not find boto3. Are you sure it is installed?")
    if botocore is None:
        raise ModuleNotFoundError("Could not find botocore. Are you sure it is installed?")

    # An empty string is treated exactly like an unset variable.
    endpoint = os.environ.get("S3_ENDPOINT_URL") or None
    return _get_s3_client(endpoint)
@functools.lru_cache()
def _get_s3_client(endpoint: Optional[str]) -> boto3.client:
    """Build the S3 client for an endpoint, caching the result.

    Parameters
    ----------
    endpoint : `str` or `None`
        Endpoint URL handed to ``boto3.client`` as ``endpoint_url``.
        `None` is what `getS3Client` passes when ``S3_ENDPOINT_URL`` is
        unset or empty.

    Returns
    -------
    s3client : `botocore.client.S3`
        A client of the S3 service.

    Notes
    -----
    The function is wrapped in `functools.lru_cache`, so repeated calls
    with the same ``endpoint`` value return the same client object instead
    of constructing a new one.

    This helper does not check that ``boto3`` and ``botocore`` were
    imported successfully; callers are expected to have done so.
    """
    config = botocore.config.Config(read_timeout=180, retries={"mode": "adaptive", "max_attempts": 10})

    return boto3.client("s3", endpoint_url=endpoint, config=config)
def s3CheckFileExists(
    path: Union[Location, ResourcePath, str],
    bucket: Optional[str] = None,
    client: Optional[boto3.client] = None,
) -> Tuple[bool, int]:
    """Report whether a key exists in an S3 bucket, and its size.

    Parameters
    ----------
    path : `Location`, `ResourcePath` or `str`
        Location or ResourcePath containing the bucket name and filepath.
        A `str` is used directly as the key when ``bucket`` is given,
        otherwise it is parsed as a `ResourcePath`.
    bucket : `str`, optional
        Name of the bucket in which to look. Only consulted when ``path``
        is a `str`, in which case ``path`` is taken to be relative to this
        bucket. For `Location` and `ResourcePath` inputs the bucket is
        always taken from the path itself.
    client : `boto3.client`, optional
        S3 Client object to query. If not supplied the client returned by
        `getS3Client` is used, and boto3 will try to resolve the
        credentials in the order described in its manual_.

    Returns
    -------
    exists : `bool`
        True if key exists, False otherwise.
    size : `int`
        Size of the key in bytes if it exists, otherwise -1.

    Raises
    ------
    ModuleNotFoundError
        Raised if ``boto3`` could not be imported.
    TypeError
        Raised if ``path`` is not one of the supported types.
    PermissionError
        Raised if the HEAD request is answered with HTTP status 403.

    Notes
    -----
    S3 Paths are sensitive to leading and trailing path separators.

    .. _manual: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/\
    configuration.html#configuring-credentials
    """
    if boto3 is None:
        raise ModuleNotFoundError("Could not find boto3. Are you sure it is installed?")

    s3 = getS3Client() if client is None else client

    # Work out which bucket and key to query.
    if isinstance(path, str):
        if bucket is None:
            parsed = ResourcePath(path)
            bucket = parsed.netloc
            key = parsed.relativeToPathRoot
        else:
            key = path
    elif isinstance(path, (ResourcePath, Location)):
        bucket = path.netloc
        key = path.relativeToPathRoot
    else:
        raise TypeError(f"Unsupported path type: {path!r}.")

    try:
        header = s3.head_object(Bucket=bucket, Key=key)
    except s3.exceptions.ClientError as err:
        status = err.response["ResponseMetadata"]["HTTPStatusCode"]
        # A 404 means the key does not exist.
        if status == 404:
            return (False, -1)
        # head_object only answers 404 for a missing object when the user
        # holds s3:ListBucket permission; without it a 403 comes back
        # instead. In practice that usually means the file is absent, but
        # it can equally mean s3:GetObject permission is missing, and the
        # two cases cannot be told apart with certainty:
        # https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectHEAD.html
        if status == 403:
            raise PermissionError(
                "Forbidden HEAD operation error occured. "
                "Verify s3:ListBucket and s3:GetObject "
                "permissions are granted for your IAM user. "
            ) from err
        # Any other client error is passed through unchanged.
        raise
    else:
        return (True, header["ContentLength"])
def bucketExists(bucketName: str, client: Optional[boto3.client] = None) -> bool:
    """Determine whether an S3 bucket of the given name exists.

    Parameters
    ----------
    bucketName : `str`
        Name of the S3 Bucket
    client : `boto3.client`, optional
        S3 Client object to query. If not supplied the client returned by
        `getS3Client` is used, and boto3 will try to resolve the
        credentials in the order described in its manual_.

    Returns
    -------
    exists : `bool`
        True if the bucket location could be retrieved, False if the
        client reported ``NoSuchBucket``.

    Raises
    ------
    ModuleNotFoundError
        Raised if ``boto3`` could not be imported.

    Notes
    -----
    Only the ``NoSuchBucket`` client exception is handled here; any other
    exception raised by the request propagates to the caller.

    .. _manual: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/\
    configuration.html#configuring-credentials
    """
    if boto3 is None:
        raise ModuleNotFoundError("Could not find boto3. Are you sure it is installed?")

    s3 = client if client is not None else getS3Client()

    try:
        s3.get_bucket_location(Bucket=bucketName)
    except s3.exceptions.NoSuchBucket:
        return False
    return True
def setAwsEnvCredentials(
    accessKeyId: str = "dummyAccessKeyId", secretAccessKey: str = "dummySecretAccessKey"
) -> bool:
    """Ensure the AWS credential environment variables are defined.

    Parameters
    ----------
    accessKeyId : `str`
        Value given to AWS_ACCESS_KEY_ID environmental variable. Defaults to
        `dummyAccessKeyId`.
    secretAccessKey : `str`
        Value given to AWS_SECRET_ACCESS_KEY environmental variable. Defaults
        to `dummySecretAccessKey`.

    Returns
    -------
    setEnvCredentials : `bool`
        True when environmental variables were set, False otherwise.

    Notes
    -----
    The environment is left untouched only when both AWS_ACCESS_KEY_ID and
    AWS_SECRET_ACCESS_KEY are already present. If either one is missing,
    both are overwritten so that the pair stays consistent.
    """
    environ = os.environ
    if "AWS_ACCESS_KEY_ID" in environ and "AWS_SECRET_ACCESS_KEY" in environ:
        return False

    replacements = (
        ("AWS_ACCESS_KEY_ID", accessKeyId),
        ("AWS_SECRET_ACCESS_KEY", secretAccessKey),
    )
    for name, value in replacements:
        environ[name] = value
    return True
def unsetAwsEnvCredentials() -> None:
    """Remove the AWS credential variables from the environment.

    Deletes AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY from the process
    environment. A variable that is not currently set is skipped.
    """
    for name in ("AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"):
        # The default makes an absent variable a no-op rather than an error.
        os.environ.pop(name, None)