Coverage for python/lsst/daf/butler/core/_butlerUri/s3utils.py: 17%
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ("getS3Client", "s3CheckFileExists", "bucketExists", "setAwsEnvCredentials",
25 "unsetAwsEnvCredentials")
27import functools
28import os
30from typing import (
31 Optional,
32 Tuple,
33 Union,
34)
36try:
37 import boto3
38except ImportError:
39 boto3 = None
41try:
42 import botocore
43except ImportError:
44 botocore = None
46from ..location import Location
47from ._butlerUri import ButlerURI
def getS3Client() -> boto3.client:
    """Return an S3 client for the configured endpoint.

    Returns
    -------
    s3client : `botocore.client.S3`
        A client of the S3 service.

    Raises
    ------
    ModuleNotFoundError
        Raised if either ``boto3`` or ``botocore`` could not be imported.

    Notes
    -----
    The endpoint is read from the ``S3_ENDPOINT_URL`` environment variable.
    When the variable is unset or empty, no explicit endpoint is passed on
    and the default AWS endpoint is used.
    """
    # Both optional dependencies must be importable; boto3 is checked first.
    for name, module in (("boto3", boto3), ("botocore", botocore)):
        if module is None:
            raise ModuleNotFoundError(f"Could not find {name}. Are you sure it is installed?")

    # An unset variable and an empty string both collapse to None.
    endpoint = os.environ.get("S3_ENDPOINT_URL") or None

    return _get_s3_client(endpoint)
@functools.lru_cache()
def _get_s3_client(endpoint: Optional[str]) -> boto3.client:
    """Build, and cache per endpoint, the S3 client.

    Parameters
    ----------
    endpoint : `str` or `None`
        Endpoint URL handed to ``boto3.client`` as ``endpoint_url``.
        `None` leaves the endpoint choice to boto3.

    Returns
    -------
    s3client : `botocore.client.S3`
        A client of the S3 service. Repeated calls with the same
        ``endpoint`` return the same cached object.

    Notes
    -----
    The client is configured with a 180 second read timeout and the
    ``adaptive`` retry mode with at most 10 attempts.
    """
    # Helper function to cache the client for this endpoint
    config = botocore.config.Config(
        read_timeout=180,
        retries={
            'mode': 'adaptive',
            'max_attempts': 10
        }
    )

    return boto3.client("s3", endpoint_url=endpoint, config=config)
def s3CheckFileExists(path: Union[Location, ButlerURI, str], bucket: Optional[str] = None,
                      client: Optional[boto3.client] = None) -> Tuple[bool, int]:
    """Report whether a key is present in an S3 bucket, and its size.

    Parameters
    ----------
    path : `Location`, `ButlerURI` or `str`
        Object to look for. A `Location` or `ButlerURI` supplies both the
        bucket name and the key. A `str` is used as-is as the key when
        ``bucket`` is given, and is parsed as a URI otherwise.
    bucket : `str`, optional
        Name of the bucket in which to look. Only used when ``path`` is a
        `str`; for a `Location` or `ButlerURI` the bucket is taken from
        the path itself.
    client : `boto3.client`, optional
        S3 client to query. If not supplied one is obtained from
        `getS3Client`, and boto3 resolves credentials in the order
        described in its manual_.

    Returns
    -------
    exists : `bool`
        True if the key exists, False otherwise.
    size : `int`
        Size of the key in bytes if it exists, otherwise -1.

    Raises
    ------
    ModuleNotFoundError
        Raised if ``boto3`` could not be imported.
    TypeError
        Raised if ``path`` is not a `str`, `ButlerURI` or `Location`.
    PermissionError
        Raised if the HEAD request is answered with HTTP status 403.

    Notes
    -----
    S3 paths are sensitive to leading and trailing path separators.

    Client errors with a status other than 404 or 403 are re-raised
    unchanged.

    .. _manual: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/\
    configuration.html#configuring-credentials
    """
    if boto3 is None:
        raise ModuleNotFoundError("Could not find boto3. Are you sure it is installed?")

    s3 = getS3Client() if client is None else client

    # Work out which bucket/key pair to query.
    if isinstance(path, str):
        if bucket is None:
            parsed = ButlerURI(path)
            bucket, key = parsed.netloc, parsed.relativeToPathRoot
        else:
            key = path
    elif isinstance(path, (ButlerURI, Location)):
        bucket, key = path.netloc, path.relativeToPathRoot
    else:
        raise TypeError(f"Unsupported path type: {path!r}.")

    try:
        header = s3.head_object(Bucket=bucket, Key=key)
        return (True, header["ContentLength"])
    except s3.exceptions.ClientError as err:
        status = err.response["ResponseMetadata"]["HTTPStatusCode"]

        # A 404 means the key does not exist.
        if status == 404:
            return (False, -1)

        # head_object only answers 404 for a missing object when the user
        # holds s3:ListBucket; without that permission a 403 comes back
        # instead. In practice a 403 usually means the file is absent, but
        # it can equally mean s3:GetObject is missing, and the two cases
        # cannot be told apart with certainty:
        # https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectHEAD.html
        if status == 403:
            raise PermissionError("Forbidden HEAD operation error occured. "
                                  "Verify s3:ListBucket and s3:GetObject "
                                  "permissions are granted for your IAM user. ") from err
        raise
def bucketExists(bucketName: str, client: Optional[boto3.client] = None) -> bool:
    """Tell whether an S3 bucket of the given name exists.

    Parameters
    ----------
    bucketName : `str`
        Name of the S3 bucket.
    client : `boto3.client`, optional
        S3 client to query. If not supplied one is obtained from
        `getS3Client`, and boto3 resolves credentials in the order
        described in its manual_.

    Returns
    -------
    exists : `bool`
        True if the bucket location lookup succeeds, False if the client
        reports that no such bucket exists.

    Raises
    ------
    ModuleNotFoundError
        Raised if ``boto3`` could not be imported.

    Notes
    -----
    Only the client's ``NoSuchBucket`` error is translated into `False`;
    any other error raised by the lookup propagates to the caller.

    .. _manual: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/\
    configuration.html#configuring-credentials
    """
    if boto3 is None:
        raise ModuleNotFoundError("Could not find boto3. Are you sure it is installed?")

    s3 = client if client is not None else getS3Client()

    try:
        s3.get_bucket_location(Bucket=bucketName)
    except s3.exceptions.NoSuchBucket:
        return False
    return True
def setAwsEnvCredentials(accessKeyId: str = 'dummyAccessKeyId',
                         secretAccessKey: str = "dummySecretAccessKey") -> bool:
    """Populate the AWS credential environment variables if incomplete.

    Parameters
    ----------
    accessKeyId : `str`
        Value stored in the AWS_ACCESS_KEY_ID environment variable.
        Defaults to ``dummyAccessKeyId``.
    secretAccessKey : `str`
        Value stored in the AWS_SECRET_ACCESS_KEY environment variable.
        Defaults to ``dummySecretAccessKey``.

    Returns
    -------
    setEnvCredentials : `bool`
        True if the environment variables were written, False if both
        were already present and nothing was changed.

    Notes
    -----
    When at least one of AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY is
    missing, both are (over)written so that the pair stays consistent.
    """
    env = os.environ

    # Nothing to do when the pair is already complete.
    if "AWS_ACCESS_KEY_ID" in env and "AWS_SECRET_ACCESS_KEY" in env:
        return False

    env["AWS_ACCESS_KEY_ID"] = accessKeyId
    env["AWS_SECRET_ACCESS_KEY"] = secretAccessKey
    return True
def unsetAwsEnvCredentials() -> None:
    """Remove the AWS credential environment variables.

    Deletes AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY from the
    environment. A variable that is not set is silently skipped.
    """
    for name in ("AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"):
        os.environ.pop(name, None)