Coverage for python/lsst/daf/butler/core/_butlerUri/s3utils.py : 15%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ("getS3Client", "s3CheckFileExists", "bucketExists", "setAwsEnvCredentials",
           "unsetAwsEnvCredentials")

import os

from typing import (
    Optional,
    Tuple,
    Union,
)

# boto3 and botocore are optional dependencies: when an import fails the
# name is bound to None so that this module can still be imported, and the
# functions below test for None before using it.
try:
    import boto3
except ImportError:
    boto3 = None

try:
    import botocore
except ImportError:
    botocore = None

from ..location import Location
from ._butlerUri import ButlerURI
def getS3Client() -> boto3.client:
    """Create a S3 client with AWS (default) or the specified endpoint.

    Returns
    -------
    s3client : `botocore.client.S3`
        A client of the S3 service.

    Raises
    ------
    ModuleNotFoundError
        Raised if ``boto3`` or ``botocore`` could not be imported.

    Notes
    -----
    The endpoint URL is from the environment variable S3_ENDPOINT_URL.
    If none is specified, the default AWS one is used.

    The client is created with a read timeout of 180 and the ``adaptive``
    retry mode with a maximum of 10 attempts.
    """
    if boto3 is None:
        raise ModuleNotFoundError("Could not find boto3. "
                                  "Are you sure it is installed?")
    if botocore is None:
        raise ModuleNotFoundError("Could not find botocore. "
                                  "Are you sure it is installed?")

    # Treat an unset variable and an empty string ("") the same way: pass
    # None so that no explicit endpoint is requested.
    endpoint = os.environ.get("S3_ENDPOINT_URL") or None

    config = botocore.config.Config(
        read_timeout=180,
        retries={
            'mode': 'adaptive',
            'max_attempts': 10
        }
    )

    return boto3.client("s3", endpoint_url=endpoint, config=config)
def s3CheckFileExists(path: Union[Location, ButlerURI, str], bucket: Optional[str] = None,
                      client: Optional[boto3.client] = None) -> Tuple[bool, int]:
    """Return (True, filesize) if file exists in the bucket and (False, -1) if
    the file is not found.

    Parameters
    ----------
    path : `Location`, `ButlerURI` or `str`
        Location or ButlerURI containing the bucket name and filepath.
    bucket : `str`, optional
        Name of the bucket in which to look. If provided and ``path`` is a
        `str`, ``path`` is assumed to be relative to the given bucket. When
        ``path`` is a `Location` or `ButlerURI` the bucket is always taken
        from ``path`` and this argument is ignored.
    client : `boto3.client`, optional
        S3 Client object to query, if not supplied boto3 will try to resolve
        the credentials in the order described in its manual_.

    Returns
    -------
    exists : `bool`
        True if key exists, False otherwise.
    size : `int`
        Size of the key, if key exists, in bytes, otherwise -1

    Raises
    ------
    ModuleNotFoundError
        Raised if ``boto3`` could not be imported.
    TypeError
        Raised if ``path`` is not a `Location`, `ButlerURI` or `str`.
    PermissionError
        Raised if the HEAD request is answered with HTTP status 403.

    Notes
    -----
    S3 Paths are sensitive to leading and trailing path separators.

    Client errors with a status other than 404 or 403 are re-raised
    unchanged.

    .. _manual: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/\
    configuration.html#configuring-credentials
    """
    if boto3 is None:
        raise ModuleNotFoundError("Could not find boto3. "
                                  "Are you sure it is installed?")

    if client is None:
        client = getS3Client()

    if isinstance(path, str):
        if bucket is not None:
            # Caller named the bucket explicitly; path is already the key.
            filepath = path
        else:
            uri = ButlerURI(path)
            bucket = uri.netloc
            filepath = uri.relativeToPathRoot
    elif isinstance(path, (ButlerURI, Location)):
        bucket = path.netloc
        filepath = path.relativeToPathRoot
    else:
        # Without this branch ``filepath`` would be unbound below and the
        # caller would see an unhelpful UnboundLocalError.
        raise TypeError(f"Unsupported type for path: {type(path).__name__}; "
                        "expected Location, ButlerURI or str.")

    try:
        obj = client.head_object(Bucket=bucket, Key=filepath)
    except client.exceptions.ClientError as err:
        status = err.response["ResponseMetadata"]["HTTPStatusCode"]
        # resource unreachable error means key does not exist
        if status == 404:
            return (False, -1)
        # head_object returns 404 when object does not exist only when user has
        # s3:ListBucket permission. If list permission does not exist a 403 is
        # returned. In practical terms this generally means that the file does
        # not exist, but it could also mean user lacks s3:GetObject permission:
        # https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectHEAD.html
        # I don't think it's possible to discern which case it is with
        # certainty.
        if status == 403:
            raise PermissionError("Forbidden HEAD operation error occured. "
                                  "Verify s3:ListBucket and s3:GetObject "
                                  "permissions are granted for your IAM user. ") from err
        raise
    return (True, obj["ContentLength"])
def bucketExists(bucketName: str, client: Optional[boto3.client] = None) -> bool:
    """Check if the S3 bucket with the given name actually exists.

    Parameters
    ----------
    bucketName : `str`
        Name of the S3 Bucket
    client : `boto3.client`, optional
        S3 Client object to query, if not supplied boto3 will try to resolve
        the credentials in the order described in its manual_.

    Returns
    -------
    exists : `bool`
        True if it exists, False if no Bucket with specified parameters is
        found.

    Raises
    ------
    ModuleNotFoundError
        Raised if ``boto3`` could not be imported.

    Notes
    -----
    Only the client's ``NoSuchBucket`` error is translated into `False`; any
    other exception raised by the request propagates to the caller.

    .. _manual: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/\
    configuration.html#configuring-credentials
    """
    if boto3 is None:
        raise ModuleNotFoundError("Could not find boto3. "
                                  "Are you sure it is installed?")

    if client is None:
        client = getS3Client()

    # The response itself is not needed; the call is made only to see
    # whether it raises NoSuchBucket.
    try:
        client.get_bucket_location(Bucket=bucketName)
    except client.exceptions.NoSuchBucket:
        return False
    return True
def setAwsEnvCredentials(accessKeyId: str = 'dummyAccessKeyId',
                         secretAccessKey: str = "dummySecretAccessKey") -> bool:
    """Set AWS credentials environmental variables AWS_ACCESS_KEY_ID and
    AWS_SECRET_ACCESS_KEY.

    Parameters
    ----------
    accessKeyId : `str`
        Value given to AWS_ACCESS_KEY_ID environmental variable. Defaults to
        'dummyAccessKeyId'
    secretAccessKey : `str`
        Value given to AWS_SECRET_ACCESS_KEY environmental variable. Defaults
        to 'dummySecretAccessKey'

    Returns
    -------
    setEnvCredentials : `bool`
        True when environmental variables were set, False otherwise.

    Notes
    -----
    If either AWS_ACCESS_KEY_ID or AWS_SECRET_ACCESS_KEY is not set, both
    values are overwritten. If both are already set, the environment is
    left untouched.
    """
    # Existing credentials take precedence: only act when at least one of
    # the two variables is missing.
    if "AWS_ACCESS_KEY_ID" in os.environ and "AWS_SECRET_ACCESS_KEY" in os.environ:
        return False

    os.environ["AWS_ACCESS_KEY_ID"] = accessKeyId
    os.environ["AWS_SECRET_ACCESS_KEY"] = secretAccessKey
    return True
def unsetAwsEnvCredentials() -> None:
    """Unset the AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environmental
    variables.

    Notes
    -----
    Variables that are not currently set are silently skipped, so the
    function can be called repeatedly.
    """
    for name in ("AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"):
        # pop with a default removes the variable if present and does
        # nothing otherwise.
        os.environ.pop(name, None)