Coverage for python/lsst/daf/butler/core/s3utils.py : 14%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ("getS3Client", "s3CheckFileExists", "bucketExists", "setAwsEnvCredentials",
25 "unsetAwsEnvCredentials")
27import os
29from typing import (
30 Optional,
31 Tuple,
32 Union,
33)
35try:
36 import boto3
37except ImportError:
38 boto3 = None
40try:
41 import botocore
42except ImportError:
43 botocore = None
45from .location import ButlerURI, Location
def getS3Client() -> boto3.client:
    """Build an S3 client for AWS (default) or a user-specified endpoint.

    Returns
    -------
    s3client : `botocore.client.S3`
        A client of the S3 service.

    Raises
    ------
    ModuleNotFoundError
        Raised if ``boto3`` or ``botocore`` could not be imported.

    Notes
    -----
    The endpoint URL is taken from the environment variable
    ``S3_ENDPOINT_URL``. If it is unset or empty, no explicit endpoint is
    passed to boto3 and the default AWS one is used.

    The client is configured with a read timeout of 180 and the
    ``adaptive`` retry mode limited to 10 attempts.
    """
    if boto3 is None:
        raise ModuleNotFoundError("Could not find boto3. Are you sure it is installed?")
    if botocore is None:
        raise ModuleNotFoundError("Could not find botocore. Are you sure it is installed?")

    # An unset variable and an empty string both collapse to None here.
    endpointUrl = os.environ.get("S3_ENDPOINT_URL") or None

    retryPolicy = {"mode": "adaptive", "max_attempts": 10}
    clientConfig = botocore.config.Config(read_timeout=180, retries=retryPolicy)

    return boto3.client("s3", endpoint_url=endpointUrl, config=clientConfig)
def s3CheckFileExists(path: Union[Location, ButlerURI, str], bucket: Optional[str] = None,
                      client: Optional[boto3.client] = None) -> Tuple[bool, int]:
    """Return (True, filesize) if the file exists in the bucket and
    (False, -1) if the file is not found.

    Parameters
    ----------
    path : `Location`, `ButlerURI` or `str`
        Location or ButlerURI containing the bucket name and filepath.
    bucket : `str`, optional
        Name of the bucket in which to look. Only used when ``path`` is a
        `str`, in which case ``path`` is used verbatim as the key within
        this bucket. For `Location` and `ButlerURI` inputs the bucket is
        always taken from the path itself.
    client : `boto3.client`, optional
        S3 Client object to query, if not supplied boto3 will try to resolve
        the credentials in the order described in its manual_.

    Returns
    -------
    exists : `bool`
        True if key exists, False otherwise.
    size : `int`
        Size of the key, if key exists, in bytes, otherwise -1.

    Raises
    ------
    ModuleNotFoundError
        Raised if ``boto3`` could not be imported.
    TypeError
        Raised if ``path`` is not a `Location`, `ButlerURI` or `str`.
    PermissionError
        Raised if the HEAD request is answered with HTTP status 403.

    Notes
    -----
    S3 Paths are sensitive to leading and trailing path separators.

    Client errors with a status other than 404 or 403 are re-raised
    unchanged.

    .. _manual: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/\
    configuration.html#configuring-credentials
    """
    if boto3 is None:
        raise ModuleNotFoundError("Could not find boto3. "
                                  "Are you sure it is installed?")

    if client is None:
        client = getS3Client()

    if isinstance(path, str):
        if bucket is not None:
            # Caller supplied the bucket; the string is already the key.
            filepath = path
        else:
            uri = ButlerURI(path)
            bucket = uri.netloc
            filepath = uri.relativeToPathRoot
    elif isinstance(path, (ButlerURI, Location)):
        bucket = path.netloc
        filepath = path.relativeToPathRoot
    else:
        # Without this branch ``filepath`` would be unbound below and the
        # caller would see an opaque UnboundLocalError.
        raise TypeError(f"Unsupported path type {type(path).__name__!r}; "
                        "expected Location, ButlerURI or str.")

    try:
        obj = client.head_object(Bucket=bucket, Key=filepath)
    except client.exceptions.ClientError as err:
        status = err.response["ResponseMetadata"]["HTTPStatusCode"]
        # resource unreachable error means key does not exist
        if status == 404:
            return (False, -1)
        # head_object returns 404 when object does not exist only when user
        # has s3:ListBucket permission. If list permission does not exist a
        # 403 is returned. In practical terms this generally means that the
        # file does not exist, but it could also mean user lacks
        # s3:GetObject permission:
        # https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectHEAD.html
        # It does not seem possible to discern which case it is with
        # certainty.
        if status == 403:
            raise PermissionError("Forbidden HEAD operation error occured. "
                                  "Verify s3:ListBucket and s3:GetObject "
                                  "permissions are granted for your IAM user. ") from err
        raise

    return (True, obj["ContentLength"])
def bucketExists(bucketName: str, client: Optional[boto3.client] = None) -> bool:
    """Report whether an S3 bucket with the given name exists.

    Parameters
    ----------
    bucketName : `str`
        Name of the S3 Bucket
    client : `boto3.client`, optional
        S3 Client object to query, if not supplied boto3 will try to resolve
        the credentials in the order described in its manual_.

    Returns
    -------
    exists : `bool`
        True if the bucket location query succeeds, False if the client
        reports that no such bucket exists.

    Raises
    ------
    ModuleNotFoundError
        Raised if ``boto3`` could not be imported.

    .. _manual: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/\
    configuration.html#configuring-credentials
    """
    if boto3 is None:
        raise ModuleNotFoundError("Could not find boto3. Are you sure it is installed?")

    s3 = getS3Client() if client is None else client

    # Only the "no such bucket" error is translated to False; any other
    # error raised by the query propagates to the caller.
    try:
        s3.get_bucket_location(Bucket=bucketName)
    except s3.exceptions.NoSuchBucket:
        return False
    return True
def setAwsEnvCredentials(accessKeyId: str = 'dummyAccessKeyId',
                         secretAccessKey: str = "dummySecretAccessKey") -> bool:
    """Populate the AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environment
    variables unless both are already defined.

    Parameters
    ----------
    accessKeyId : `str`
        Value given to AWS_ACCESS_KEY_ID environmental variable. Defaults to
        'dummyAccessKeyId'
    secretAccessKey : `str`
        Value given to AWS_SECRET_ACCESS_KEY environmental variable. Defaults
        to 'dummySecretAccessKey'

    Returns
    -------
    setEnvCredentials : `bool`
        True when environmental variables were set, False otherwise.

    Notes
    -----
    If either AWS_ACCESS_KEY_ID or AWS_SECRET_ACCESS_KEY is missing, both
    values are overwritten, including the one that was already present.
    """
    alreadyConfigured = ("AWS_ACCESS_KEY_ID" in os.environ
                         and "AWS_SECRET_ACCESS_KEY" in os.environ)
    if alreadyConfigured:
        # Leave a complete, pre-existing credential pair untouched.
        return False

    credentials = {
        "AWS_ACCESS_KEY_ID": accessKeyId,
        "AWS_SECRET_ACCESS_KEY": secretAccessKey,
    }
    for variable, value in credentials.items():
        os.environ[variable] = value
    return True
def unsetAwsEnvCredentials() -> None:
    """Remove the AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environmental
    variables.

    Variables that are not currently set are silently skipped.
    """
    for variable in ("AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"):
        # The default argument makes removal a no-op when the key is absent.
        os.environ.pop(variable, None)