Coverage for python/lsst/daf/butler/core/s3utils.py : 12%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# Public API of this module.
__all__ = ("getS3Client", "s3CheckFileExists", "bucketExists", "setAwsEnvCredentials",
           "unsetAwsEnvCredentials")

import os

# boto3 is an optional dependency: when it cannot be imported the name is
# bound to None and the functions below raise ModuleNotFoundError on use.
try:
    import boto3
except ImportError:
    boto3 = None

from .location import ButlerURI, Location
def getS3Client():
    """Create a S3 client with AWS (default) or the specified endpoint.

    Returns
    -------
    s3client : `botocore.client.S3`
        A client of the S3 service.

    Raises
    ------
    ModuleNotFoundError
        Raised if boto3 could not be imported.

    Notes
    -----
    The endpoint URL is from the environment variable S3_ENDPOINT_URL.
    If none is specified, or if it is set to an empty string, the default
    AWS one is used.
    """
    if boto3 is None:
        raise ModuleNotFoundError("Could not find boto3. "
                                  "Are you sure it is installed?")

    # An unset variable and an empty string are both normalised to None so
    # that the client falls back to its default endpoint.
    endpoint = os.environ.get("S3_ENDPOINT_URL") or None
    return boto3.client("s3", endpoint_url=endpoint)
def s3CheckFileExists(path, bucket=None, client=None):
    """Return (True, filesize) if the file exists in the bucket and
    (False, -1) if the file is not found.

    Parameters
    ----------
    path : `Location`, `ButlerURI` or `str`
        Location or ButlerURI containing the bucket name and filepath.
    bucket : `str`, optional
        Name of the bucket in which to look. If provided, a `str` path is
        assumed to be relative to the given bucket. When ``path`` is a
        `Location` or `ButlerURI` the bucket is always taken from the path
        and this argument is ignored.
    client : `boto3.client`, optional
        S3 Client object to query, if not supplied boto3 will try to resolve
        the credentials in the order described in its manual_.

    Returns
    -------
    exists : `bool`
        True if key exists, False otherwise.
    size : `int`
        Size of the key, if key exists, in bytes, otherwise -1.

    Raises
    ------
    ModuleNotFoundError
        Raised if boto3 could not be imported.
    TypeError
        Raised if ``path`` is not a `str`, `ButlerURI` or `Location`.
    PermissionError
        Raised if the HEAD request is answered with HTTP status 403.

    Notes
    -----
    S3 Paths are sensitive to leading and trailing path separators.

    Any client error other than a 404 or 403 response is re-raised
    unchanged.

    .. _manual: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/\
    configuration.html#configuring-credentials
    """
    if boto3 is None:
        raise ModuleNotFoundError("Could not find boto3. "
                                  "Are you sure it is installed?")

    if client is None:
        client = getS3Client()

    if isinstance(path, str):
        if bucket is not None:
            # The caller named the bucket, so the string is already the key.
            filepath = path
        else:
            uri = ButlerURI(path)
            bucket = uri.netloc
            filepath = uri.relativeToPathRoot
    elif isinstance(path, (ButlerURI, Location)):
        bucket = path.netloc
        filepath = path.relativeToPathRoot
    else:
        # Fail with a clear message instead of an unbound-variable error
        # further down.
        raise TypeError(f"Unsupported path type {type(path).__name__}; "
                        "expected str, ButlerURI or Location.")

    try:
        obj = client.head_object(Bucket=bucket, Key=filepath)
    except client.exceptions.ClientError as err:
        status = err.response["ResponseMetadata"]["HTTPStatusCode"]
        # resource unreachable error means key does not exist
        if status == 404:
            return (False, -1)
        # head_object returns 404 when object does not exist only when user
        # has s3:ListBucket permission. If list permission does not exist a
        # 403 is returned. In practical terms this generally means that the
        # file does not exist, but it could also mean user lacks
        # s3:GetObject permission:
        # https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectHEAD.html
        # It does not seem possible to discern which case it is with
        # certainty.
        if status == 403:
            raise PermissionError("Forbidden HEAD operation error occured. "
                                  "Verify s3:ListBucket and s3:GetObject "
                                  "permissions are granted for your IAM user. ") from err
        raise

    return (True, obj["ContentLength"])
def bucketExists(bucketName, client=None):
    """Check if the S3 bucket with the given name actually exists.

    Parameters
    ----------
    bucketName : `str`
        Name of the S3 Bucket.
    client : `boto3.client`, optional
        S3 Client object to query, if not supplied boto3 will try to resolve
        the credentials in the order described in its manual_.

    Returns
    -------
    exists : `bool`
        True if it exists, False if no Bucket with specified parameters is
        found.

    Raises
    ------
    ModuleNotFoundError
        Raised if boto3 could not be imported.

    .. _manual: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/\
    configuration.html#configuring-credentials
    """
    if boto3 is None:
        raise ModuleNotFoundError("Could not find boto3. "
                                  "Are you sure it is installed?")

    if client is None:
        client = getS3Client()

    # Only the NoSuchBucket error is translated to False; any other error
    # raised by the client propagates to the caller.
    try:
        client.get_bucket_location(Bucket=bucketName)
    except client.exceptions.NoSuchBucket:
        return False
    return True
def setAwsEnvCredentials(accessKeyId="dummyAccessKeyId", secretAccessKey="dummySecretAccessKey"):
    """Set AWS credentials environmental variables AWS_ACCESS_KEY_ID and
    AWS_SECRET_ACCESS_KEY.

    Parameters
    ----------
    accessKeyId : `str`
        Value given to AWS_ACCESS_KEY_ID environmental variable. Defaults to
        'dummyAccessKeyId'.
    secretAccessKey : `str`
        Value given to AWS_SECRET_ACCESS_KEY environmental variable. Defaults
        to 'dummySecretAccessKey'.

    Returns
    -------
    setEnvCredentials : `bool`
        True when environmental variables were set, False otherwise.

    Notes
    -----
    If either AWS_ACCESS_KEY_ID or AWS_SECRET_ACCESS_KEY is not set, both
    values are overwritten. If both are already set, nothing is changed.
    """
    # Leave a complete, pre-existing pair of credentials untouched.
    if "AWS_ACCESS_KEY_ID" in os.environ and "AWS_SECRET_ACCESS_KEY" in os.environ:
        return False

    os.environ["AWS_ACCESS_KEY_ID"] = accessKeyId
    os.environ["AWS_SECRET_ACCESS_KEY"] = secretAccessKey
    return True
def unsetAwsEnvCredentials():
    """Unset the AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environmental
    variables.

    Variables that are not currently set are silently skipped.
    """
    for name in ("AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"):
        # pop with a default removes the variable if present and is a no-op
        # otherwise.
        os.environ.pop(name, None)