Coverage for python/lsst/daf/butler/core/_butlerUri/s3utils.py : 15%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ("getS3Client", "s3CheckFileExists", "bucketExists", "setAwsEnvCredentials",
25 "unsetAwsEnvCredentials")
27import os
29from typing import (
30 Optional,
31 Tuple,
32 Union,
33)
35try:
36 import boto3
37except ImportError:
38 boto3 = None
40try:
41 import botocore
42except ImportError:
43 botocore = None
45from ..location import Location
46from ._butlerUri import ButlerURI
def getS3Client() -> boto3.client:
    """Build an S3 client for AWS or for a custom endpoint.

    Returns
    -------
    s3client : `botocore.client.S3`
        A client of the S3 service.

    Raises
    ------
    ModuleNotFoundError
        Raised if either ``boto3`` or ``botocore`` could not be imported.

    Notes
    -----
    The endpoint URL is read from the ``S3_ENDPOINT_URL`` environment
    variable. If the variable is unset or empty the default AWS endpoint
    is used.
    """
    # Both optional dependencies are needed; boto3 is checked first.
    for module, label in ((boto3, "boto3"), (botocore, "botocore")):
        if module is None:
            raise ModuleNotFoundError(f"Could not find {label}. Are you sure it is installed?")

    # An empty string is treated the same as an unset variable.
    endpointUrl = os.environ.get("S3_ENDPOINT_URL") or None

    retryPolicy = {'mode': 'adaptive', 'max_attempts': 10}
    clientConfig = botocore.config.Config(read_timeout=180, retries=retryPolicy)

    return boto3.client("s3", endpoint_url=endpointUrl, config=clientConfig)
def s3CheckFileExists(path: Union[Location, ButlerURI, str], bucket: Optional[str] = None,
                      client: Optional[boto3.client] = None) -> Tuple[bool, int]:
    """Report whether a key exists in an S3 bucket, and its size.

    Parameters
    ----------
    path : `Location`, `ButlerURI` or `str`
        Location or ButlerURI containing the bucket name and filepath.
        A `str` is used verbatim as the key when ``bucket`` is given,
        otherwise it is parsed as a `ButlerURI`.
    bucket : `str`, optional
        Name of the bucket in which to look. Only consulted when ``path``
        is a `str`; for `Location` and `ButlerURI` inputs the bucket is
        taken from the ``netloc`` of ``path``.
    client : `boto3.client`, optional
        S3 Client object to query. If not supplied one is created with
        `getS3Client` and boto3 resolves the credentials in the order
        described in its manual_.

    Returns
    -------
    exists : `bool`
        True if key exists, False otherwise.
    size : `int`
        Size of the key, if key exists, in bytes, otherwise -1.

    Raises
    ------
    ModuleNotFoundError
        Raised if ``boto3`` could not be imported.
    TypeError
        Raised if ``path`` is not a `str`, `ButlerURI` or `Location`.
    PermissionError
        Raised if the HEAD request is answered with HTTP status 403.

    Notes
    -----
    S3 Paths are sensitive to leading and trailing path separators.

    .. _manual: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/\
    configuration.html#configuring-credentials
    """
    if boto3 is None:
        raise ModuleNotFoundError("Could not find boto3. Are you sure it is installed?")

    s3 = getS3Client() if client is None else client

    if isinstance(path, str) and bucket is not None:
        # Caller supplied the bucket explicitly; the string is the key.
        key = path
    else:
        if isinstance(path, str):
            source = ButlerURI(path)
        elif isinstance(path, (ButlerURI, Location)):
            source = path
        else:
            raise TypeError(f"Unsupported path type: {path!r}.")
        bucket = source.netloc
        key = source.relativeToPathRoot

    try:
        header = s3.head_object(Bucket=bucket, Key=key)
        return (True, header["ContentLength"])
    except s3.exceptions.ClientError as err:
        httpStatus = err.response["ResponseMetadata"]["HTTPStatusCode"]

        # A 404 means the key does not exist.
        if httpStatus == 404:
            return (False, -1)

        # head_object only answers 404 for a missing object when the user
        # holds s3:ListBucket permission; without it a 403 is returned.
        # In practice that usually means the file does not exist, but it
        # may also mean s3:GetObject permission is missing, and the two
        # cases cannot be told apart with certainty:
        # https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectHEAD.html
        if httpStatus == 403:
            raise PermissionError("Forbidden HEAD operation error occured. "
                                  "Verify s3:ListBucket and s3:GetObject "
                                  "permissions are granted for your IAM user. ") from err

        # Any other client error is propagated unchanged.
        raise
def bucketExists(bucketName: str, client: Optional[boto3.client] = None) -> bool:
    """Determine whether an S3 bucket of the given name exists.

    Parameters
    ----------
    bucketName : `str`
        Name of the S3 Bucket
    client : `boto3.client`, optional
        S3 Client object to query. If not supplied one is created with
        `getS3Client` and boto3 resolves the credentials in the order
        described in its manual_.

    Returns
    -------
    exists : `bool`
        True if it exists, False if the bucket-location query reports
        that there is no such bucket.

    Raises
    ------
    ModuleNotFoundError
        Raised if ``boto3`` could not be imported.

    .. _manual: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/\
    configuration.html#configuring-credentials
    """
    if boto3 is None:
        raise ModuleNotFoundError("Could not find boto3. Are you sure it is installed?")

    s3 = client if client is not None else getS3Client()

    # Only a missing bucket is translated to False; every other failure
    # of the query propagates to the caller.
    try:
        s3.get_bucket_location(Bucket=bucketName)
    except s3.exceptions.NoSuchBucket:
        return False
    return True
def setAwsEnvCredentials(accessKeyId: str = 'dummyAccessKeyId',
                         secretAccessKey: str = "dummySecretAccessKey") -> bool:
    """Populate the AWS credential environment variables if needed.

    Parameters
    ----------
    accessKeyId : `str`
        Value given to AWS_ACCESS_KEY_ID environmental variable. Defaults to
        `dummyAccessKeyId`.
    secretAccessKey : `str`
        Value given to AWS_SECRET_ACCESS_KEY environmental variable. Defaults
        to `dummySecretAccessKey`.

    Returns
    -------
    setEnvCredentials : `bool`
        True when environmental variables were set, False otherwise.

    Notes
    -----
    Nothing is changed when both AWS_ACCESS_KEY_ID and
    AWS_SECRET_ACCESS_KEY are already present. If at least one of them is
    missing, both are overwritten so that the pair stays consistent.
    """
    credentials = {
        "AWS_ACCESS_KEY_ID": accessKeyId,
        "AWS_SECRET_ACCESS_KEY": secretAccessKey,
    }

    # A complete pair already in the environment is left untouched.
    if all(name in os.environ for name in credentials):
        return False

    for name, value in credentials.items():
        os.environ[name] = value
    return True
def unsetAwsEnvCredentials() -> None:
    """Remove the AWS credential environment variables.

    Deletes AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY from the process
    environment. A variable that is not currently set is skipped.
    """
    for name in ("AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"):
        os.environ.pop(name, None)