Coverage for python/lsst/daf/butler/core/s3utils.py : 14%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ("getS3Client", "s3CheckFileExists", "bucketExists", "setAwsEnvCredentials",
           "unsetAwsEnvCredentials")

import os

from typing import (
    Optional,
    Tuple,
    Union,
)

# boto3 is an optional dependency: when it cannot be imported the name is
# bound to None so that callers can test for it and fail with a clear message.
try:
    import boto3
except ImportError:
    boto3 = None

from .location import ButlerURI, Location
def getS3Client() -> boto3.client:
    """Create a S3 client with AWS (default) or the specified endpoint.

    Returns
    -------
    s3client : `botocore.client.S3`
        A client of the S3 service.

    Raises
    ------
    ModuleNotFoundError
        Raised if boto3 could not be imported.

    Notes
    -----
    The endpoint URL is from the environment variable S3_ENDPOINT_URL.
    If none is specified, the default AWS one is used.
    """
    if boto3 is None:
        raise ModuleNotFoundError("Could not find boto3. "
                                  "Are you sure it is installed?")

    # An unset variable and an empty string both mean "no custom endpoint";
    # normalize both to None before handing the value to boto3.
    endpoint = os.environ.get("S3_ENDPOINT_URL") or None
    return boto3.client("s3", endpoint_url=endpoint)
def s3CheckFileExists(path: Union[Location, ButlerURI, str], bucket: Optional[str] = None,
                      client: Optional[boto3.client] = None) -> Tuple[bool, int]:
    """Return (True, filesize) if file exists in the bucket and (False, -1) if
    the file is not found.

    Parameters
    ----------
    path : `Location`, `ButlerURI` or `str`
        Location or ButlerURI containing the bucket name and filepath.
    bucket : `str`, optional
        Name of the bucket in which to look. If provided, and ``path`` is a
        `str`, path will be assumed to be relative to the given bucket.
        When ``path`` is a `Location` or `ButlerURI` the bucket is always
        taken from ``path`` and this argument is overridden.
    client : `boto3.client`, optional
        S3 Client object to query, if not supplied boto3 will try to resolve
        the credentials in the order described in its manual_.

    Returns
    -------
    exists : `bool`
        True if key exists, False otherwise.
    size : `int`
        Size of the key, if key exists, in bytes, otherwise -1

    Raises
    ------
    ModuleNotFoundError
        Raised if boto3 could not be imported.
    TypeError
        Raised if ``path`` is not a `Location`, `ButlerURI` or `str`.
    PermissionError
        Raised if the HEAD request is answered with HTTP status 403.

    Notes
    -----
    S3 Paths are sensitive to leading and trailing path separators.

    Any client error other than a 404 or 403 response is re-raised
    unchanged.

    .. _manual: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/\
    configuration.html#configuring-credentials
    """
    if boto3 is None:
        raise ModuleNotFoundError("Could not find boto3. "
                                  "Are you sure it is installed?")

    if client is None:
        client = getS3Client()

    if isinstance(path, str):
        if bucket is not None:
            # The caller named the bucket explicitly, so the string is
            # already the key within that bucket.
            filepath = path
        else:
            uri = ButlerURI(path)
            bucket = uri.netloc
            filepath = uri.relativeToPathRoot
    elif isinstance(path, (ButlerURI, Location)):
        bucket = path.netloc
        filepath = path.relativeToPathRoot
    else:
        # Fail with a clear message instead of an UnboundLocalError on
        # ``filepath`` further down.
        raise TypeError(f"Unsupported type for path: {type(path).__name__}; "
                        "expected Location, ButlerURI or str.")

    try:
        obj = client.head_object(Bucket=bucket, Key=filepath)
        return (True, obj["ContentLength"])
    except client.exceptions.ClientError as err:
        # resource unreachable error means key does not exist
        if err.response["ResponseMetadata"]["HTTPStatusCode"] == 404:
            return (False, -1)
        # head_object returns 404 when object does not exist only when user has
        # s3:ListBucket permission. If list permission does not exist a 403 is
        # returned. In practical terms this generally means that the file does
        # not exist, but it could also mean user lacks s3:GetObject permission:
        # https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectHEAD.html
        # I don't think its possible to discern which case is it with certainty
        if err.response["ResponseMetadata"]["HTTPStatusCode"] == 403:
            raise PermissionError("Forbidden HEAD operation error occured. "
                                  "Verify s3:ListBucket and s3:GetObject "
                                  "permissions are granted for your IAM user. ") from err
        raise
def bucketExists(bucketName: str, client: Optional[boto3.client] = None) -> bool:
    """Check if the S3 bucket with the given name actually exists.

    Parameters
    ----------
    bucketName : `str`
        Name of the S3 Bucket
    client : `boto3.client`, optional
        S3 Client object to query, if not supplied boto3 will try to resolve
        the credentials in the order described in its manual_.

    Returns
    -------
    exists : `bool`
        True if it exists, False if no Bucket with specified parameters is
        found.

    Raises
    ------
    ModuleNotFoundError
        Raised if boto3 could not be imported.

    .. _manual: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/\
    configuration.html#configuring-credentials
    """
    if boto3 is None:
        raise ModuleNotFoundError("Could not find boto3. "
                                  "Are you sure it is installed?")

    if client is None:
        client = getS3Client()

    # The location itself is not needed; the call is used only as a probe
    # that raises NoSuchBucket when the bucket is missing. Any other error
    # propagates to the caller.
    try:
        client.get_bucket_location(Bucket=bucketName)
        return True
    except client.exceptions.NoSuchBucket:
        return False
def setAwsEnvCredentials(accessKeyId: str = "dummyAccessKeyId",
                         secretAccessKey: str = "dummySecretAccessKey") -> bool:
    """Set AWS credentials environmental variables AWS_ACCESS_KEY_ID and
    AWS_SECRET_ACCESS_KEY.

    Parameters
    ----------
    accessKeyId : `str`
        Value given to AWS_ACCESS_KEY_ID environmental variable. Defaults to
        'dummyAccessKeyId'
    secretAccessKey : `str`
        Value given to AWS_SECRET_ACCESS_KEY environmental variable. Defaults
        to 'dummySecretAccessKey'

    Returns
    -------
    setEnvCredentials : `bool`
        True when environmental variables were set, False otherwise.

    Notes
    -----
    If either AWS_ACCESS_KEY_ID or AWS_SECRET_ACCESS_KEY are not set, both
    values are overwritten. If both are already set nothing is changed.
    """
    # Credentials are treated as a pair: a missing half invalidates the
    # other, so both are (re)written together.
    if "AWS_ACCESS_KEY_ID" not in os.environ or "AWS_SECRET_ACCESS_KEY" not in os.environ:
        os.environ["AWS_ACCESS_KEY_ID"] = accessKeyId
        os.environ["AWS_SECRET_ACCESS_KEY"] = secretAccessKey
        return True
    return False
def unsetAwsEnvCredentials() -> None:
    """Unset AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environmental
    variables.

    Variables that are not currently set are silently ignored.
    """
    for name in ("AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"):
        # pop with a default removes the variable if present and is a no-op
        # otherwise.
        os.environ.pop(name, None)