Coverage for python/lsst/daf/butler/core/_butlerUri/s3utils.py: 17%

73 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2022-12-01 19:55 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ("getS3Client", "s3CheckFileExists", "bucketExists", "setAwsEnvCredentials", 

25 "unsetAwsEnvCredentials") 

26 

27import functools 

28import os 

29 

30from typing import ( 

31 Optional, 

32 Tuple, 

33 Union, 

34) 

35 

36try: 

37 import boto3 

38except ImportError: 

39 boto3 = None 

40 

41try: 

42 import botocore 

43except ImportError: 

44 botocore = None 

45 

46from ..location import Location 

47from ._butlerUri import ButlerURI 

48 

49 

def getS3Client() -> boto3.client:
    """Return an S3 client for the configured endpoint.

    Returns
    -------
    s3client : `botocore.client.S3`
        A client of the S3 service.

    Raises
    ------
    ModuleNotFoundError
        Raised if ``boto3`` or ``botocore`` could not be imported.

    Notes
    -----
    The endpoint URL is read from the ``S3_ENDPOINT_URL`` environment
    variable. If the variable is unset or empty, `None` is used and the
    default AWS endpoint applies. The client itself is built by
    ``_get_s3_client``, which caches it per endpoint.
    """
    if boto3 is None:
        raise ModuleNotFoundError("Could not find boto3. Are you sure it is installed?")
    if botocore is None:
        raise ModuleNotFoundError("Could not find botocore. Are you sure it is installed?")

    # An empty string is treated the same as an unset variable.
    return _get_s3_client(os.environ.get("S3_ENDPOINT_URL") or None)

75 

76 

@functools.lru_cache()
def _get_s3_client(endpoint: Optional[str]) -> boto3.client:
    """Build the S3 client for one endpoint, caching the result.

    Parameters
    ----------
    endpoint : `str` or `None`
        Value forwarded to ``boto3.client`` as ``endpoint_url``. `None` is
        a valid value: ``getS3Client`` passes it when no endpoint is
        configured, in which case the default AWS endpoint is used.

    Returns
    -------
    s3client : `botocore.client.S3`
        A client of the S3 service.

    Notes
    -----
    The function is wrapped in `functools.lru_cache`, so repeated calls
    with the same ``endpoint`` return the previously created client
    instead of building a new one.
    """
    config = botocore.config.Config(
        read_timeout=180,
        retries={"mode": "adaptive", "max_attempts": 10},
    )
    return boto3.client("s3", endpoint_url=endpoint, config=config)

89 

90 

def s3CheckFileExists(path: Union[Location, ButlerURI, str], bucket: Optional[str] = None,
                      client: Optional[boto3.client] = None) -> Tuple[bool, int]:
    """Report whether a key exists in an S3 bucket, and its size.

    Parameters
    ----------
    path : `Location`, `ButlerURI` or `str`
        Location or ButlerURI containing the bucket name and filepath.
        A `str` is used verbatim as the key when ``bucket`` is given;
        otherwise it is parsed as a `ButlerURI`.
    bucket : `str`, optional
        Name of the bucket in which to look. Only consulted when ``path``
        is a `str`; for `Location` and `ButlerURI` inputs the bucket is
        always taken from ``path.netloc``.
    client : `boto3.client`, optional
        S3 Client object to query. If not supplied, one is obtained from
        `getS3Client`, and boto3 resolves the credentials in the order
        described in its manual_.

    Returns
    -------
    exists : `bool`
        True if key exists, False otherwise.
    size : `int`
        Size of the key in bytes if it exists, otherwise -1.

    Raises
    ------
    ModuleNotFoundError
        Raised if ``boto3`` could not be imported.
    TypeError
        Raised if ``path`` is not a `str`, `ButlerURI` or `Location`.
    PermissionError
        Raised if the HEAD request is answered with HTTP status 403.

    Notes
    -----
    S3 Paths are sensitive to leading and trailing path separators.

    .. _manual: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/configuration.html#configuring-credentials
    """
    if boto3 is None:
        raise ModuleNotFoundError("Could not find boto3. Are you sure it is installed?")

    if client is None:
        client = getS3Client()

    # Work out which bucket and key to query.
    if isinstance(path, str):
        if bucket is None:
            parsed = ButlerURI(path)
            bucket, key = parsed.netloc, parsed.relativeToPathRoot
        else:
            key = path
    elif isinstance(path, (ButlerURI, Location)):
        bucket, key = path.netloc, path.relativeToPathRoot
    else:
        raise TypeError(f"Unsupported path type: {path!r}.")

    try:
        head = client.head_object(Bucket=bucket, Key=key)
        return (True, head["ContentLength"])
    except client.exceptions.ClientError as err:
        status = err.response["ResponseMetadata"]["HTTPStatusCode"]
        # A 404 means the key does not exist.
        if status == 404:
            return (False, -1)
        # head_object only answers 404 for a missing object when the user
        # has s3:ListBucket permission; without it a 403 is returned.
        # In practice that usually means the file does not exist, but it
        # can also mean the user lacks s3:GetObject permission:
        # https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectHEAD.html
        # The two cases cannot be told apart with certainty.
        if status == 403:
            raise PermissionError("Forbidden HEAD operation error occured. "
                                  "Verify s3:ListBucket and s3:GetObject "
                                  "permissions are granted for your IAM user. ") from err
        raise

158 

159 

def bucketExists(bucketName: str, client: Optional[boto3.client] = None) -> bool:
    """Check whether an S3 bucket with the given name exists.

    Parameters
    ----------
    bucketName : `str`
        Name of the S3 Bucket.
    client : `boto3.client`, optional
        S3 Client object to query. If not supplied, one is obtained from
        `getS3Client`, and boto3 resolves the credentials in the order
        described in its manual_.

    Returns
    -------
    exists : `bool`
        True if the bucket location could be retrieved, False if the
        request raised the client's ``NoSuchBucket`` exception.

    Raises
    ------
    ModuleNotFoundError
        Raised if ``boto3`` could not be imported.

    .. _manual: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/configuration.html#configuring-credentials
    """
    if boto3 is None:
        raise ModuleNotFoundError("Could not find boto3. Are you sure it is installed?")

    s3 = getS3Client() if client is None else client
    try:
        s3.get_bucket_location(Bucket=bucketName)
    except s3.exceptions.NoSuchBucket:
        return False
    return True

191 

192 

def setAwsEnvCredentials(accessKeyId: str = 'dummyAccessKeyId',
                         secretAccessKey: str = "dummySecretAccessKey") -> bool:
    """Populate the AWS credential environment variables if needed.

    Parameters
    ----------
    accessKeyId : `str`
        Value assigned to the AWS_ACCESS_KEY_ID environment variable.
        Defaults to `dummyAccessKeyId`.
    secretAccessKey : `str`
        Value assigned to the AWS_SECRET_ACCESS_KEY environment variable.
        Defaults to `dummySecretAccessKey`.

    Returns
    -------
    setEnvCredentials : `bool`
        True when the environment variables were set, False when both
        were already present and nothing was changed.

    Notes
    -----
    If either AWS_ACCESS_KEY_ID or AWS_SECRET_ACCESS_KEY is missing, both
    are overwritten so that the pair stays consistent.
    """
    credentialVars = ("AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY")
    if all(name in os.environ for name in credentialVars):
        return False

    os.environ.update(AWS_ACCESS_KEY_ID=accessKeyId,
                      AWS_SECRET_ACCESS_KEY=secretAccessKey)
    return True

221 

222 

def unsetAwsEnvCredentials() -> None:
    """Remove the AWS credential environment variables.

    Deletes AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY from the
    environment. A variable that is not set is left alone.
    """
    for name in ("AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"):
        os.environ.pop(name, None)