Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

# Public API of this module; controls what ``from <module> import *`` exports.
__all__ = ("getS3Client", "s3CheckFileExists", "bucketExists", "setAwsEnvCredentials",
           "unsetAwsEnvCredentials")

26 

27import os 

28 

29from typing import ( 

30 Optional, 

31 Tuple, 

32 Union, 

33) 

34 

35try: 

36 import boto3 

37except ImportError: 

38 boto3 = None 

39 

40try: 

41 import botocore 

42except ImportError: 

43 botocore = None 

44 

45from ..location import Location 

46from ._butlerUri import ButlerURI 

47 

48 

def getS3Client() -> boto3.client:
    """Create a S3 client with AWS (default) or the specified endpoint.

    Returns
    -------
    s3client : `botocore.client.S3`
        A client of the S3 service.

    Raises
    ------
    ModuleNotFoundError
        Raised if either ``boto3`` or ``botocore`` could not be imported.

    Notes
    -----
    The endpoint URL is from the environment variable S3_ENDPOINT_URL.
    If none is specified, or if it is set to an empty string, the default
    AWS one is used.

    The client is configured with ``read_timeout=180`` and with the
    ``adaptive`` retry mode limited to 10 attempts.
    """
    if boto3 is None:
        raise ModuleNotFoundError("Could not find boto3. "
                                  "Are you sure it is installed?")
    if botocore is None:
        raise ModuleNotFoundError("Could not find botocore. "
                                  "Are you sure it is installed?")

    # An unset variable and an empty string are treated the same way:
    # both collapse to None so that no explicit endpoint is passed on.
    endpoint = os.environ.get("S3_ENDPOINT_URL") or None

    config = botocore.config.Config(
        read_timeout=180,
        retries={
            "mode": "adaptive",
            "max_attempts": 10,
        },
    )

    return boto3.client("s3", endpoint_url=endpoint, config=config)

82 

83 

def s3CheckFileExists(path: Union[Location, ButlerURI, str], bucket: Optional[str] = None,
                      client: Optional[boto3.client] = None) -> Tuple[bool, int]:
    """Return if the file exists in the bucket or not.

    Parameters
    ----------
    path : `Location`, `ButlerURI` or `str`
        Location or ButlerURI containing the bucket name and filepath.
        A `str` is used verbatim as the key when ``bucket`` is given,
        otherwise it is parsed as a `ButlerURI`.
    bucket : `str`, optional
        Name of the bucket in which to look. If provided, path will be assumed
        to correspond to be relative to the given bucket. Only consulted when
        ``path`` is a `str`; for `Location` and `ButlerURI` the bucket is
        always taken from ``path.netloc``.
    client : `boto3.client`, optional
        S3 Client object to query, if not supplied boto3 will try to resolve
        the credentials as in order described in its manual_.

    Returns
    -------
    exists : `bool`
        True if key exists, False otherwise.
    size : `int`
        Size of the key, if key exists, in bytes, otherwise -1.

    Raises
    ------
    ModuleNotFoundError
        Raised if ``boto3`` could not be imported.
    TypeError
        Raised if ``path`` is not a `str`, `ButlerURI` or `Location`.
    PermissionError
        Raised if the HEAD request is answered with HTTP status 403.

    Notes
    -----
    S3 Paths are sensitive to leading and trailing path separators.

    Any client error other than a 404 or a 403 is re-raised unchanged.

    .. _manual: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/\
    configuration.html#configuring-credentials
    """
    if boto3 is None:
        raise ModuleNotFoundError("Could not find boto3. "
                                  "Are you sure it is installed?")

    if client is None:
        client = getS3Client()

    if isinstance(path, str):
        if bucket is not None:
            # Caller supplied the bucket, so the string is already the key.
            filepath = path
        else:
            uri = ButlerURI(path)
            bucket = uri.netloc
            filepath = uri.relativeToPathRoot
    elif isinstance(path, (ButlerURI, Location)):
        bucket = path.netloc
        filepath = path.relativeToPathRoot
    else:
        raise TypeError(f"Unsupported path type: {path!r}.")

    try:
        obj = client.head_object(Bucket=bucket, Key=filepath)
    except client.exceptions.ClientError as err:
        status = err.response["ResponseMetadata"]["HTTPStatusCode"]
        # resource unreachable error means key does not exist
        if status == 404:
            return (False, -1)
        # head_object returns 404 when object does not exist only when user has
        # s3:ListBucket permission. If list permission does not exist a 403 is
        # returned. In practical terms this generally means that the file does
        # not exist, but it could also mean user lacks s3:GetObject permission:
        # https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectHEAD.html
        # I don't think its possible to discern which case is it with certainty
        if status == 403:
            raise PermissionError("Forbidden HEAD operation error occured. "
                                  "Verify s3:ListBucket and s3:GetObject "
                                  "permissions are granted for your IAM user. ") from err
        raise

    return (True, obj["ContentLength"])

151 

152 

def bucketExists(bucketName: str, client: Optional[boto3.client] = None) -> bool:
    """Check if the S3 bucket with the given name actually exists.

    Parameters
    ----------
    bucketName : `str`
        Name of the S3 Bucket
    client : `boto3.client`, optional
        S3 Client object to query, if not supplied boto3 will try to resolve
        the credentials as in order described in its manual_.

    Returns
    -------
    exists : `bool`
        True if it exists, False if no Bucket with specified parameters is
        found.

    Raises
    ------
    ModuleNotFoundError
        Raised if ``boto3`` could not be imported.

    Notes
    -----
    Only the client's ``NoSuchBucket`` exception is translated into a
    `False` result; any other exception raised by the request propagates to
    the caller.

    .. _manual: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/\
    configuration.html#configuring-credentials
    """
    if boto3 is None:
        raise ModuleNotFoundError("Could not find boto3. "
                                  "Are you sure it is installed?")

    if client is None:
        client = getS3Client()

    # The response content is not needed; only whether the call succeeds.
    try:
        client.get_bucket_location(Bucket=bucketName)
    except client.exceptions.NoSuchBucket:
        return False
    else:
        return True

184 

185 

def setAwsEnvCredentials(accessKeyId: str = "dummyAccessKeyId",
                         secretAccessKey: str = "dummySecretAccessKey") -> bool:
    """Set AWS credentials environmental variables.

    Parameters
    ----------
    accessKeyId : `str`
        Value given to AWS_ACCESS_KEY_ID environmental variable. Defaults to
        `dummyAccessKeyId`.
    secretAccessKey : `str`
        Value given to AWS_SECRET_ACCESS_KEY environmental variable. Defaults
        to `dummySecretAccessKey`.

    Returns
    -------
    setEnvCredentials : `bool`
        True when environmental variables were set, False otherwise.

    Notes
    -----
    If either AWS_ACCESS_KEY_ID or AWS_SECRET_ACCESS_KEY are not set, both
    values are overwritten to ensure that the values are consistent.
    If both are already set, the environment is left untouched.
    """
    # Both variables already present: keep the existing credentials.
    if "AWS_ACCESS_KEY_ID" in os.environ and "AWS_SECRET_ACCESS_KEY" in os.environ:
        return False

    # At least one is missing; write both so they form a matching pair.
    os.environ["AWS_ACCESS_KEY_ID"] = accessKeyId
    os.environ["AWS_SECRET_ACCESS_KEY"] = secretAccessKey
    return True

214 

215 

def unsetAwsEnvCredentials() -> None:
    """Unset AWS credential environment variables.

    Unsets the AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environmental
    variables. Variables that are not currently set are silently skipped.
    """
    for name in ("AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"):
        # pop with a default removes the variable without raising if absent.
        os.environ.pop(name, None)