Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ("getS3Client", "s3CheckFileExists", "bucketExists", "setAwsEnvCredentials", 

25 "unsetAwsEnvCredentials") 

26 

27import os 

28 

29from typing import ( 

30 Optional, 

31 Tuple, 

32 Union, 

33) 

34 

35try: 

36 import boto3 

37except ImportError: 

38 boto3 = None 

39 

40from .location import ButlerURI, Location 

41 

42 

def getS3Client() -> boto3.client:
    """Build an S3 client for the default AWS endpoint or a custom one.

    Returns
    -------
    s3client : `botocore.client.S3`
        A client of the S3 service.

    Raises
    ------
    ModuleNotFoundError
        Raised if ``boto3`` could not be imported.

    Notes
    -----
    The endpoint URL is read from the environment variable
    ``S3_ENDPOINT_URL``. When the variable is unset or empty, `None` is
    passed as the endpoint so that the default AWS one is used.
    """
    if boto3 is None:
        raise ModuleNotFoundError("Could not find boto3. Are you sure it is installed?")

    # "" and a missing variable are both normalized to None.
    endpointUrl = os.environ.get("S3_ENDPOINT_URL") or None
    return boto3.client("s3", endpoint_url=endpointUrl)

64 

65 

def s3CheckFileExists(path: Union[Location, ButlerURI, str], bucket: Optional[str] = None,
                      client: Optional[boto3.client] = None) -> Tuple[bool, int]:
    """Return (True, filesize) if the file exists in the bucket and
    (False, -1) if the file is not found.

    Parameters
    ----------
    path : `Location`, `ButlerURI` or `str`
        Location or ButlerURI containing the bucket name and filepath. A
        `str` is used verbatim as the key when ``bucket`` is given, and is
        otherwise parsed as a `ButlerURI` to obtain both bucket and key.
    bucket : `str`, optional
        Name of the bucket in which to look. If provided, a `str` path is
        assumed to be relative to the given bucket. This argument is ignored
        when ``path`` is a `Location` or `ButlerURI`; the bucket is then
        always taken from ``path.netloc``.
    client : `boto3.client`, optional
        S3 Client object to query, if not supplied boto3 will try to resolve
        the credentials in the order described in its manual_.

    Returns
    -------
    exists : `bool`
        True if key exists, False otherwise.
    size : `int`
        Size of the key, if key exists, in bytes, otherwise -1

    Raises
    ------
    ModuleNotFoundError
        Raised if ``boto3`` could not be imported.
    TypeError
        Raised if ``path`` is not a `Location`, `ButlerURI` or `str`.
    PermissionError
        Raised if the HEAD request is answered with HTTP status 403.

    Notes
    -----
    S3 Paths are sensitive to leading and trailing path separators.

    Any client error other than a 404 or 403 response is re-raised
    unchanged.

    .. _manual: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/\
    configuration.html#configuring-credentials
    """
    if boto3 is None:
        raise ModuleNotFoundError("Could not find boto3. "
                                  "Are you sure it is installed?")

    if client is None:
        client = getS3Client()

    if isinstance(path, str):
        if bucket is not None:
            filepath = path
        else:
            uri = ButlerURI(path)
            bucket = uri.netloc
            filepath = uri.relativeToPathRoot
    elif isinstance(path, (ButlerURI, Location)):
        bucket = path.netloc
        filepath = path.relativeToPathRoot
    else:
        # Fail early with a clear message instead of an UnboundLocalError
        # on ``filepath`` further down.
        raise TypeError(f"Unsupported path type {type(path).__name__!r}; "
                        "expected Location, ButlerURI or str.")

    try:
        obj = client.head_object(Bucket=bucket, Key=filepath)
    except client.exceptions.ClientError as err:
        status = err.response["ResponseMetadata"]["HTTPStatusCode"]
        # resource unreachable error means key does not exist
        if status == 404:
            return (False, -1)
        # head_object returns 404 when object does not exist only when user has
        # s3:ListBucket permission. If list permission does not exist a 403 is
        # returned. In practical terms this generally means that the file does
        # not exist, but it could also mean user lacks s3:GetObject permission:
        # https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectHEAD.html
        # I don't think its possible to discern which case is it with certainty
        if status == 403:
            raise PermissionError("Forbidden HEAD operation error occured. "
                                  "Verify s3:ListBucket and s3:GetObject "
                                  "permissions are granted for your IAM user. ") from err
        raise
    return (True, obj["ContentLength"])

132 

133 

def bucketExists(bucketName: str, client: Optional[boto3.client] = None) -> bool:
    """Report whether an S3 bucket with the given name exists.

    Parameters
    ----------
    bucketName : `str`
        Name of the S3 Bucket
    client : `boto3.client`, optional
        S3 Client object to query, if not supplied boto3 will try to resolve
        the credentials in the order described in its manual_.

    Returns
    -------
    exists : `bool`
        True if the bucket location query succeeds, False if the client
        reports that no such bucket exists.

    Raises
    ------
    ModuleNotFoundError
        Raised if ``boto3`` could not be imported.

    Notes
    -----
    Only the ``NoSuchBucket`` client exception is translated into `False`;
    any other exception raised by the query propagates to the caller.

    .. _manual: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/\
    configuration.html#configuring-credentials
    """
    if boto3 is None:
        raise ModuleNotFoundError("Could not find boto3. Are you sure it is installed?")

    s3 = getS3Client() if client is None else client
    try:
        s3.get_bucket_location(Bucket=bucketName)
    except s3.exceptions.NoSuchBucket:
        return False
    else:
        return True

165 

166 

def setAwsEnvCredentials(accessKeyId: str = 'dummyAccessKeyId',
                         secretAccessKey: str = "dummySecretAccessKey") -> bool:
    """Populate the AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environment
    variables unless both are already defined.

    Parameters
    ----------
    accessKeyId : `str`
        Value given to AWS_ACCESS_KEY_ID environmental variable. Defaults to
        'dummyAccessKeyId'
    secretAccessKey : `str`
        Value given to AWS_SECRET_ACCESS_KEY environmental variable. Defaults
        to 'dummySecretAccessKey'

    Returns
    -------
    setEnvCredentials : `bool`
        True when environmental variables were set, False otherwise.

    Notes
    -----
    If either AWS_ACCESS_KEY_ID or AWS_SECRET_ACCESS_KEY is not set, both
    values are overwritten. If both are present, nothing is modified.
    """
    environ = os.environ
    # Both variables already present: leave the environment untouched.
    if "AWS_ACCESS_KEY_ID" in environ and "AWS_SECRET_ACCESS_KEY" in environ:
        return False

    credentials = (
        ("AWS_ACCESS_KEY_ID", accessKeyId),
        ("AWS_SECRET_ACCESS_KEY", secretAccessKey),
    )
    for name, value in credentials:
        environ[name] = value
    return True

196 

197 

def unsetAwsEnvCredentials() -> None:
    """Remove the AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environmental
    variables, ignoring whichever of them is not currently set.
    """
    for name in ("AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"):
        # A default of None makes the removal a no-op for missing variables.
        os.environ.pop(name, None)

205 del os.environ["AWS_SECRET_ACCESS_KEY"]