Coverage for python/lsst/resources/s3utils.py: 17%

73 statements  

« prev     ^ index     » next       coverage.py v6.4.2, created at 2022-08-04 02:17 -0700

1# This file is part of lsst-resources. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# Use of this source code is governed by a 3-clause BSD-style 

10# license that can be found in the LICENSE file. 

11 

12from __future__ import annotations 

13 

14__all__ = ( 

15 "getS3Client", 

16 "s3CheckFileExists", 

17 "bucketExists", 

18 "setAwsEnvCredentials", 

19 "unsetAwsEnvCredentials", 

20) 

21 

22import functools 

23import os 

24from typing import Optional, Tuple, Union 

25 

26try: 

27 import boto3 

28except ImportError: 

29 boto3 = None 

30 

31try: 

32 import botocore 

33except ImportError: 

34 botocore = None 

35 

36from ._resourcePath import ResourcePath 

37from .location import Location 

38 

39 

def getS3Client() -> boto3.client:
    """Return an S3 client for the configured endpoint.

    Returns
    -------
    s3client : `botocore.client.S3`
        A client of the S3 service.

    Raises
    ------
    ModuleNotFoundError
        Raised if either ``boto3`` or ``botocore`` could not be imported.

    Notes
    -----
    The endpoint URL is read from the ``S3_ENDPOINT_URL`` environment
    variable. If the variable is unset or empty, `None` is passed on as the
    endpoint so that the default AWS endpoint is used.
    """
    if boto3 is None:
        raise ModuleNotFoundError("Could not find boto3. Are you sure it is installed?")
    if botocore is None:
        raise ModuleNotFoundError("Could not find botocore. Are you sure it is installed?")

    # An unset variable and an empty string are both normalised to None.
    endpoint_url = os.environ.get("S3_ENDPOINT_URL") or None

    return _get_s3_client(endpoint_url)

63 

64 

@functools.lru_cache()
def _get_s3_client(endpoint: Optional[str]) -> boto3.client:
    """Create an S3 client for the given endpoint, cached per endpoint.

    Parameters
    ----------
    endpoint : `str` or `None`
        Endpoint URL handed to ``boto3.client`` as ``endpoint_url``.
        `getS3Client` passes `None` when no endpoint is configured.

    Returns
    -------
    s3client : `botocore.client.S3`
        Client configured with a 180 second read timeout and adaptive
        retries (at most 10 attempts).

    Notes
    -----
    The `functools.lru_cache` decorator means repeated calls with the same
    ``endpoint`` value return the same client object.
    """
    config = botocore.config.Config(read_timeout=180, retries={"mode": "adaptive", "max_attempts": 10})

    return boto3.client("s3", endpoint_url=endpoint, config=config)

71 

72 

def s3CheckFileExists(
    path: Union[Location, ResourcePath, str],
    bucket: Optional[str] = None,
    client: Optional[boto3.client] = None,
) -> Tuple[bool, int]:
    """Report whether a key exists in an S3 bucket, and its size.

    Parameters
    ----------
    path : `Location`, `ResourcePath` or `str`
        Location or ResourcePath containing the bucket name and filepath.
        A `str` is used directly as the key when ``bucket`` is given, and
        is otherwise parsed as a `ResourcePath`.
    bucket : `str`, optional
        Name of the bucket in which to look. Only consulted when ``path``
        is a `str`; for `Location` and `ResourcePath` inputs the bucket is
        always taken from ``path``.
    client : `boto3.client`, optional
        S3 Client object to query. If not supplied one is obtained from
        `getS3Client` and boto3 will try to resolve the credentials in the
        order described in its manual_.

    Returns
    -------
    exists : `bool`
        True if key exists, False otherwise.
    size : `int`
        Size of the key in bytes if it exists, otherwise -1.

    Raises
    ------
    ModuleNotFoundError
        Raised if ``boto3`` could not be imported.
    TypeError
        Raised if ``path`` is not one of the supported types.
    PermissionError
        Raised if the HEAD request is answered with HTTP status 403.

    Notes
    -----
    S3 Paths are sensitive to leading and trailing path separators.

    .. _manual: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/\
    configuration.html#configuring-credentials
    """
    if boto3 is None:
        raise ModuleNotFoundError("Could not find boto3. Are you sure it is installed?")

    s3 = getS3Client() if client is None else client

    # Work out which bucket and key to query.
    if isinstance(path, str):
        if bucket is None:
            parsed = ResourcePath(path)
            bucket, key = parsed.netloc, parsed.relativeToPathRoot
        else:
            key = path
    elif isinstance(path, (ResourcePath, Location)):
        bucket, key = path.netloc, path.relativeToPathRoot
    else:
        raise TypeError(f"Unsupported path type: {path!r}.")

    try:
        head = s3.head_object(Bucket=bucket, Key=key)
    except s3.exceptions.ClientError as err:
        status = err.response["ResponseMetadata"]["HTTPStatusCode"]
        # A 404 means the key does not exist.
        if status == 404:
            return (False, -1)
        # head_object only answers 404 for a missing object when the user
        # has s3:ListBucket permission; without it a 403 comes back
        # instead. That usually means the file is absent, but it can also
        # mean s3:GetObject permission is missing, and the two cases cannot
        # be told apart with certainty:
        # https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectHEAD.html
        if status == 403:
            raise PermissionError(
                "Forbidden HEAD operation error occured. "
                "Verify s3:ListBucket and s3:GetObject "
                "permissions are granted for your IAM user. "
            ) from err
        raise

    return (True, head["ContentLength"])

144 

145 

def bucketExists(bucketName: str, client: Optional[boto3.client] = None) -> bool:
    """Report whether an S3 bucket with the given name exists.

    Parameters
    ----------
    bucketName : `str`
        Name of the S3 Bucket
    client : `boto3.client`, optional
        S3 Client object to query. If not supplied one is obtained from
        `getS3Client` and boto3 will try to resolve the credentials in the
        order described in its manual_.

    Returns
    -------
    exists : `bool`
        True if the bucket location could be retrieved, False if the
        client reported ``NoSuchBucket``.

    Raises
    ------
    ModuleNotFoundError
        Raised if ``boto3`` could not be imported.

    .. _manual: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/\
    configuration.html#configuring-credentials
    """
    if boto3 is None:
        raise ModuleNotFoundError("Could not find boto3. Are you sure it is installed?")

    s3 = client if client is not None else getS3Client()

    try:
        s3.get_bucket_location(Bucket=bucketName)
    except s3.exceptions.NoSuchBucket:
        return False
    return True

176 

177 

def setAwsEnvCredentials(
    accessKeyId: str = "dummyAccessKeyId", secretAccessKey: str = "dummySecretAccessKey"
) -> bool:
    """Populate the AWS credential environment variables if incomplete.

    Parameters
    ----------
    accessKeyId : `str`
        Value given to AWS_ACCESS_KEY_ID environmental variable. Defaults to
        `dummyAccessKeyId`.
    secretAccessKey : `str`
        Value given to AWS_SECRET_ACCESS_KEY environmental variable. Defaults
        to `dummySecretAccessKey`.

    Returns
    -------
    setEnvCredentials : `bool`
        True when environmental variables were set, False otherwise.

    Notes
    -----
    Nothing is changed when both AWS_ACCESS_KEY_ID and
    AWS_SECRET_ACCESS_KEY are already present. If either one is missing,
    both are overwritten so that the pair stays consistent.
    """
    already_configured = "AWS_ACCESS_KEY_ID" in os.environ and "AWS_SECRET_ACCESS_KEY" in os.environ
    if already_configured:
        return False

    os.environ["AWS_ACCESS_KEY_ID"] = accessKeyId
    os.environ["AWS_SECRET_ACCESS_KEY"] = secretAccessKey
    return True

207 

208 

def unsetAwsEnvCredentials() -> None:
    """Remove the AWS credential environment variables.

    Deletes AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY from the
    environment. A variable that is not currently set is left alone.
    """
    for variable in ("AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"):
        # The default makes removal a no-op when the variable is absent.
        os.environ.pop(variable, None)