Coverage for python/lsst/resources/s3utils.py: 24%

106 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2023-03-30 09:30 +0000

1# This file is part of lsst-resources. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# Use of this source code is governed by a 3-clause BSD-style 

10# license that can be found in the LICENSE file. 

11 

12from __future__ import annotations 

13 

14from typing import TYPE_CHECKING 

15 

16if TYPE_CHECKING: 16 ↛ 17line 16 didn't jump to line 17, because the condition on line 16 was never true

17 from unittest import TestCase 

18 

# Names exported by a star-import of this module.
__all__ = (
    # Helper functions.
    "clean_test_environment",
    "getS3Client",
    "s3CheckFileExists",
    "bucketExists",
    "setAwsEnvCredentials",
    "unsetAwsEnvCredentials",
    # Retry support: the backoff module (or its stand-in) and the retry
    # settings defined in this module.
    "backoff",
    "all_retryable_errors",
    "max_retry_time",
    "retryable_io_errors",
    "retryable_client_errors",
    "_TooManyRequestsException",
)

33 

34import functools 

35import os 

36from http.client import HTTPException, ImproperConnectionState 

37from types import ModuleType 

38from typing import Any, Callable, Optional, Tuple, Union, cast 

39 

40from botocore.exceptions import ClientError 

41from urllib3.exceptions import HTTPError, RequestError 

42 

43try: 

44 import boto3 

45except ImportError: 

46 boto3 = None 

47 

48try: 

49 import botocore 

50except ImportError: 

51 botocore = None 

52 

53 

54from ._resourcePath import ResourcePath 

55from .location import Location 

56 

57# https://pypi.org/project/backoff/ 

try:
    import backoff
except ImportError:

    class Backoff:
        """Pass-through stand-in used when the ``backoff`` package is missing.

        Both methods return their first positional argument untouched and
        ignore everything else. As a result
        ``@backoff.on_exception(backoff.expo, ...)`` evaluates to ``expo``,
        and ``expo`` applied to the decorated function returns that function
        unchanged, so no retrying takes place.
        """

        @staticmethod
        def on_exception(func: Callable, *args: Any, **kwargs: Any) -> Callable:
            return func

        @staticmethod
        def expo(func: Callable, *args: Any, **kwargs: Any) -> Callable:
            return func

    # Bind the stand-in to the module name so code using ``backoff.<name>``
    # works whether or not the real package was imported.
    backoff = cast(ModuleType, Backoff)

72 

73 

class _TooManyRequestsException(Exception):
    """Private exception type that can be used to retry on a 429 response.

    botocore does not deal with a 429 error itself and reports it as a
    generic ``ClientError``; this class gives that case a type of its own.
    """

82 

83 

# Settings for the "backoff" retry decorators. These retries are a
# belt-and-suspenders measure on top of the retries built into Boto3, to
# account for semantic differences in errors between S3-like providers.
retryable_io_errors = (
    # From http.client.
    ImproperConnectionState,
    HTTPException,
    # From urllib3.exceptions.
    RequestError,
    HTTPError,
    # Python built-ins.
    TimeoutError,
    ConnectionError,
    # Private to this module.
    _TooManyRequestsException,
)

# A client error can also mean NoSuchKey, in which case a retry may not be
# the right thing to do. More consideration is needed before this is used.
retryable_client_errors = (
    # From botocore.exceptions.
    ClientError,
    # Python built-ins.
    PermissionError,
)


# One convenient name for everything that should be retried. For now the
# client errors are not included.
all_retryable_errors = retryable_io_errors
# NOTE(review): presumably a time limit in seconds for the retry decorators;
# confirm against the call sites that consume it.
max_retry_time = 60

115 

116 

def clean_test_environment(testcase: TestCase) -> None:
    """Blank out ``S3_ENDPOINT_URL`` and restore it when the test finishes.

    Parameters
    ----------
    testcase : `unittest.TestCase`
        Reference to the test being run; the callback that restores the
        original value is registered on it with ``addCleanup``.

    Notes
    -----
    If the variable is unset or already empty the environment is left
    untouched and no cleanup is registered.
    """
    saved = os.environ.get("S3_ENDPOINT_URL")
    if saved:
        # The variable is set to the empty string rather than deleted.
        os.environ["S3_ENDPOINT_URL"] = ""

        def restore() -> None:
            os.environ["S3_ENDPOINT_URL"] = saved

        testcase.addCleanup(restore)

136 

137 

def getS3Client() -> boto3.client:
    """Return an S3 client for AWS (the default) or a configured endpoint.

    Returns
    -------
    s3client : `botocore.client.S3`
        A client of the S3 service.

    Raises
    ------
    ModuleNotFoundError
        Raised if boto3 or botocore could not be imported.

    Notes
    -----
    The endpoint URL is read from the environment variable S3_ENDPOINT_URL.
    If it is unset or empty, the default AWS endpoint is used.
    """
    for module, label in ((boto3, "boto3"), (botocore, "botocore")):
        if module is None:
            raise ModuleNotFoundError(f"Could not find {label}. Are you sure it is installed?")

    # An empty string counts as "no endpoint configured".
    endpoint_url = os.environ.get("S3_ENDPOINT_URL") or None
    return _get_s3_client(endpoint_url)

161 

162 

@functools.lru_cache()
def _get_s3_client(endpoint: Optional[str]) -> boto3.client:
    """Create the S3 client for an endpoint, caching it for later calls.

    Parameters
    ----------
    endpoint : `str` or `None`
        Endpoint URL passed to ``boto3.client`` as ``endpoint_url``.
        `getS3Client` passes `None` when no endpoint is configured, in which
        case the default AWS endpoint is used.

    Returns
    -------
    s3client : `botocore.client.S3`
        A client of the S3 service. Because of the ``lru_cache`` decorator,
        repeated calls with the same ``endpoint`` return the cached client.
    """
    # Read timeout of 180 and botocore's "adaptive" retry mode with at most
    # 10 attempts.
    config = botocore.config.Config(read_timeout=180, retries={"mode": "adaptive", "max_attempts": 10})

    return boto3.client("s3", endpoint_url=endpoint, config=config)

169 

170 

def s3CheckFileExists(
    path: Union[Location, ResourcePath, str],
    bucket: Optional[str] = None,
    client: Optional[boto3.client] = None,
) -> Tuple[bool, int]:
    """Report whether a key exists in a bucket, and how large it is.

    Parameters
    ----------
    path : `Location`, `ResourcePath` or `str`
        Location or ResourcePath containing the bucket name and filepath.
    bucket : `str`, optional
        Name of the bucket in which to look. If provided, a `str` path is
        taken to be relative to the given bucket.
    client : `boto3.client`, optional
        S3 Client object to query. If not supplied, boto3 will try to
        resolve the credentials in the order described in its manual_.

    Returns
    -------
    exists : `bool`
        True if key exists, False otherwise.
    size : `int`
        Size of the key in bytes if it exists, otherwise -1.

    Raises
    ------
    ModuleNotFoundError
        Raised if boto3 could not be imported.
    TypeError
        Raised if ``path`` is not one of the supported types.
    PermissionError
        Raised if the HEAD request is answered with a 403 status.

    Notes
    -----
    S3 Paths are sensitive to leading and trailing path separators.

    For a `Location` or `ResourcePath` the bucket is always taken from the
    object itself, and any ``bucket`` argument is ignored.

    .. _manual: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/\
    configuration.html#configuring-credentials
    """
    if boto3 is None:
        raise ModuleNotFoundError("Could not find boto3. Are you sure it is installed?")

    s3 = getS3Client() if client is None else client

    if isinstance(path, str):
        if bucket is None:
            # No bucket supplied: treat the string as a full URI.
            uri = ResourcePath(path)
            bucket, key = uri.netloc, uri.relativeToPathRoot
        else:
            key = path
    elif isinstance(path, (ResourcePath, Location)):
        bucket, key = path.netloc, path.relativeToPathRoot
    else:
        raise TypeError(f"Unsupported path type: {path!r}.")

    try:
        head = s3.head_object(Bucket=bucket, Key=key)
    except s3.exceptions.ClientError as err:
        status = err.response["ResponseMetadata"]["HTTPStatusCode"]
        # A 404 (resource unreachable) means the key does not exist.
        if status == 404:
            return (False, -1)
        # head_object returns 404 for a missing object only when the user
        # has s3:ListBucket permission; without it a 403 is returned. In
        # practical terms this generally means that the file does not exist,
        # but it could also mean the user lacks s3:GetObject permission:
        # https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectHEAD.html
        # It does not seem possible to tell the two cases apart with
        # certainty.
        if status == 403:
            raise PermissionError(
                "Forbidden HEAD operation error occured. "
                "Verify s3:ListBucket and s3:GetObject "
                "permissions are granted for your IAM user. "
            ) from err
        raise
    return (True, head["ContentLength"])

242 

243 

def bucketExists(bucketName: str, client: Optional[boto3.client] = None) -> bool:
    """Check whether an S3 bucket with the given name exists.

    Parameters
    ----------
    bucketName : `str`
        Name of the S3 Bucket.
    client : `boto3.client`, optional
        S3 Client object to query. If not supplied, boto3 will try to
        resolve the credentials in the order described in its manual_.

    Returns
    -------
    exists : `bool`
        True if it exists, False if no Bucket with specified parameters is
        found.

    Raises
    ------
    ModuleNotFoundError
        Raised if boto3 could not be imported.

    Notes
    -----
    Only the client's ``NoSuchBucket`` exception is turned into `False`;
    any other exception from the request is not caught here.

    .. _manual: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/\
    configuration.html#configuring-credentials
    """
    if boto3 is None:
        raise ModuleNotFoundError("Could not find boto3. Are you sure it is installed?")

    s3 = client if client is not None else getS3Client()
    try:
        s3.get_bucket_location(Bucket=bucketName)
    except s3.exceptions.NoSuchBucket:
        return False
    else:
        return True

274 

275 

def setAwsEnvCredentials(
    accessKeyId: str = "dummyAccessKeyId", secretAccessKey: str = "dummySecretAccessKey"
) -> bool:
    """Set the AWS credential environment variables if they are incomplete.

    Parameters
    ----------
    accessKeyId : `str`
        Value given to AWS_ACCESS_KEY_ID environmental variable. Defaults to
        `dummyAccessKeyId`.
    secretAccessKey : `str`
        Value given to AWS_SECRET_ACCESS_KEY environmental variable. Defaults
        to `dummySecretAccessKey`.

    Returns
    -------
    setEnvCredentials : `bool`
        True when environmental variables were set, False otherwise.

    Notes
    -----
    If either AWS_ACCESS_KEY_ID or AWS_SECRET_ACCESS_KEY is not set, both
    values are overwritten to ensure that the values are consistent.
    """
    required = ("AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY")
    if all(name in os.environ for name in required):
        # Both variables are already set; leave them alone.
        return False

    os.environ["AWS_ACCESS_KEY_ID"] = accessKeyId
    os.environ["AWS_SECRET_ACCESS_KEY"] = secretAccessKey
    return True

305 

306 

def unsetAwsEnvCredentials() -> None:
    """Remove the AWS credential variables from the environment.

    Deletes AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY from ``os.environ``;
    a variable that is not set is skipped.
    """
    for name in ("AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"):
        os.environ.pop(name, None)

315 if "AWS_SECRET_ACCESS_KEY" in os.environ: 

316 del os.environ["AWS_SECRET_ACCESS_KEY"]