Coverage for python/lsst/resources/s3utils.py: 23%

111 statements  

« prev     ^ index     » next       coverage.py v7.2.3, created at 2023-04-20 03:07 -0700

1# This file is part of lsst-resources. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# Use of this source code is governed by a 3-clause BSD-style 

10# license that can be found in the LICENSE file. 

11 

12from __future__ import annotations 

13 

14from typing import TYPE_CHECKING 

15 

16if TYPE_CHECKING: 

17 from unittest import TestCase 

18 

19__all__ = ( 

20 "clean_test_environment", 

21 "getS3Client", 

22 "s3CheckFileExists", 

23 "bucketExists", 

24 "setAwsEnvCredentials", 

25 "unsetAwsEnvCredentials", 

26 "backoff", 

27 "all_retryable_errors", 

28 "max_retry_time", 

29 "retryable_io_errors", 

30 "retryable_client_errors", 

31 "_TooManyRequestsException", 

32) 

33 

34import functools 

35import os 

36import re 

37from http.client import HTTPException, ImproperConnectionState 

38from types import ModuleType 

39from typing import Any, Callable, Optional, Tuple, Union, cast 

40 

41from botocore.exceptions import ClientError 

42from botocore.handlers import validate_bucket_name 

43from urllib3.exceptions import HTTPError, RequestError 

44 

45try: 

46 import boto3 

47except ImportError: 

48 boto3 = None 

49 

50try: 

51 import botocore 

52except ImportError: 

53 botocore = None 

54 

55 

56from ._resourcePath import ResourcePath 

57from .location import Location 

58 

59# https://pypi.org/project/backoff/ 

try:
    import backoff
except ImportError:

    class Backoff:
        """No-op stand-in used when the ``backoff`` package is unavailable.

        Both methods hand back their first argument untouched and ignore
        everything else. An expression of the form
        ``backoff.on_exception(backoff.expo, ...)`` therefore evaluates to
        ``expo`` itself, and applying that to a function returns the
        function unchanged.
        """

        @staticmethod
        def expo(func: Callable, *args: Any, **kwargs: Any) -> Callable:
            """Return ``func`` unchanged; extra arguments are ignored."""
            return func

        @staticmethod
        def on_exception(func: Callable, *args: Any, **kwargs: Any) -> Callable:
            """Return ``func`` unchanged; extra arguments are ignored."""
            return func

    # Expose the stand-in under the module's name; the cast only serves
    # static type checkers.
    backoff = cast(ModuleType, Backoff)

74 

75 

class _TooManyRequestsException(Exception):
    """Private exception type available for retrying on HTTP 429.

    botocore does not handle a 429 error itself and instead issues a
    generic ``ClientError``, so this dedicated type exists to represent
    that condition.
    """

84 

85 

# Settings for the "backoff" retry decorators. These retries are a second
# safety net ("belt and suspenders") on top of the retries built into
# Boto3, to account for semantic differences in errors between S3-like
# providers.
retryable_io_errors = (
    # http.client
    ImproperConnectionState,
    HTTPException,
    # urllib3.exceptions
    RequestError,
    HTTPError,
    # built-ins
    TimeoutError,
    ConnectionError,
    # private
    _TooManyRequestsException,
)

# A client error can include NoSuchKey, for which a retry may not be the
# right thing. This needs more consideration before it is used.
retryable_client_errors = (
    # botocore.exceptions
    ClientError,
    # built-ins
    PermissionError,
)


# Combine all errors into one easy-to-use name. For now client errors
# are not included, so this is the same tuple as the I/O errors.
all_retryable_errors = retryable_io_errors
# Retry time limit; presumably seconds, for use as backoff's ``max_time``
# by callers -- TODO confirm against the call sites.
max_retry_time = 60

117 

118 

def clean_test_environment(testcase: TestCase) -> None:
    """Blank out S3_ENDPOINT_URL and restore it when the test finishes.

    Parameters
    ----------
    testcase : `unittest.TestCase`
        The test being run; a cleanup callback that puts the original
        value back is registered on it.

    Notes
    -----
    Nothing is done if S3_ENDPOINT_URL is unset or already empty. The
    variable is set to the empty string rather than removed from the
    environment.
    """
    original = os.environ.get("S3_ENDPOINT_URL")
    if original:
        os.environ["S3_ENDPOINT_URL"] = ""
        # Put the saved value back once the test's cleanups run.
        testcase.addCleanup(os.environ.__setitem__, "S3_ENDPOINT_URL", original)

138 

139 

def getS3Client() -> boto3.client:
    """Return an S3 client for AWS (the default) or a configured endpoint.

    Returns
    -------
    s3client : `botocore.client.S3`
        A client of the S3 service.

    Raises
    ------
    ModuleNotFoundError
        Raised if boto3 or botocore could not be imported.

    Notes
    -----
    The endpoint URL is read from the environment variable
    S3_ENDPOINT_URL. If it is unset or empty, the default AWS endpoint is
    used.

    If the environment variable LSST_DISABLE_BUCKET_VALIDATION exists
    and has a value that is not empty, "0", "f", "n", or "false"
    (case-insensitive), then bucket name validation is disabled. This
    disabling allows Ceph multi-tenancy colon separators to appear in
    bucket names.

    The client itself is built by the cached helper ``_get_s3_client``.
    """
    if boto3 is None:
        raise ModuleNotFoundError("Could not find boto3. Are you sure it is installed?")
    if botocore is None:
        raise ModuleNotFoundError("Could not find botocore. Are you sure it is installed?")

    # An empty string is treated the same as an unset variable.
    endpoint_url = os.environ.get("S3_ENDPOINT_URL") or None

    flag = os.environ.get("LSST_DISABLE_BUCKET_VALIDATION", "0")
    # Validation stays enabled only for the "falsy" spellings.
    keep_validation = re.search(r"^(0|f|n|false)?$", flag, re.IGNORECASE) is not None

    return _get_s3_client(endpoint_url, not keep_validation)

171 

172 

@functools.lru_cache()
def _get_s3_client(endpoint: Optional[str], skip_validation: bool) -> boto3.client:
    """Create an S3 client, cached per combination of arguments.

    Parameters
    ----------
    endpoint : `str` or `None`
        Endpoint URL passed to ``boto3.client`` as ``endpoint_url``.
        `getS3Client` passes `None` when no endpoint is configured.
    skip_validation : `bool`
        If `True`, the ``validate_bucket_name`` handler is unregistered
        from the client's ``before-parameter-build.s3`` event.

    Returns
    -------
    client : `boto3.client`
        The S3 client. Because of the cache, repeated calls with the same
        arguments return the same object.
    """
    # Read timeout of 180 with adaptive retry mode, at most 10 attempts.
    config = botocore.config.Config(read_timeout=180, retries={"mode": "adaptive", "max_attempts": 10})

    client = boto3.client("s3", endpoint_url=endpoint, config=config)
    if skip_validation:
        client.meta.events.unregister("before-parameter-build.s3", validate_bucket_name)
    return client

182 

183 

def s3CheckFileExists(
    path: Union[Location, ResourcePath, str],
    bucket: Optional[str] = None,
    client: Optional[boto3.client] = None,
) -> Tuple[bool, int]:
    """Report whether a key exists in a bucket, and its size if so.

    Parameters
    ----------
    path : `Location`, `ResourcePath` or `str`
        Location or ResourcePath containing the bucket name and filepath.
        A `str` is used as the key directly when ``bucket`` is given, and
        is otherwise parsed as a `ResourcePath`.
    bucket : `str`, optional
        Name of the bucket in which to look. Only consulted when ``path``
        is a `str`; for the other types the bucket comes from the path.
    client : `boto3.client`, optional
        S3 Client object to query, if not supplied boto3 will try to resolve
        the credentials as in order described in its manual_.

    Returns
    -------
    exists : `bool`
        True if key exists, False otherwise.
    size : `int`
        Size of the key, if key exists, in bytes, otherwise -1.

    Raises
    ------
    ModuleNotFoundError
        Raised if boto3 could not be imported.
    TypeError
        Raised if ``path`` is not one of the supported types.
    PermissionError
        Raised if the HEAD request is answered with HTTP status 403.

    Notes
    -----
    S3 Paths are sensitive to leading and trailing path separators.

    .. _manual: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/\
    configuration.html#configuring-credentials
    """
    if boto3 is None:
        raise ModuleNotFoundError("Could not find boto3. Are you sure it is installed?")

    s3 = getS3Client() if client is None else client

    # Work out which bucket and key to query.
    if isinstance(path, str) and bucket is not None:
        key = path
    elif isinstance(path, (str, ResourcePath, Location)):
        target = ResourcePath(path) if isinstance(path, str) else path
        bucket = target.netloc
        key = target.relativeToPathRoot
    else:
        raise TypeError(f"Unsupported path type: {path!r}.")

    try:
        head = s3.head_object(Bucket=bucket, Key=key)
    except s3.exceptions.ClientError as err:
        status = err.response["ResponseMetadata"]["HTTPStatusCode"]
        # A "resource unreachable" response means the key does not exist.
        if status == 404:
            return (False, -1)
        # head_object only answers 404 for a missing object when the user
        # holds s3:ListBucket; without it a 403 comes back. In practice
        # that usually means the file is absent, but it can equally mean
        # s3:GetObject is missing, and the two cases cannot be told apart
        # with certainty:
        # https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectHEAD.html
        if status == 403:
            raise PermissionError(
                "Forbidden HEAD operation error occured. "
                "Verify s3:ListBucket and s3:GetObject "
                "permissions are granted for your IAM user. "
            ) from err
        raise
    return (True, head["ContentLength"])

255 

256 

def bucketExists(bucketName: str, client: Optional[boto3.client] = None) -> bool:
    """Determine whether an S3 bucket of the given name exists.

    Parameters
    ----------
    bucketName : `str`
        Name of the S3 Bucket
    client : `boto3.client`, optional
        S3 Client object to query, if not supplied boto3 will try to resolve
        the credentials as in order described in its manual_.

    Returns
    -------
    exists : `bool`
        True if it exists, False if no Bucket with specified parameters is
        found.

    Raises
    ------
    ModuleNotFoundError
        Raised if boto3 could not be imported.

    .. _manual: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/\
    configuration.html#configuring-credentials
    """
    if boto3 is None:
        raise ModuleNotFoundError("Could not find boto3. Are you sure it is installed?")

    s3 = client if client is not None else getS3Client()
    try:
        # The request succeeding at all is taken as proof of existence.
        s3.get_bucket_location(Bucket=bucketName)
    except s3.exceptions.NoSuchBucket:
        return False
    return True

287 

288 

def setAwsEnvCredentials(
    accessKeyId: str = "dummyAccessKeyId", secretAccessKey: str = "dummySecretAccessKey"
) -> bool:
    """Populate the AWS credential environment variables if needed.

    Parameters
    ----------
    accessKeyId : `str`
        Value given to AWS_ACCESS_KEY_ID environmental variable. Defaults to
        `dummyAccessKeyId`.
    secretAccessKey : `str`
        Value given to AWS_SECRET_ACCESS_KEY environmental variable. Defaults
        to `dummySecretAccessKey`.

    Returns
    -------
    setEnvCredentials : `bool`
        True when environmental variables were set, False otherwise.

    Notes
    -----
    If either AWS_ACCESS_KEY_ID or AWS_SECRET_ACCESS_KEY is not set, both
    values are overwritten to ensure that the values are consistent.
    """
    credential_vars = ("AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY")
    if all(name in os.environ for name in credential_vars):
        # Both already present: leave the environment untouched.
        return False

    os.environ["AWS_ACCESS_KEY_ID"] = accessKeyId
    os.environ["AWS_SECRET_ACCESS_KEY"] = secretAccessKey
    return True

318 

319 

def unsetAwsEnvCredentials() -> None:
    """Remove the AWS credential variables from the environment.

    Deletes AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY if they are
    present; a variable that is not set is silently skipped.
    """
    for name in ("AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"):
        os.environ.pop(name, None)