Coverage for python/lsst/resources/s3utils.py: 28%

127 statements  

« prev     ^ index     » next       coverage.py v7.4.0, created at 2024-01-09 11:30 +0000

1# This file is part of lsst-resources. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# Use of this source code is governed by a 3-clause BSD-style 

10# license that can be found in the LICENSE file. 

11 

12from __future__ import annotations 

13 

# Names exported by a star-import of this module. Note that the
# underscore-prefixed _TooManyRequestsError is listed here as well, so it is
# exported despite its private-looking name.
__all__ = (
    "clean_test_environment",
    "getS3Client",
    "s3CheckFileExists",
    "bucketExists",
    "setAwsEnvCredentials",
    "unsetAwsEnvCredentials",
    "backoff",
    "all_retryable_errors",
    "max_retry_time",
    "retryable_io_errors",
    "retryable_client_errors",
    "_TooManyRequestsError",
    "clean_test_environment_for_s3",
)

29 

30import functools 

31import os 

32import re 

33from collections.abc import Callable, Iterator 

34from contextlib import contextmanager 

35from http.client import HTTPException, ImproperConnectionState 

36from types import ModuleType 

37from typing import TYPE_CHECKING, Any, cast 

38from unittest.mock import patch 

39 

40from botocore.exceptions import ClientError 

41from botocore.handlers import validate_bucket_name 

42from deprecated.sphinx import deprecated 

43from urllib3.exceptions import HTTPError, RequestError 

44 

45if TYPE_CHECKING: 

46 from unittest import TestCase 

47 

48 

49try: 

50 import boto3 

51except ImportError: 

52 boto3 = None 

53 

54try: 

55 import botocore 

56except ImportError: 

57 botocore = None 

58 

59 

60from ._resourcePath import ResourcePath 

61from .location import Location 

62 

# Optional dependency: https://pypi.org/project/backoff/
try:
    import backoff
except ImportError:

    class Backoff:
        """Do-nothing stand-in used when the ``backoff`` package is missing.

        Both entry points hand back their first positional argument
        untouched and ignore everything else, so no retrying takes place.
        """

        @staticmethod
        def expo(func: Callable, *args: Any, **kwargs: Any) -> Callable:
            """Return ``func`` unchanged; all other arguments are ignored."""
            return func

        @staticmethod
        def on_exception(func: Callable, *args: Any, **kwargs: Any) -> Callable:
            """Return ``func`` unchanged; all other arguments are ignored."""
            return func

    # Publish the stand-in under the name the real module would have had.
    # The cast only serves static type checkers.
    backoff = cast(ModuleType, Backoff)

80 

81 

class _TooManyRequestsError(Exception):
    """Signal an HTTP 429 response that should be retried.

    botocore does not handle a 429 error itself and reports it as a generic
    ClientError, so this private exception gives retry logic a distinct
    type to react to.
    """

90 

91 

# Settings for the "backoff" retry decorators. These retries are a
# belt-and-suspenders addition to the retries built into Boto3, to account
# for semantic differences in errors between S3-like providers.
retryable_io_errors = (
    # http.client
    ImproperConnectionState,
    HTTPException,
    # urllib3.exceptions
    RequestError,
    HTTPError,
    # built-ins
    TimeoutError,
    ConnectionError,
    # private
    _TooManyRequestsError,
)

# A client error can include NoSuchKey, so a retry may not be the right
# thing to do. This may require more consideration if it is to be used.
retryable_client_errors = (
    # botocore.exceptions
    ClientError,
    # built-ins
    PermissionError,
)


# Combine all errors into an easy package. For now client errors
# are not included.
all_retryable_errors = retryable_io_errors
# NOTE(review): presumably an upper bound in seconds on the total time spent
# retrying; the unit is not established here -- confirm at the call sites.
max_retry_time = 60

123 

124 

@deprecated(
    reason="This has been replaced by a new function, clean_test_environment_for_s3()."
    " Will be removed after v26.2023.5000",
    version="26.2023.5000",
    category=FutureWarning,
)
def clean_test_environment(testcase: TestCase) -> None:
    """Blank out S3_ENDPOINT_URL for a test and restore it afterwards.

    Parameters
    ----------
    testcase : `unittest.TestCase`
        Test being run; the callback that restores the original value is
        registered on it with ``addCleanup``.

    Notes
    -----
    Nothing is done when the variable is unset or already empty. Otherwise
    it is set to the empty string (it is not deleted).
    """
    saved_endpoint = os.environ.get("S3_ENDPOINT_URL")

    if saved_endpoint:
        os.environ["S3_ENDPOINT_URL"] = ""

        def restore() -> None:
            # saved_endpoint is known to be a non-empty string here.
            os.environ["S3_ENDPOINT_URL"] = saved_endpoint

        testcase.addCleanup(restore)

150 

151 

@contextmanager
def clean_test_environment_for_s3() -> Iterator[None]:
    """Reset S3 environment to ensure that unit tests with a mock S3 can't
    accidentally reference real infrastructure.

    Notes
    -----
    While the context is active, ``AWS_ACCESS_KEY_ID`` and
    ``AWS_SECRET_ACCESS_KEY`` hold fixed test values, and the endpoint,
    token, profile and credential/config file variables are removed from
    the environment. The environment is restored on exit.

    The cache of S3 clients is emptied on entry and again on exit, so that
    neither a client created before the context nor one created inside it
    (with the test credentials) is handed out on the other side.
    """
    test_credentials = {
        "AWS_ACCESS_KEY_ID": "test-access-key",
        "AWS_SECRET_ACCESS_KEY": "test-secret-access-key",
    }
    removed_variables = (
        "S3_ENDPOINT_URL",
        "AWS_SECURITY_TOKEN",
        "AWS_SESSION_TOKEN",
        "AWS_PROFILE",
        "AWS_SHARED_CREDENTIALS_FILE",
        "AWS_CONFIG_FILE",
    )
    with patch.dict(os.environ, test_credentials) as patched_environ:
        for var in removed_variables:
            patched_environ.pop(var, None)
        # Clear the cached boto3 S3 client instances.
        # This helps us avoid a potential situation where the client could be
        # instantiated before moto mocks are installed, which would prevent the
        # mocks from taking effect.
        _get_s3_client.cache_clear()
        try:
            yield
        finally:
            # Clients built inside the context carry the test credentials;
            # drop them so later callers do not receive a stale client.
            _get_s3_client.cache_clear()

179 

180 

def getS3Client() -> boto3.client:
    """Return an S3 client for AWS (default) or a configured endpoint.

    Returns
    -------
    s3client : `botocore.client.S3`
        A client of the S3 service.

    Raises
    ------
    ModuleNotFoundError
        Raised if boto3 or botocore could not be imported.

    Notes
    -----
    The endpoint URL is taken from the environment variable
    S3_ENDPOINT_URL. If it is unset or empty, the default AWS one is used.

    If the environment variable LSST_DISABLE_BUCKET_VALIDATION exists
    and has a value that is not empty, "0", "f", "n", or "false"
    (case-insensitive), then bucket name validation is disabled. This
    disabling allows Ceph multi-tenancy colon separators to appear in
    bucket names.
    """
    if boto3 is None:
        raise ModuleNotFoundError("Could not find boto3. Are you sure it is installed?")
    if botocore is None:
        raise ModuleNotFoundError("Could not find botocore. Are you sure it is installed?")

    # An empty string is treated exactly like an unset variable.
    endpoint = os.environ.get("S3_ENDPOINT_URL") or None

    flag = os.environ.get("LSST_DISABLE_BUCKET_VALIDATION", "0")
    keep_validation = re.search(r"^(0|f|n|false)?$", flag, re.I) is not None

    return _get_s3_client(endpoint, not keep_validation)

212 

213 

@functools.lru_cache
def _get_s3_client(endpoint: str | None, skip_validation: bool) -> boto3.client:
    """Build the S3 client for an endpoint, caching it by argument values.

    Parameters
    ----------
    endpoint : `str` or `None`
        Endpoint URL passed to boto3 as ``endpoint_url``. `getS3Client`
        passes `None` when no endpoint is configured.
    skip_validation : `bool`
        If `True`, the bucket-name validation handler is unregistered from
        the new client.

    Returns
    -------
    client : `boto3.client`
        The S3 client. Repeated calls with the same arguments return the
        cached instance until ``cache_clear()`` is called.
    """
    config = botocore.config.Config(read_timeout=180, retries={"mode": "adaptive", "max_attempts": 10})

    client = boto3.client("s3", endpoint_url=endpoint, config=config)
    if skip_validation:
        client.meta.events.unregister("before-parameter-build.s3", validate_bucket_name)
    return client

223 

224 

def s3CheckFileExists(
    path: Location | ResourcePath | str,
    bucket: str | None = None,
    client: boto3.client | None = None,
) -> tuple[bool, int]:
    """Report whether a key exists in a bucket and, if so, its size.

    Parameters
    ----------
    path : `Location`, `ResourcePath` or `str`
        Location or ResourcePath containing the bucket name and filepath.
        A `str` is used as the key as-is when ``bucket`` is given, and is
        otherwise converted to a `ResourcePath` first.
    bucket : `str`, optional
        Name of the bucket in which to look. If provided, a `str` path is
        assumed to be relative to the given bucket.
    client : `boto3.client`, optional
        S3 Client object to query. If not supplied, one is obtained from
        `getS3Client` and boto3 will try to resolve the credentials in the
        order described in its manual_.

    Returns
    -------
    exists : `bool`
        True if key exists, False otherwise.
    size : `int`
        Size of the key, if key exists, in bytes, otherwise -1.

    Raises
    ------
    ModuleNotFoundError
        Raised if boto3 could not be imported.
    TypeError
        Raised if ``path`` is not one of the supported types.
    PermissionError
        Raised if the HEAD request is answered with HTTP status 403.
    _TooManyRequestsError
        Raised if the HEAD request is answered with HTTP status 429.

    Notes
    -----
    S3 Paths are sensitive to leading and trailing path separators.

    Client errors with any other HTTP status are re-raised unchanged.

    .. _manual: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/\
    configuration.html#configuring-credentials
    """
    if boto3 is None:
        raise ModuleNotFoundError("Could not find boto3. Are you sure it is installed?")

    s3 = getS3Client() if client is None else client

    if not isinstance(path, str):
        if not isinstance(path, ResourcePath | Location):
            raise TypeError(f"Unsupported path type: {path!r}.")
        bucket, key = path.netloc, path.relativeToPathRoot
    elif bucket is None:
        parsed = ResourcePath(path)
        bucket, key = parsed.netloc, parsed.relativeToPathRoot
    else:
        key = path

    try:
        head = s3.head_object(Bucket=bucket, Key=key)
    except s3.exceptions.ClientError as err:
        status = err.response["ResponseMetadata"]["HTTPStatusCode"]
        # resource unreachable error means key does not exist
        if status == 404:
            return (False, -1)
        # head_object returns 404 when object does not exist only when user has
        # s3:ListBucket permission. If list permission does not exist a 403 is
        # returned. In practical terms this generally means that the file does
        # not exist, but it could also mean user lacks s3:GetObject permission:
        # https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectHEAD.html
        # It does not seem possible to tell the two cases apart with certainty.
        if status == 403:
            raise PermissionError(
                "Forbidden HEAD operation error occurred. "
                "Verify s3:ListBucket and s3:GetObject "
                "permissions are granted for your IAM user. "
            ) from err
        if status == 429:
            # boto3, incorrectly, does not automatically retry with 429
            # so instead we raise an explicit retry exception for backoff.
            raise _TooManyRequestsError(str(err)) from err
        raise
    return (True, head["ContentLength"])

301 

302 

def bucketExists(bucketName: str, client: boto3.client | None = None) -> bool:
    """Report whether an S3 bucket with the given name exists.

    Parameters
    ----------
    bucketName : `str`
        Name of the S3 Bucket.
    client : `boto3.client`, optional
        S3 Client object to query. If not supplied, one is obtained by
        calling `getS3Client`.

    Returns
    -------
    exists : `bool`
        True if the bucket location could be retrieved, False if the
        request failed with the client's ``NoSuchBucket`` exception.

    Raises
    ------
    ModuleNotFoundError
        Raised if boto3 could not be imported.
    """
    if boto3 is None:
        raise ModuleNotFoundError("Could not find boto3. Are you sure it is installed?")

    s3 = client if client is not None else getS3Client()
    try:
        s3.get_bucket_location(Bucket=bucketName)
    except s3.exceptions.NoSuchBucket:
        return False
    return True

330 

331 

@deprecated(
    reason="This function could accidentally leave real credentials in the environment during testing."
    " A new function, clean_test_environment_for_s3(), can be used to set up mock credentials."
    " Will be removed after v26.2023.5000",
    version="26.2023.5000",
    category=FutureWarning,
)
def setAwsEnvCredentials(
    accessKeyId: str = "dummyAccessKeyId", secretAccessKey: str = "dummySecretAccessKey"
) -> bool:
    """Populate the AWS credential environment variables if incomplete.

    Parameters
    ----------
    accessKeyId : `str`
        Value given to AWS_ACCESS_KEY_ID environmental variable. Defaults to
        `dummyAccessKeyId`.
    secretAccessKey : `str`
        Value given to AWS_SECRET_ACCESS_KEY environmental variable. Defaults
        to `dummySecretAccessKey`.

    Returns
    -------
    setEnvCredentials : `bool`
        True when environmental variables were set, False otherwise.

    Notes
    -----
    When both AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY are already
    present, nothing is changed. If either one is missing, both are
    overwritten so that the pair stays consistent.
    """
    credential_variables = ("AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY")
    if all(name in os.environ for name in credential_variables):
        return False

    os.environ["AWS_ACCESS_KEY_ID"] = accessKeyId
    os.environ["AWS_SECRET_ACCESS_KEY"] = secretAccessKey
    return True

368 

369 

@deprecated(
    reason="This has been replaced by a new function, clean_test_environment_for_s3()."
    " Will be removed after v26.2023.5000",
    version="26.2023.5000",
    category=FutureWarning,
)
def unsetAwsEnvCredentials() -> None:
    """Remove the AWS credential variables from the environment.

    Deletes AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY; a variable that is
    not set is silently skipped.
    """
    for name in ("AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"):
        os.environ.pop(name, None)

384 if "AWS_SECRET_ACCESS_KEY" in os.environ: 

385 del os.environ["AWS_SECRET_ACCESS_KEY"]