Coverage for python/lsst/resources/s3utils.py: 23%

114 statements  

« prev     ^ index     » next       coverage.py v7.3.1, created at 2023-09-13 09:44 +0000

1# This file is part of lsst-resources. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# Use of this source code is governed by a 3-clause BSD-style 

10# license that can be found in the LICENSE file. 

11 

12from __future__ import annotations 

13 

# Public names of this module.
# NOTE(review): ``backoff`` (possibly the no-op stand-in defined below) and
# the underscore-prefixed ``_TooManyRequestsError`` are exported as well --
# presumably so that sibling modules can import them; confirm before pruning.
__all__ = (
    "clean_test_environment",
    "getS3Client",
    "s3CheckFileExists",
    "bucketExists",
    "setAwsEnvCredentials",
    "unsetAwsEnvCredentials",
    "backoff",
    "all_retryable_errors",
    "max_retry_time",
    "retryable_io_errors",
    "retryable_client_errors",
    "_TooManyRequestsError",
)

28 

29import functools 

30import os 

31import re 

32from collections.abc import Callable 

33from http.client import HTTPException, ImproperConnectionState 

34from types import ModuleType 

35from typing import TYPE_CHECKING, Any, cast 

36 

37from botocore.exceptions import ClientError 

38from botocore.handlers import validate_bucket_name 

39from urllib3.exceptions import HTTPError, RequestError 

40 

41if TYPE_CHECKING: 

42 from unittest import TestCase 

43 

44try: 

45 import boto3 

46except ImportError: 

47 boto3 = None 

48 

49try: 

50 import botocore 

51except ImportError: 

52 botocore = None 

53 

54 

55from ._resourcePath import ResourcePath 

56from .location import Location 

57 

# https://pypi.org/project/backoff/
try:
    import backoff
except ImportError:

    class Backoff:
        """No-op stand-in used when the ``backoff`` package is not installed.

        Both methods return their first argument unchanged. As a result
        ``@backoff.on_exception(backoff.expo, ...)`` evaluates to ``expo``,
        and applying ``expo`` to the decorated function hands that function
        back untouched: decorated code still works, it just never retries.
        """

        @staticmethod
        def expo(func: Callable, *args: Any, **kwargs: Any) -> Callable:
            """Return ``func`` unchanged, ignoring every other argument."""
            return func

        @staticmethod
        def on_exception(func: Callable, *args: Any, **kwargs: Any) -> Callable:
            """Return ``func`` unchanged, ignoring every other argument."""
            return func

    # Present the stand-in class under the module's name so the rest of the
    # file can use ``backoff.<name>`` regardless of which one was bound.
    backoff = cast(ModuleType, Backoff)

75 

76 

class _TooManyRequestsError(Exception):
    """Private exception that can be used for 429 retry.

    botocore refuses to deal with 429 error itself so issues a generic
    ClientError.
    """

85 

86 

# Settings for "backoff" retry decorators. These retries are belt-and-
# suspenders along with the retries built into Boto3, to account for
# semantic differences in errors between S3-like providers.
retryable_io_errors = (
    # http.client
    ImproperConnectionState,
    HTTPException,
    # urllib3.exceptions
    RequestError,
    HTTPError,
    # built-ins
    TimeoutError,
    ConnectionError,
    # private
    _TooManyRequestsError,
)

# Client error can include NoSuchKey so retry may not be the right
# thing. This may require more consideration if it is to be used.
retryable_client_errors = (
    # botocore.exceptions
    ClientError,
    # built-ins
    PermissionError,
)


# Combine all errors into an easy package. For now client errors
# are not included.
all_retryable_errors = retryable_io_errors
# NOTE(review): presumably seconds, handed to the backoff decorators as
# their time limit -- the consumer is not visible here; confirm.
max_retry_time = 60

118 

119 

def clean_test_environment(testcase: TestCase) -> None:
    """Clear S3_ENDPOINT_URL then restore it at the end of a test.

    Parameters
    ----------
    testcase : `unittest.TestCase`
        Reference to the test being run; used to add a cleanup function.

    Notes
    -----
    "Clearing" means setting the variable to the empty string; it is not
    removed from the environment. If the variable is unset or already
    empty nothing is changed and no cleanup is registered.
    """
    endpoint = os.environ.get("S3_ENDPOINT_URL")
    if not endpoint:
        return

    def cleanup() -> None:
        # Always true at runtime because of the early return above; the
        # check appears to exist for type checkers that do not carry the
        # narrowing of ``endpoint`` into a closure.
        if endpoint is not None:
            os.environ["S3_ENDPOINT_URL"] = endpoint

    # Register the restore step before touching the environment so the
    # original value cannot be lost if registration itself fails.
    testcase.addCleanup(cleanup)
    os.environ["S3_ENDPOINT_URL"] = ""

139 

140 

def getS3Client() -> boto3.client:
    """Create a S3 client with AWS (default) or the specified endpoint.

    Returns
    -------
    s3client : `botocore.client.S3`
        A client of the S3 service.

    Raises
    ------
    ModuleNotFoundError
        Raised if ``boto3`` or ``botocore`` could not be imported.

    Notes
    -----
    The endpoint URL is from the environment variable S3_ENDPOINT_URL.
    If none is specified, the default AWS one is used.

    If the environment variable LSST_DISABLE_BUCKET_VALIDATION exists
    and has a value that is not empty, "0", "f", "n", or "false"
    (case-insensitive), then bucket name validation is disabled.  This
    disabling allows Ceph multi-tenancy colon separators to appear in
    bucket names.

    The client itself is built by the cached helper ``_get_s3_client``, so
    repeated calls with the same endpoint and validation setting receive
    the same client object.
    """
    if boto3 is None:
        raise ModuleNotFoundError("Could not find boto3. Are you sure it is installed?")
    if botocore is None:
        raise ModuleNotFoundError("Could not find botocore. Are you sure it is installed?")

    # An empty S3_ENDPOINT_URL is treated exactly like an unset one.
    endpoint = os.environ.get("S3_ENDPOINT_URL") or None

    # Anything other than "", "0", "f", "n" or "false" switches validation off.
    disable_value = os.environ.get("LSST_DISABLE_BUCKET_VALIDATION", "0")
    skip_validation = not re.search(r"^(0|f|n|false)?$", disable_value, re.I)

    return _get_s3_client(endpoint, skip_validation)

172 

173 

@functools.lru_cache
def _get_s3_client(endpoint: str | None, skip_validation: bool) -> boto3.client:
    """Build the S3 client for one endpoint / validation combination.

    The result is memoised by `functools.lru_cache`, keyed on both
    arguments, so each distinct combination is constructed only once while
    it remains in the cache.

    Parameters
    ----------
    endpoint : `str` or `None`
        Endpoint URL handed to ``boto3.client`` as ``endpoint_url``;
        `None` leaves the choice of endpoint to boto3.
    skip_validation : `bool`
        If `True`, unregister botocore's ``validate_bucket_name`` handler
        from the client's ``before-parameter-build.s3`` event.

    Returns
    -------
    client : `botocore.client.S3`
        Client configured with a 180 second read timeout and adaptive
        retries (at most 10 attempts).
    """
    config = botocore.config.Config(read_timeout=180, retries={"mode": "adaptive", "max_attempts": 10})

    client = boto3.client("s3", endpoint_url=endpoint, config=config)
    if skip_validation:
        client.meta.events.unregister("before-parameter-build.s3", validate_bucket_name)
    return client

183 

184 

def s3CheckFileExists(
    path: Location | ResourcePath | str,
    bucket: str | None = None,
    client: boto3.client | None = None,
) -> tuple[bool, int]:
    """Return if the file exists in the bucket or not.

    Parameters
    ----------
    path : `Location`, `ResourcePath` or `str`
        Location or ResourcePath containing the bucket name and filepath.
        A `str` is used verbatim as the key when ``bucket`` is given and is
        otherwise parsed as a `ResourcePath`.
    bucket : `str`, optional
        Name of the bucket in which to look. If provided, a `str` path will
        be assumed to be relative to the given bucket. Ignored when ``path``
        is a `Location` or `ResourcePath`, which carry their own bucket.
    client : `boto3.client`, optional
        S3 Client object to query, if not supplied boto3 will try to resolve
        the credentials as in order described in its manual_.

    Returns
    -------
    exists : `bool`
        True if key exists, False otherwise.
    size : `int`
        Size of the key, if key exists, in bytes, otherwise -1.

    Raises
    ------
    ModuleNotFoundError
        Raised if ``boto3`` could not be imported.
    TypeError
        Raised if ``path`` is not one of the supported types.
    PermissionError
        Raised if the HEAD request is answered with HTTP status 403.
    _TooManyRequestsError
        Raised if the HEAD request is answered with HTTP status 429.
    botocore.exceptions.ClientError
        Re-raised unchanged for any other failing HTTP status.

    Notes
    -----
    S3 Paths are sensitive to leading and trailing path separators.

    .. _manual: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/\
    configuration.html#configuring-credentials
    """
    if boto3 is None:
        raise ModuleNotFoundError("Could not find boto3. Are you sure it is installed?")

    if client is None:
        client = getS3Client()

    if isinstance(path, str):
        if bucket is not None:
            filepath = path
        else:
            uri = ResourcePath(path)
            bucket = uri.netloc
            filepath = uri.relativeToPathRoot
    elif isinstance(path, ResourcePath | Location):
        bucket = path.netloc
        filepath = path.relativeToPathRoot
    else:
        raise TypeError(f"Unsupported path type: {path!r}.")

    try:
        obj = client.head_object(Bucket=bucket, Key=filepath)
    except client.exceptions.ClientError as err:
        # resource unreachable error means key does not exist
        errcode = err.response["ResponseMetadata"]["HTTPStatusCode"]
        if errcode == 404:
            return (False, -1)
        # head_object returns 404 when object does not exist only when user has
        # s3:ListBucket permission. If list permission does not exist a 403 is
        # returned. In practical terms this generally means that the file does
        # not exist, but it could also mean user lacks s3:GetObject permission:
        # https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectHEAD.html
        # It does not seem possible to tell the two cases apart with certainty.
        if errcode == 403:
            raise PermissionError(
                "Forbidden HEAD operation error occurred. "
                "Verify s3:ListBucket and s3:GetObject "
                "permissions are granted for your IAM user. "
            ) from err
        if errcode == 429:
            # boto3, incorrectly, does not automatically retry with 429
            # so instead we raise an explicit retry exception for backoff.
            raise _TooManyRequestsError(str(err)) from err
        raise

    return (True, obj["ContentLength"])

261 

262 

def bucketExists(bucketName: str, client: boto3.client | None = None) -> bool:
    """Check if the S3 bucket with the given name actually exists.

    Parameters
    ----------
    bucketName : `str`
        Name of the S3 Bucket.
    client : `boto3.client`, optional
        S3 Client object to query, if not supplied boto3 will try to resolve
        the credentials as in order described in its manual_.

    Returns
    -------
    exists : `bool`
        True if it exists, False if no Bucket with specified parameters is
        found.

    Raises
    ------
    ModuleNotFoundError
        Raised if ``boto3`` could not be imported.

    Notes
    -----
    Only the client's ``NoSuchBucket`` error is translated into `False`;
    any other exception raised by the request propagates to the caller.

    .. _manual: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/\
    configuration.html#configuring-credentials
    """
    if boto3 is None:
        raise ModuleNotFoundError("Could not find boto3. Are you sure it is installed?")

    if client is None:
        client = getS3Client()

    try:
        client.get_bucket_location(Bucket=bucketName)
    except client.exceptions.NoSuchBucket:
        return False
    return True

293 

294 

def setAwsEnvCredentials(
    accessKeyId: str = "dummyAccessKeyId", secretAccessKey: str = "dummySecretAccessKey"
) -> bool:
    """Set AWS credentials environmental variables.

    Parameters
    ----------
    accessKeyId : `str`
        Value given to AWS_ACCESS_KEY_ID environmental variable. Defaults to
        `dummyAccessKeyId`.
    secretAccessKey : `str`
        Value given to AWS_SECRET_ACCESS_KEY environmental variable. Defaults
        to `dummySecretAccessKey`.

    Returns
    -------
    setEnvCredentials : `bool`
        True when environmental variables were set, False otherwise.

    Notes
    -----
    If both AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY are already present
    in the environment nothing is modified. If either one is missing, both
    are overwritten to ensure that the values are consistent.
    """
    if "AWS_ACCESS_KEY_ID" in os.environ and "AWS_SECRET_ACCESS_KEY" in os.environ:
        # A complete pair is already configured; leave it alone.
        return False

    os.environ["AWS_ACCESS_KEY_ID"] = accessKeyId
    os.environ["AWS_SECRET_ACCESS_KEY"] = secretAccessKey
    return True

324 

325 

def unsetAwsEnvCredentials() -> None:
    """Unset AWS credential environment variables.

    Unsets the AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environmental
    variables. A variable that is not currently set is silently skipped.
    """
    for name in ("AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"):
        os.environ.pop(name, None)