Coverage for python/lsst/resources/s3utils.py: 23%

111 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-07-12 10:52 -0700

1# This file is part of lsst-resources. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# Use of this source code is governed by a 3-clause BSD-style 

10# license that can be found in the LICENSE file. 

11 

12from __future__ import annotations 

13 

14__all__ = ( 

15 "clean_test_environment", 

16 "getS3Client", 

17 "s3CheckFileExists", 

18 "bucketExists", 

19 "setAwsEnvCredentials", 

20 "unsetAwsEnvCredentials", 

21 "backoff", 

22 "all_retryable_errors", 

23 "max_retry_time", 

24 "retryable_io_errors", 

25 "retryable_client_errors", 

26 "_TooManyRequestsError", 

27) 

28 

29import functools 

30import os 

31import re 

32from collections.abc import Callable 

33from http.client import HTTPException, ImproperConnectionState 

34from types import ModuleType 

35from typing import TYPE_CHECKING, Any, cast 

36 

37from botocore.exceptions import ClientError 

38from botocore.handlers import validate_bucket_name 

39from urllib3.exceptions import HTTPError, RequestError 

40 

41if TYPE_CHECKING: 

42 from unittest import TestCase 

43 

44try: 

45 import boto3 

46except ImportError: 

47 boto3 = None 

48 

49try: 

50 import botocore 

51except ImportError: 

52 botocore = None 

53 

54 

55from ._resourcePath import ResourcePath 

56from .location import Location 

57 

58# https://pypi.org/project/backoff/ 

try:
    import backoff
except ImportError:

    class Backoff:
        """Stand-in used when the optional ``backoff`` package is missing.

        Both entry points hand back their first argument untouched. A
        decorator written as ``@backoff.on_exception(backoff.expo, ...)``
        therefore resolves first to ``expo`` and then, when applied, to the
        decorated function itself, so no retrying takes place.
        """

        @staticmethod
        def expo(func: Callable, *args: Any, **kwargs: Any) -> Callable:
            """Return ``func`` unchanged; every other argument is ignored."""
            return func

        @staticmethod
        def on_exception(func: Callable, *args: Any, **kwargs: Any) -> Callable:
            """Return ``func`` unchanged; every other argument is ignored."""
            return func

    # Expose the class under the module's name so call sites do not need to
    # know whether the real package was importable.
    backoff = cast(ModuleType, Backoff)

75 

76 

class _TooManyRequestsError(Exception):
    """Private exception that can be raised to trigger a retry on HTTP 429.

    botocore does not handle a 429 response specially and reports it as a
    generic ClientError, so a dedicated type is provided for retry logic
    to key on.
    """

85 

86 

# Settings for the "backoff" retry decorators. These retries are
# belt-and-suspenders on top of the retries built into Boto3, and exist to
# absorb semantic differences in errors between S3-like providers.
retryable_io_errors = (
    # http.client
    ImproperConnectionState,
    HTTPException,
    # urllib3.exceptions
    RequestError,
    HTTPError,
    # built-ins
    TimeoutError,
    ConnectionError,
    # private
    _TooManyRequestsError,
)

# A client error can be something like NoSuchKey, for which a retry may not
# be the right response. More thought is needed before this group is used.
retryable_client_errors = (
    # botocore.exceptions
    ClientError,
    # built-ins
    PermissionError,
)


# Single name covering everything that should trigger a retry. Client errors
# are deliberately left out for now.
all_retryable_errors = retryable_io_errors
# NOTE(review): presumably a duration in seconds handed to the backoff
# decorators -- confirm against the call sites.
max_retry_time = 60

118 

119 

def clean_test_environment(testcase: TestCase) -> None:
    """Blank out S3_ENDPOINT_URL for the duration of a test.

    Parameters
    ----------
    testcase : `unittest.TestCase`
        Reference to the test being run; used to add a cleanup function.

    Notes
    -----
    Nothing is changed, and no cleanup is registered, when the variable is
    unset or already empty. Otherwise it is set to the empty string and the
    previous value is put back by the registered cleanup.
    """
    variable = "S3_ENDPOINT_URL"
    saved = os.environ.get(variable)
    if not saved:
        return

    os.environ[variable] = ""
    # ``saved`` is known to be a non-empty string here, so the restore can
    # be registered directly without a wrapper function.
    testcase.addCleanup(os.environ.__setitem__, variable, saved)

139 

140 

def getS3Client() -> boto3.client:
    """Create a S3 client with AWS (default) or the specified endpoint.

    Returns
    -------
    s3client : `botocore.client.S3`
        A client of the S3 service.

    Raises
    ------
    ModuleNotFoundError
        Raised if ``boto3`` or ``botocore`` could not be imported.

    Notes
    -----
    The endpoint URL is read from the environment variable
    S3_ENDPOINT_URL. If it is unset or empty, the default AWS endpoint is
    used.

    If the environment variable LSST_DISABLE_BUCKET_VALIDATION exists
    and has a value that is not empty, "0", "f", "n", or "false"
    (case-insensitive), then bucket name validation is disabled. This
    disabling allows Ceph multi-tenancy colon separators to appear in
    bucket names.
    """
    if boto3 is None:
        raise ModuleNotFoundError("Could not find boto3. Are you sure it is installed?")
    if botocore is None:
        raise ModuleNotFoundError("Could not find botocore. Are you sure it is installed?")

    # An unset variable and an empty string both mean "no explicit endpoint".
    endpoint = os.environ.get("S3_ENDPOINT_URL") or None

    # Validation stays on only for the recognised "false-like" values.
    flag = os.environ.get("LSST_DISABLE_BUCKET_VALIDATION", "0")
    keep_validation = re.search(r"^(0|f|n|false)?$", flag, re.IGNORECASE) is not None

    return _get_s3_client(endpoint, not keep_validation)

172 

173 

@functools.lru_cache
def _get_s3_client(endpoint: str | None, skip_validation: bool) -> boto3.client:
    """Build the S3 client for one endpoint/validation combination.

    Parameters
    ----------
    endpoint : `str` or `None`
        Endpoint URL handed to ``boto3.client`` as ``endpoint_url``.
        `None` means no explicit endpoint is requested.
    skip_validation : `bool`
        If `True`, botocore's bucket name validation handler is
        unregistered from the new client.

    Returns
    -------
    s3client : `botocore.client.S3`
        A client of the S3 service.

    Notes
    -----
    The function is wrapped in `functools.lru_cache`, so calling it again
    with the same arguments returns the previously created client.
    """
    config = botocore.config.Config(read_timeout=180, retries={"mode": "adaptive", "max_attempts": 10})

    client = boto3.client("s3", endpoint_url=endpoint, config=config)
    if skip_validation:
        client.meta.events.unregister("before-parameter-build.s3", validate_bucket_name)
    return client

183 

184 

def s3CheckFileExists(
    path: Location | ResourcePath | str,
    bucket: str | None = None,
    client: boto3.client | None = None,
) -> tuple[bool, int]:
    """Report whether a key exists in a bucket, and how large it is.

    Parameters
    ----------
    path : `Location`, `ResourcePath` or `str`
        Location or ResourcePath containing the bucket name and filepath.
        A `str` is used directly as the key when ``bucket`` is given and is
        otherwise parsed as a `ResourcePath`.
    bucket : `str`, optional
        Name of the bucket in which to look. If provided, a `str` path is
        taken to be relative to this bucket. The value is replaced by the
        bucket embedded in ``path`` when ``path`` is a `Location` or
        `ResourcePath`.
    client : `boto3.client`, optional
        S3 Client object to query, if not supplied boto3 will try to resolve
        the credentials in the order described in its manual_.

    Returns
    -------
    exists : `bool`
        True if key exists, False otherwise.
    size : `int`
        Size of the key, if key exists, in bytes, otherwise -1.

    Raises
    ------
    ModuleNotFoundError
        Raised if ``boto3`` could not be imported.
    TypeError
        Raised if ``path`` is not one of the supported types.
    PermissionError
        Raised if the HEAD request is answered with HTTP status 403.

    Notes
    -----
    S3 Paths are sensitive to leading and trailing path separators.

    .. _manual: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/\
    configuration.html#configuring-credentials
    """
    if boto3 is None:
        raise ModuleNotFoundError("Could not find boto3. Are you sure it is installed?")

    s3 = getS3Client() if client is None else client

    # Work out which bucket and key the HEAD request should target.
    if isinstance(path, str):
        if bucket is None:
            parsed = ResourcePath(path)
            bucket, key = parsed.netloc, parsed.relativeToPathRoot
        else:
            key = path
    elif isinstance(path, ResourcePath | Location):
        bucket, key = path.netloc, path.relativeToPathRoot
    else:
        raise TypeError(f"Unsupported path type: {path!r}.")

    try:
        header = s3.head_object(Bucket=bucket, Key=key)
        return (True, header["ContentLength"])
    except s3.exceptions.ClientError as err:
        status = err.response["ResponseMetadata"]["HTTPStatusCode"]
        # A 404 means the key does not exist.
        if status == 404:
            return (False, -1)
        # head_object only answers 404 for a missing object when the user
        # holds s3:ListBucket permission; without it a 403 comes back. In
        # practice that usually means the file is absent, but it can also
        # mean s3:GetObject permission is lacking:
        # https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectHEAD.html
        # The two cases cannot be told apart with certainty.
        if status == 403:
            raise PermissionError(
                "Forbidden HEAD operation error occured. "
                "Verify s3:ListBucket and s3:GetObject "
                "permissions are granted for your IAM user. "
            ) from err
        raise

256 

257 

def bucketExists(bucketName: str, client: boto3.client | None = None) -> bool:
    """Check if the S3 bucket with the given name actually exists.

    Parameters
    ----------
    bucketName : `str`
        Name of the S3 Bucket.
    client : `boto3.client`, optional
        S3 Client object to query, if not supplied boto3 will try to resolve
        the credentials in the order described in its manual_.

    Returns
    -------
    exists : `bool`
        True if it exists, False if no Bucket with specified parameters is
        found.

    Raises
    ------
    ModuleNotFoundError
        Raised if ``boto3`` could not be imported.

    .. _manual: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/\
    configuration.html#configuring-credentials
    """
    if boto3 is None:
        raise ModuleNotFoundError("Could not find boto3. Are you sure it is installed?")

    s3 = client if client is not None else getS3Client()
    try:
        # Only the success or failure of the lookup matters, not its result.
        s3.get_bucket_location(Bucket=bucketName)
    except s3.exceptions.NoSuchBucket:
        return False
    else:
        return True

288 

289 

def setAwsEnvCredentials(
    accessKeyId: str = "dummyAccessKeyId", secretAccessKey: str = "dummySecretAccessKey"
) -> bool:
    """Set the AWS credential environment variables if they are incomplete.

    Parameters
    ----------
    accessKeyId : `str`
        Value given to AWS_ACCESS_KEY_ID environmental variable. Defaults to
        `dummyAccessKeyId`.
    secretAccessKey : `str`
        Value given to AWS_SECRET_ACCESS_KEY environmental variable. Defaults
        to `dummySecretAccessKey`.

    Returns
    -------
    setEnvCredentials : `bool`
        True when environmental variables were set, False otherwise.

    Notes
    -----
    If either AWS_ACCESS_KEY_ID or AWS_SECRET_ACCESS_KEY is not set, both
    values are overwritten to ensure that the values are consistent.
    """
    required = ("AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY")
    if all(name in os.environ for name in required):
        # A complete pair is already present; leave it alone.
        return False

    os.environ["AWS_ACCESS_KEY_ID"] = accessKeyId
    os.environ["AWS_SECRET_ACCESS_KEY"] = secretAccessKey
    return True

319 

320 

def unsetAwsEnvCredentials() -> None:
    """Remove the AWS credential variables from the environment.

    Deletes AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY from the
    environment. A variable that is not set is silently skipped.
    """
    for name in ("AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"):
        os.environ.pop(name, None)