Coverage for tests/test_s3.py: 26%

173 statements  

« prev     ^ index     » next       coverage.py v7.5.1, created at 2024-05-16 02:51 -0700

1# This file is part of lsst-resources. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# Use of this source code is governed by a 3-clause BSD-style 

10# license that can be found in the LICENSE file. 

11 

12import os 

13import time 

14import unittest 

15from inspect import signature 

16from unittest import mock 

17from urllib.parse import parse_qs, urlparse 

18 

19from lsst.resources import ResourcePath 

20from lsst.resources.s3 import S3ResourcePath 

21from lsst.resources.s3utils import clean_test_environment_for_s3 

22from lsst.resources.tests import GenericReadWriteTestCase, GenericTestCase 

23 

try:
    import boto3
    import botocore

    # moto v5 renamed the per-service mock decorators to a single
    # ``mock_aws``; fall back to the old ``mock_s3`` name for moto v4.
    try:
        from moto import mock_aws  # v5
    except ImportError:
        from moto import mock_s3 as mock_aws
except ImportError:
    # boto3 (and therefore S3 support) is optional; tests that need it are
    # skipped below via ``@unittest.skipIf(not boto3, ...)``.
    boto3 = None

    def mock_aws(cls):
        """No-op decorator in case moto mock_aws can not be imported."""
        return cls

38 

39 

class GenericS3TestCase(GenericTestCase, unittest.TestCase):
    """Generic tests of S3 URIs."""

    # URI scheme exercised by the shared GenericTestCase tests.
    scheme = "s3"
    # Bucket name used as the URI network location.
    netloc = "my_bucket"

45 

46 

class S3ReadWriteTestCaseBase(GenericReadWriteTestCase):
    """Tests of reading and writing S3 URIs.

    Subclasses must define ``bucket`` and ``netloc`` class attributes and
    may override ``s3_endpoint_url`` to target a non-default endpoint.
    """

    scheme = "s3"
    # Explicit endpoint to use when talking to the mocked S3 service;
    # ``None`` means the boto3 default endpoint.
    s3_endpoint_url: str | None = None

    def setUp(self):
        # Remove any ambient AWS configuration that could leak into tests.
        self.enterContext(clean_test_environment_for_s3())

        # Enable S3 mocking of tests.
        self.enterContext(mock_aws())

        # MOTO needs to know that we expect Bucket bucketname to exist
        s3 = boto3.resource("s3", endpoint_url=self.s3_endpoint_url)
        s3.create_bucket(Bucket=self.bucket)

        super().setUp()

    def tearDown(self):
        # Use the same endpoint as setUp so that the bucket created there
        # is the one being emptied and deleted.
        s3 = boto3.resource("s3", endpoint_url=self.s3_endpoint_url)
        bucket = s3.Bucket(self.bucket)
        try:
            bucket.objects.all().delete()
        except botocore.exceptions.ClientError as e:
            if e.response["Error"]["Code"] == "404":
                # the key was not reachable - pass
                pass
            else:
                raise

        bucket.delete()

        # Reset any per-test override of the threading configuration.
        S3ResourcePath.use_threads = None

        super().tearDown()

    def test_bucket_fail(self):
        # Deliberately create URI with unknown bucket.
        uri = ResourcePath("s3://badbucket/something/")

        with self.assertRaises(ValueError):
            uri.mkdir()

        with self.assertRaises(FileNotFoundError):
            uri.remove()

    def test_transfer_progress(self):
        """Test progress bar reporting for upload and download."""
        remote = self.root_uri.join("test.dat")
        remote.write(b"42")
        with ResourcePath.temporary_uri(suffix=".dat") as tmp:
            # Download from S3.
            with self.assertLogs("lsst.resources", level="DEBUG") as cm:
                tmp.transfer_from(remote, transfer="auto")
            self.assertRegex("".join(cm.output), r"test\.dat.*100\%")

            # Upload to S3.
            with self.assertLogs("lsst.resources", level="DEBUG") as cm:
                remote.transfer_from(tmp, transfer="auto", overwrite=True)
            self.assertRegex("".join(cm.output), rf"{tmp.basename()}.*100\%")

    def test_handle(self):
        """Test the file-like handle returned by ``open`` for S3 URIs,
        including multipart upload behavior for large writes.
        """
        remote = self.root_uri.join("test_handle.dat")
        with remote.open("wb") as handle:
            self.assertTrue(handle.writable())
            # Write 6 megabytes to make sure partial writes work.
            handle.write(6 * 1024 * 1024 * b"a")
            self.assertEqual(handle.tell(), 6 * 1024 * 1024)
            handle.flush()
            # Flushing 6 MiB should have started a multipart upload.
            self.assertGreaterEqual(len(handle._multiPartUpload), 1)

            # Verify the file can't be seeked back before flushed data.
            with self.assertRaises(OSError):
                handle.seek(0)

            # Write more bytes.
            handle.write(1024 * b"c")

            # Seek back and overwrite the unflushed tail.
            handle.seek(6 * 1024 * 1024)
            handle.write(1024 * b"b")

        with remote.open("rb") as handle:
            self.assertTrue(handle.readable())
            # Read the first 6 megabytes.
            result = handle.read(6 * 1024 * 1024)
            self.assertEqual(result, 6 * 1024 * 1024 * b"a")
            self.assertEqual(handle.tell(), 6 * 1024 * 1024)
            # Verify an additional read gets the next part.
            result = handle.read(1024)
            self.assertEqual(result, 1024 * b"b")
            # Seek back to the beginning to verify seeking.
            handle.seek(0)
            result = handle.read(1024)
            self.assertEqual(result, 1024 * b"a")

    def test_url_signing(self):
        self._test_url_signing_case("url-signing-test.txt", b"test123")
        # A zero byte presigned S3 HTTP URL is a weird edge case, because we
        # emulate HEAD requests using a 1-byte GET.
        self._test_url_signing_case("url-signing-test-zero-bytes.txt", b"")
        # Should be the same as a normal case, but check it for paranoia since
        # it's on the boundary of the read size.
        self._test_url_signing_case("url-signing-test-one-byte.txt", b"t")

    def _test_url_signing_case(self, filename: str, test_data: bytes):
        """Round-trip ``test_data`` through presigned PUT/GET URLs for
        ``filename`` and verify expiration, content, existence, and size.
        """
        s3_path = self.root_uri.join(filename)

        put_url = s3_path.generate_presigned_put_url(expiration_time_seconds=1800)
        self._check_presigned_url(put_url, 1800)
        get_url = s3_path.generate_presigned_get_url(expiration_time_seconds=3600)
        self._check_presigned_url(get_url, 3600)

        # Moto monkeypatches the 'requests' library to mock access to presigned
        # URLs, so we are able to use HttpResourcePath to access the URLs in
        # this test.
        ResourcePath(put_url).write(test_data)
        get_path = ResourcePath(get_url)
        retrieved = get_path.read()
        self.assertEqual(retrieved, test_data)
        self.assertTrue(get_path.exists())
        self.assertEqual(get_path.size(), len(test_data))

    def test_nonexistent_presigned_url(self):
        s3_path = self.root_uri.join("this-is-a-missing-file.txt")
        get_url = s3_path.generate_presigned_get_url(expiration_time_seconds=3600)
        get_path = ResourcePath(get_url)
        # Check the HttpResourcePath implementation for presigned S3 urls.
        # Nothing has been uploaded to this URL, so it shouldn't exist.
        self.assertFalse(get_path.exists())
        with self.assertRaises(FileNotFoundError):
            get_path.size()

    def _check_presigned_url(self, url: str, expiration_time_seconds: int):
        """Verify that ``url`` is an HTTPS presigned URL whose ``Expires``
        query parameter is approximately ``expiration_time_seconds`` from now.
        """
        parsed = urlparse(url)
        self.assertEqual(parsed.scheme, "https")

        actual_expiration_timestamp = int(parse_qs(parsed.query)["Expires"][0])
        current_time = int(time.time())
        expected_expiration_timestamp = current_time + expiration_time_seconds
        # Allow some flex in the expiration time in case this test process goes
        # out to lunch for a while on a busy CI machine
        self.assertLessEqual(abs(expected_expiration_timestamp - actual_expiration_timestamp), 120)

    def test_threading_true(self):
        # LSST_S3_USE_THREADS=True must enable threaded transfers.
        with mock.patch.dict(os.environ, {"LSST_S3_USE_THREADS": "True"}):
            S3ResourcePath.use_threads = None
            test_resource_path = self.root_uri.join("test_file.dat")
            self.assertTrue(test_resource_path._transfer_config.use_threads)

    def test_implicit_default_threading(self):
        # With no environment override, the boto3 TransferConfig default
        # for ``use_threads`` must apply.
        S3ResourcePath.use_threads = None
        boto_default = signature(boto3.s3.transfer.TransferConfig).parameters["use_threads"].default
        test_resource_path = self.root_uri.join("test_file.dat")
        self.assertEqual(test_resource_path._transfer_config.use_threads, boto_default)

    def test_explicit_default_threading(self):
        # LSST_S3_USE_THREADS="None" must also fall through to the boto3
        # default rather than being treated as a boolean.
        with mock.patch.dict(os.environ, {"LSST_S3_USE_THREADS": "None"}):
            S3ResourcePath.use_threads = None
            boto_default = signature(boto3.s3.transfer.TransferConfig).parameters["use_threads"].default
            test_resource_path = self.root_uri.join("test_file.dat")
            self.assertEqual(test_resource_path._transfer_config.use_threads, boto_default)

    def test_threading_false(self):
        # LSST_S3_USE_THREADS=False must disable threaded transfers, and
        # transfers must still succeed in that mode.
        with mock.patch.dict(os.environ, {"LSST_S3_USE_THREADS": "False"}):
            S3ResourcePath.use_threads = None
            test_resource_path = self.root_uri.join("test_file.dat")
            self.assertFalse(test_resource_path._transfer_config.use_threads)

            self.test_local()

218 

219 

@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!")
class S3ReadWriteTestCase(S3ReadWriteTestCaseBase, unittest.TestCase):
    """Test S3 with no explicit profile/endpoint specified.
    (``s3://bucketname/...``).
    """

    # Bucket created in setUp by the mocked S3 backend for this test case.
    bucket = "my_2nd_bucket"
    # Without a profile the netloc is just the bucket name.
    netloc = bucket

228 

229 

@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!")
class S3WithProfileReadWriteTestCase(S3ReadWriteTestCaseBase, unittest.TestCase):
    """Test S3 URLs with explicit profile specified.
    (``s3://profile@bucketname/...``).
    """

    bucket = "3rd_bucket"
    netloc = f"myprofile@{bucket}"
    s3_endpoint_url = "https://endpoint1.test.example"

    def setUp(self):
        # Configure custom S3 endpoints that we can target from tests using
        # non-default profile.
        profile_env = {
            "MOTO_S3_CUSTOM_ENDPOINTS": self.s3_endpoint_url,
            "LSST_RESOURCES_S3_PROFILE_myprofile": "https://access_key:security_key@endpoint1.test.example",
        }
        self.enterContext(mock.patch.dict(os.environ, profile_env))

        super().setUp()

    def test_missing_profile(self):
        # Referencing a profile that was never configured must raise.
        unknown = ResourcePath("s3://otherprofile@bucket")
        with self.assertRaises(botocore.exceptions.ProfileNotFound):
            unknown.read()

    def test_s3_endpoint_url(self):
        # S3_ENDPOINT_URL redirects profile-less URIs to the custom endpoint.
        with mock.patch.dict(os.environ, {"S3_ENDPOINT_URL": self.s3_endpoint_url}):
            path = ResourcePath(f"s3://{self.bucket}/test-s3-endpoint-url.txt")
            payload = b"123"
            path.write(payload)
            self.assertEqual(path.read(), payload)
            signed_url = path.generate_presigned_get_url(expiration_time_seconds=3600)
            self.assertIn("https://endpoint1.test.example", signed_url)

    def test_uri_syntax(self):
        # Profile-qualified and plain bucket URIs must both parse.
        with_profile = ResourcePath("s3://profile@bucket/path")
        self.assertEqual(with_profile._bucket, "bucket")
        self.assertEqual(with_profile._profile, "profile")

        without_profile = ResourcePath("s3://bucket2/path")
        self.assertEqual(without_profile._bucket, "bucket2")
        self.assertIsNone(without_profile._profile)

    def test_ceph_uri_syntax(self):
        # The Ceph S3 'multi-tenant' syntax for buckets can include colons.
        with_profile = ResourcePath("s3://profile@ceph:bucket/path")
        self.assertEqual(with_profile._bucket, "ceph:bucket")
        self.assertEqual(with_profile._profile, "profile")

        without_profile = ResourcePath("s3://ceph:bucket2/path")
        self.assertEqual(without_profile._bucket, "ceph:bucket2")
        self.assertIsNone(without_profile._profile)

290 

if __name__ == "__main__":
    # Allow this test module to be run directly as a script.
    unittest.main()