# This file is part of lsst-resources.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# Use of this source code is governed by a 3-clause BSD-style
# license that can be found in the LICENSE file.

import os
import time
import unittest
from inspect import signature
from unittest import mock
from urllib.parse import parse_qs, urlparse

from lsst.resources import ResourcePath
from lsst.resources.s3 import S3ResourcePath
from lsst.resources.s3utils import clean_test_environment_for_s3
from lsst.resources.tests import GenericReadWriteTestCase, GenericTestCase

try:
    import boto3
    import botocore
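    # moto 5 replaced the per-service decorators such as mock_s3 with a
    # single mock_aws entry point, so fall back to the old name to keep
    # moto 4 working.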
    try:
        from moto import mock_aws  # v5
    except ImportError:
        from moto import mock_s3 as mock_aws
except ImportError:
    boto3 = None

    def mock_aws(cls):
        """No-op decorator in case moto's mock_aws cannot be imported."""
        return cls


class GenericS3TestCase(GenericTestCase, unittest.TestCase):
    """Generic tests of S3 URIs."""

    scheme = "s3"
    netloc = "my_bucket"


@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!")
class S3ReadWriteTestCase(GenericReadWriteTestCase, unittest.TestCase):
    """Tests of reading and writing S3 URIs."""

    scheme = "s3"
    netloc = "my_2nd_bucket"

    mock_aws = mock_aws()
    """The mocked S3 interface from moto."""

    def setUp(self):
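        # Scrub AWS credentials and S3 configuration from the environment
        # so that these tests cannot accidentally reach real AWS services.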
        self.enterContext(clean_test_environment_for_s3())
        # Enable S3 mocking of tests.
        self.mock_aws.start()

        # Moto needs to know that we expect the bucket to exist.
        s3 = boto3.resource("s3")
        s3.create_bucket(Bucket=self.netloc)

        super().setUp()

    def tearDown(self):
        s3 = boto3.resource("s3")
        bucket = s3.Bucket(self.netloc)
        try:
            bucket.objects.all().delete()
        except botocore.exceptions.ClientError as e:
            if e.response["Error"]["Code"] == "404":
                # The key was not reachable; nothing to delete.
                pass
            else:
                raise
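        # An S3 bucket must be empty before it can be deleted, which is
        # why its objects were removed above.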
        bucket = s3.Bucket(self.netloc)
        bucket.delete()

        # Stop the S3 mock.
        self.mock_aws.stop()
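        # Reset the threading configuration so that later tests re-read
        # the LSST_S3_USE_THREADS environment variable from scratch.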
        S3ResourcePath.use_threads = None

        super().tearDown()

    def test_bucket_fail(self):
        # Deliberately create a URI with an unknown bucket.
        uri = ResourcePath("s3://badbucket/something/")

        with self.assertRaises(ValueError):
            uri.mkdir()

        with self.assertRaises(FileNotFoundError):
            uri.remove()

    def test_transfer_progress(self):
        """Test progress bar reporting for upload and download."""
        remote = self.root_uri.join("test.dat")
        remote.write(b"42")
        with ResourcePath.temporary_uri(suffix=".dat") as tmp:
            # Download from S3.
            with self.assertLogs("lsst.resources", level="DEBUG") as cm:
                tmp.transfer_from(remote, transfer="auto")
            self.assertRegex("".join(cm.output), r"test\.dat.*100\%")

            # Upload to S3.
            with self.assertLogs("lsst.resources", level="DEBUG") as cm:
                remote.transfer_from(tmp, transfer="auto", overwrite=True)
            self.assertRegex("".join(cm.output), rf"{tmp.basename()}.*100\%")

    def test_handle(self):
        remote = self.root_uri.join("test_handle.dat")
        with remote.open("wb") as handle:
            self.assertTrue(handle.writable())
            # Write 6 megabytes to make sure partial writes work.
            handle.write(6 * 1024 * 1024 * b"a")
            self.assertEqual(handle.tell(), 6 * 1024 * 1024)
            handle.flush()
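            # S3 multipart uploads use a 5 MiB minimum part size, so the
            # 6 MiB written above should have been flushed as at least
            # one part.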
            self.assertGreaterEqual(len(handle._multiPartUpload), 1)

            # Verify that the file can't be seeked back into a part that
            # has already been uploaded.
            with self.assertRaises(OSError):
                handle.seek(0)

            # Write more bytes.
            handle.write(1024 * b"c")

            # Seek back and overwrite.
            handle.seek(6 * 1024 * 1024)
            handle.write(1024 * b"b")

        with remote.open("rb") as handle:
            self.assertTrue(handle.readable())
            # Read the first 6 megabytes.
            result = handle.read(6 * 1024 * 1024)
            self.assertEqual(result, 6 * 1024 * 1024 * b"a")
            self.assertEqual(handle.tell(), 6 * 1024 * 1024)
            # Verify that an additional read gets the next part.
            result = handle.read(1024)
            self.assertEqual(result, 1024 * b"b")
            # Seek back to the beginning to verify seeking.
            handle.seek(0)
            result = handle.read(1024)
            self.assertEqual(result, 1024 * b"a")

    def test_url_signing(self):
        self._test_url_signing_case("url-signing-test.txt", b"test123")
        # A zero-byte presigned S3 HTTP URL is a weird edge case, because
        # we emulate HEAD requests using a 1-byte GET.
        self._test_url_signing_case("url-signing-test-zero-bytes.txt", b"")
        # This should behave the same as the normal case, but check it
        # anyway since one byte sits on the boundary of the read size.
        self._test_url_signing_case("url-signing-test-one-byte.txt", b"t")

    def _test_url_signing_case(self, filename: str, test_data: bytes):
        s3_path = self.root_uri.join(filename)

        put_url = s3_path.generate_presigned_put_url(expiration_time_seconds=1800)
        self._check_presigned_url(put_url, 1800)
        get_url = s3_path.generate_presigned_get_url(expiration_time_seconds=3600)
        self._check_presigned_url(get_url, 3600)

        # Moto monkeypatches the 'requests' library to mock access to
        # presigned URLs, so we are able to use HttpResourcePath to access
        # the URLs in this test.
        ResourcePath(put_url).write(test_data)
        get_path = ResourcePath(get_url)
        retrieved = get_path.read()
        self.assertEqual(retrieved, test_data)
        self.assertTrue(get_path.exists())
        self.assertEqual(get_path.size(), len(test_data))

    def test_nonexistent_presigned_url(self):
        s3_path = self.root_uri.join("this-is-a-missing-file.txt")
        get_url = s3_path.generate_presigned_get_url(expiration_time_seconds=3600)
        get_path = ResourcePath(get_url)
        # Check the HttpResourcePath implementation for presigned S3 URLs.
        # Nothing has been uploaded to this URL, so it shouldn't exist.
        self.assertFalse(get_path.exists())
        with self.assertRaises(FileNotFoundError):
            get_path.size()

    def _check_presigned_url(self, url: str, expiration_time_seconds: int):
        parsed = urlparse(url)
        self.assertEqual(parsed.scheme, "https")

        actual_expiration_timestamp = int(parse_qs(parsed.query)["Expires"][0])
        current_time = int(time.time())
        expected_expiration_timestamp = current_time + expiration_time_seconds
        # Allow some flex in the expiration time in case this test process
        # goes out to lunch for a while on a busy CI machine.
        self.assertLessEqual(abs(expected_expiration_timestamp - actual_expiration_timestamp), 120)

    def test_threading_true(self):
        with mock.patch.dict(os.environ, {"LSST_S3_USE_THREADS": "True"}):
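            # Clearing use_threads forces the environment variable to be
            # consulted again when the transfer configuration is rebuilt.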
            S3ResourcePath.use_threads = None
            test_resource_path = self.root_uri.join("test_file.dat")
            self.assertTrue(test_resource_path._transfer_config.use_threads)

    def test_implicit_default_threading(self):
        S3ResourcePath.use_threads = None
        boto_default = signature(boto3.s3.transfer.TransferConfig).parameters["use_threads"].default
        test_resource_path = self.root_uri.join("test_file.dat")
        self.assertEqual(test_resource_path._transfer_config.use_threads, boto_default)

    def test_explicit_default_threading(self):
        with mock.patch.dict(os.environ, {"LSST_S3_USE_THREADS": "None"}):
            S3ResourcePath.use_threads = None
            boto_default = signature(boto3.s3.transfer.TransferConfig).parameters["use_threads"].default
            test_resource_path = self.root_uri.join("test_file.dat")
            self.assertEqual(test_resource_path._transfer_config.use_threads, boto_default)

    def test_threading_false(self):
        with mock.patch.dict(os.environ, {"LSST_S3_USE_THREADS": "False"}):
            S3ResourcePath.use_threads = None
            test_resource_path = self.root_uri.join("test_file.dat")
            self.assertFalse(test_resource_path._transfer_config.use_threads)
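            # Re-run the generic local-transfer tests from the base class
            # with threading disabled.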
            self.test_local()


if __name__ == "__main__":
    unittest.main()