Coverage for tests/test_s3.py: 23%

138 statements  

« prev     ^ index     » next       coverage.py v7.4.0, created at 2024-01-23 10:46 +0000

1# This file is part of lsst-resources. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# Use of this source code is governed by a 3-clause BSD-style 

10# license that can be found in the LICENSE file. 

11 

12import os 

13import time 

14import unittest 

15from inspect import signature 

16from unittest import mock 

17from urllib.parse import parse_qs, urlparse 

18 

19from lsst.resources import ResourcePath 

20from lsst.resources.s3 import S3ResourcePath 

21from lsst.resources.s3utils import clean_test_environment_for_s3 

22from lsst.resources.tests import GenericReadWriteTestCase, GenericTestCase 

23 

try:
    import boto3
    import botocore

    try:
        from moto import mock_s3
    except ImportError:
        # moto >= 5 replaced the per-service decorators (mock_s3, mock_ec2,
        # ...) with a single mock_aws decorator.  Alias it so the rest of
        # this module can keep using the mock_s3 name with either moto
        # generation.
        from moto import mock_aws as mock_s3
except ImportError:
    # boto3/botocore (or moto entirely) are unavailable.  The S3 tests are
    # skipped via @unittest.skipIf below, but the module must still import
    # cleanly, so provide placeholders.
    boto3 = None

    def mock_s3(cls):
        """No-op decorator in case moto mock_s3 can not be imported."""
        return cls

34 

35 

class GenericS3TestCase(GenericTestCase, unittest.TestCase):
    """Generic tests of S3 URIs."""

    # URI scheme exercised by the generic test base class.
    scheme = "s3"
    # Bucket name used as the network location of the test URIs.
    netloc = "my_bucket"

41 

42 

@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!")
class S3ReadWriteTestCase(GenericReadWriteTestCase, unittest.TestCase):
    """Tests of reading and writing S3 URIs."""

    # URI scheme exercised by the generic read/write test base class.
    scheme = "s3"
    # Bucket name used as the network location of the test URIs.
    netloc = "my_2nd_bucket"

    mock_s3 = mock_s3()
    """The mocked s3 interface from moto."""

    def setUp(self):
        # Scrub any ambient AWS credentials/configuration from the
        # environment before starting the mock.
        self.enterContext(clean_test_environment_for_s3())
        # Enable S3 mocking of tests.
        self.mock_s3.start()

        # MOTO needs to know that we expect Bucket bucketname to exist
        s3 = boto3.resource("s3")
        s3.create_bucket(Bucket=self.netloc)

        super().setUp()

    def tearDown(self):
        # Empty the bucket; S3 only allows deletion of empty buckets.
        s3 = boto3.resource("s3")
        bucket = s3.Bucket(self.netloc)
        try:
            bucket.objects.all().delete()
        except botocore.exceptions.ClientError as e:
            if e.response["Error"]["Code"] == "404":
                # the key was not reachable - pass
                pass
            else:
                raise

        bucket = s3.Bucket(self.netloc)
        bucket.delete()

        # Stop the S3 mock.
        self.mock_s3.stop()

        # Reset the class-level threading override that the threading
        # tests below modify, so later tests see a clean state.
        S3ResourcePath.use_threads = None

        super().tearDown()

    def test_bucket_fail(self):
        """Test the errors raised for operations on a missing bucket."""
        # Deliberately create URI with unknown bucket.
        uri = ResourcePath("s3://badbucket/something/")

        with self.assertRaises(ValueError):
            uri.mkdir()

        with self.assertRaises(FileNotFoundError):
            uri.remove()

    def test_transfer_progress(self):
        """Test progress bar reporting for upload and download."""
        remote = self.root_uri.join("test.dat")
        remote.write(b"42")
        with ResourcePath.temporary_uri(suffix=".dat") as tmp:
            # Download from S3.
            with self.assertLogs("lsst.resources", level="DEBUG") as cm:
                tmp.transfer_from(remote, transfer="auto")
            self.assertRegex("".join(cm.output), r"test\.dat.*100\%")

            # Upload to S3.
            with self.assertLogs("lsst.resources", level="DEBUG") as cm:
                remote.transfer_from(tmp, transfer="auto", overwrite=True)
            self.assertRegex("".join(cm.output), rf"{tmp.basename()}.*100\%")

    def test_handle(self):
        """Test file-handle reads and writes, including multipart upload
        and seek behavior on a partially flushed write handle.
        """
        remote = self.root_uri.join("test_handle.dat")
        with remote.open("wb") as handle:
            self.assertTrue(handle.writable())
            # write 6 megabytes to make sure partial write work
            handle.write(6 * 1024 * 1024 * b"a")
            self.assertEqual(handle.tell(), 6 * 1024 * 1024)
            handle.flush()
            # Flushing a buffer this large should have started a multipart
            # upload with at least one uploaded part.
            self.assertGreaterEqual(len(handle._multiPartUpload), 1)

            # verify file can't be seeked back before the flushed parts
            with self.assertRaises(OSError):
                handle.seek(0)

            # write more bytes
            handle.write(1024 * b"c")

            # seek back and overwrite the bytes just written
            handle.seek(6 * 1024 * 1024)
            handle.write(1024 * b"b")

        with remote.open("rb") as handle:
            self.assertTrue(handle.readable())
            # read the first 6 megabytes
            result = handle.read(6 * 1024 * 1024)
            self.assertEqual(result, 6 * 1024 * 1024 * b"a")
            self.assertEqual(handle.tell(), 6 * 1024 * 1024)
            # verify additional read gets the next part
            result = handle.read(1024)
            self.assertEqual(result, 1024 * b"b")
            # seek back to the beginning to verify seeking
            handle.seek(0)
            result = handle.read(1024)
            self.assertEqual(result, 1024 * b"a")

    def test_url_signing(self):
        """Test presigned PUT/GET URL generation and round-tripping."""
        self._test_url_signing_case("url-signing-test.txt", b"test123")
        # A zero byte presigned S3 HTTP URL is a weird edge case, because we
        # emulate HEAD requests using a 1-byte GET.
        self._test_url_signing_case("url-signing-test-zero-bytes.txt", b"")
        # Should be the same as a normal case, but check it for paranoia since
        # it's on the boundary of the read size.
        self._test_url_signing_case("url-signing-test-one-byte.txt", b"t")

    def _test_url_signing_case(self, filename: str, test_data: bytes):
        """Round-trip ``test_data`` through presigned PUT and GET URLs for
        ``filename``, checking URL expiration and retrieved content.
        """
        s3_path = self.root_uri.join(filename)

        put_url = s3_path.generate_presigned_put_url(expiration_time_seconds=1800)
        self._check_presigned_url(put_url, 1800)
        get_url = s3_path.generate_presigned_get_url(expiration_time_seconds=3600)
        self._check_presigned_url(get_url, 3600)

        # Moto monkeypatches the 'requests' library to mock access to presigned
        # URLs, so we are able to use HttpResourcePath to access the URLs in
        # this test.
        ResourcePath(put_url).write(test_data)
        get_path = ResourcePath(get_url)
        retrieved = get_path.read()
        self.assertEqual(retrieved, test_data)
        self.assertTrue(get_path.exists())
        self.assertEqual(get_path.size(), len(test_data))

    def test_nonexistent_presigned_url(self):
        """Test that a presigned GET URL for a missing object reports
        non-existence.
        """
        s3_path = self.root_uri.join("this-is-a-missing-file.txt")
        get_url = s3_path.generate_presigned_get_url(expiration_time_seconds=3600)
        get_path = ResourcePath(get_url)
        # Check the HttpResourcePath implementation for presigned S3 urls.
        # Nothing has been uploaded to this URL, so it shouldn't exist.
        self.assertFalse(get_path.exists())
        with self.assertRaises(FileNotFoundError):
            get_path.size()

    def _check_presigned_url(self, url: str, expiration_time_seconds: int):
        """Check that ``url`` is HTTPS and that its ``Expires`` query
        parameter is roughly ``expiration_time_seconds`` from now.
        """
        parsed = urlparse(url)
        self.assertEqual(parsed.scheme, "https")

        actual_expiration_timestamp = int(parse_qs(parsed.query)["Expires"][0])
        current_time = int(time.time())
        expected_expiration_timestamp = current_time + expiration_time_seconds
        # Allow some flex in the expiration time in case this test process goes
        # out to lunch for a while on a busy CI machine
        self.assertLessEqual(abs(expected_expiration_timestamp - actual_expiration_timestamp), 120)

    def test_threading_true(self):
        """Test that LSST_S3_USE_THREADS=True enables transfer threads."""
        with mock.patch.dict(os.environ, {"LSST_S3_USE_THREADS": "True"}):
            # Clear the cached value so the environment variable is re-read.
            S3ResourcePath.use_threads = None
            test_resource_path = self.root_uri.join("test_file.dat")
            self.assertTrue(test_resource_path._transfer_config.use_threads)

    def test_implicit_default_threading(self):
        """Test that with no environment override boto3's own default for
        ``use_threads`` applies.
        """
        S3ResourcePath.use_threads = None
        boto_default = signature(boto3.s3.transfer.TransferConfig).parameters["use_threads"].default
        test_resource_path = self.root_uri.join("test_file.dat")
        self.assertEqual(test_resource_path._transfer_config.use_threads, boto_default)

    def test_explicit_default_threading(self):
        """Test that LSST_S3_USE_THREADS=None falls back to boto3's own
        default for ``use_threads``.
        """
        with mock.patch.dict(os.environ, {"LSST_S3_USE_THREADS": "None"}):
            S3ResourcePath.use_threads = None
            boto_default = signature(boto3.s3.transfer.TransferConfig).parameters["use_threads"].default
            test_resource_path = self.root_uri.join("test_file.dat")
            self.assertEqual(test_resource_path._transfer_config.use_threads, boto_default)

    def test_threading_false(self):
        """Test that LSST_S3_USE_THREADS=False disables transfer threads."""
        with mock.patch.dict(os.environ, {"LSST_S3_USE_THREADS": "False"}):
            S3ResourcePath.use_threads = None
            test_resource_path = self.root_uri.join("test_file.dat")
            self.assertFalse(test_resource_path._transfer_config.use_threads)

            # NOTE(review): re-runs the generic local-transfer tests while
            # threading is disabled.  The original file's indentation was
            # lost in extraction — confirm this call belongs inside the
            # patched-environment block as placed here.
            self.test_local()

220 

221 

# Allow this test module to be run directly with "python test_s3.py".
if __name__ == "__main__":
    unittest.main()