Coverage for tests/test_s3.py: 24%

125 statements  

Generated by coverage.py v7.4.0, created at 2024-01-17 10:49 +0000

1# This file is part of lsst-resources. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# Use of this source code is governed by a 3-clause BSD-style 

10# license that can be found in the LICENSE file. 

11 

12import os 

13import time 

14import unittest 

15from inspect import signature 

16from unittest import mock 

17from urllib.parse import parse_qs, urlparse 

18 

19from lsst.resources import ResourcePath 

20from lsst.resources.s3 import S3ResourcePath 

21from lsst.resources.s3utils import clean_test_environment_for_s3 

22from lsst.resources.tests import GenericReadWriteTestCase, GenericTestCase 

23 

# Optional dependencies: the S3 tests need boto3/botocore and moto's S3
# mocking.  When boto3 is unavailable, the S3 test case below is skipped
# via its ``skipIf`` decorator, so a no-op ``mock_s3`` stand-in suffices.
try:
    import boto3
    import botocore
    from moto import mock_s3
except ImportError:
    boto3 = None
    # NOTE(review): ``botocore`` is left undefined on this path; that is
    # safe only because every use of it (in tearDown) lives in a class
    # skipped when ``boto3`` is None — confirm if new references are added.

    def mock_s3(cls):
        """No-op decorator in case moto mock_s3 can not be imported."""
        return cls

34 

35 

class GenericS3TestCase(GenericTestCase, unittest.TestCase):
    """Generic tests of S3 URIs.

    The concrete URI scheme/netloc below parameterize the shared test
    logic inherited from ``GenericTestCase``.
    """

    # URI scheme exercised by the generic tests.
    scheme = "s3"
    # Bucket name used as the network location of the test URIs.
    netloc = "my_bucket"

41 

42 

@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!")
class S3ReadWriteTestCase(GenericReadWriteTestCase, unittest.TestCase):
    """Tests of reading and writing S3 URIs.

    Runs the shared read/write tests from ``GenericReadWriteTestCase``
    against a moto-mocked S3 endpoint, plus S3-specific tests for
    presigned URLs, streaming handles, and transfer threading config.
    """

    scheme = "s3"
    netloc = "my_2nd_bucket"

    # The mocked s3 interface from moto.
    mock_s3 = mock_s3()

    def setUp(self):
        # Scrub any real AWS configuration from the environment so the
        # tests cannot touch genuine credentials or endpoints.
        self.enterContext(clean_test_environment_for_s3())
        # Enable S3 mocking of tests.
        self.mock_s3.start()

        # MOTO needs to know that we expect Bucket bucketname to exist
        s3 = boto3.resource("s3")
        s3.create_bucket(Bucket=self.netloc)

        super().setUp()

    def tearDown(self):
        # Empty the bucket before deleting it; S3 refuses to delete a
        # non-empty bucket.  Reuse the same Bucket resource for both
        # operations (the original fetched it twice).
        s3 = boto3.resource("s3")
        bucket = s3.Bucket(self.netloc)
        try:
            bucket.objects.all().delete()
        except botocore.exceptions.ClientError as e:
            if e.response["Error"]["Code"] == "404":
                # the key was not reachable - pass
                pass
            else:
                raise

        bucket.delete()

        # Stop the S3 mock.
        self.mock_s3.stop()

        # Reset the class-level threading override so later tests start
        # from the default behavior.
        S3ResourcePath.use_threads = None

        super().tearDown()

    def test_bucket_fail(self):
        """Operations on a nonexistent bucket raise appropriate errors."""
        # Deliberately create URI with unknown bucket.
        uri = ResourcePath("s3://badbucket/something/")

        with self.assertRaises(ValueError):
            uri.mkdir()

        with self.assertRaises(FileNotFoundError):
            uri.remove()

    def test_transfer_progress(self):
        """Test progress bar reporting for upload and download."""
        remote = self.root_uri.join("test.dat")
        remote.write(b"42")
        with ResourcePath.temporary_uri(suffix=".dat") as tmp:
            # Download from S3.
            with self.assertLogs("lsst.resources", level="DEBUG") as cm:
                tmp.transfer_from(remote, transfer="auto")
            self.assertRegex("".join(cm.output), r"test\.dat.*100\%")

            # Upload to S3.
            with self.assertLogs("lsst.resources", level="DEBUG") as cm:
                remote.transfer_from(tmp, transfer="auto", overwrite=True)
            self.assertRegex("".join(cm.output), rf"{tmp.basename()}.*100\%")

    def test_handle(self):
        """Writable/readable S3 handles, including multipart uploads."""
        remote = self.root_uri.join("test_handle.dat")
        with remote.open("wb") as handle:
            self.assertTrue(handle.writable())
            # write 6 megabytes to make sure partial write work
            handle.write(6 * 1024 * 1024 * b"a")
            self.assertEqual(handle.tell(), 6 * 1024 * 1024)
            handle.flush()
            # After a flush of >5 MiB, at least one multipart part exists.
            self.assertGreaterEqual(len(handle._multiPartUpload), 1)

            # verify file can't be seeked back
            with self.assertRaises(OSError):
                handle.seek(0)

            # write more bytes
            handle.write(1024 * b"c")

            # seek back and overwrite
            handle.seek(6 * 1024 * 1024)
            handle.write(1024 * b"b")

        with remote.open("rb") as handle:
            self.assertTrue(handle.readable())
            # read the first 6 megabytes
            result = handle.read(6 * 1024 * 1024)
            self.assertEqual(result, 6 * 1024 * 1024 * b"a")
            self.assertEqual(handle.tell(), 6 * 1024 * 1024)
            # verify additional read gets the next part
            result = handle.read(1024)
            self.assertEqual(result, 1024 * b"b")
            # see back to the beginning to verify seeking
            handle.seek(0)
            result = handle.read(1024)
            self.assertEqual(result, 1024 * b"a")

    def test_url_signing(self):
        """Presigned PUT/GET URLs can be generated and round-tripped."""
        s3_path = self.root_uri.join("url-signing-test.txt")

        put_url = s3_path.generate_presigned_put_url(expiration_time_seconds=1800)
        self._check_presigned_url(put_url, 1800)
        get_url = s3_path.generate_presigned_get_url(expiration_time_seconds=3600)
        self._check_presigned_url(get_url, 3600)

        # Moto monkeypatches the 'requests' library to mock access to presigned
        # URLs, so we are able to use HttpResourcePath to access the URLs in
        # this test
        test_data = b"test123"
        ResourcePath(put_url).write(test_data)
        retrieved = ResourcePath(get_url).read()
        self.assertEqual(retrieved, test_data)

    def _check_presigned_url(self, url: str, expiration_time_seconds: int):
        """Check that *url* is HTTPS and expires roughly when requested."""
        parsed = urlparse(url)
        self.assertEqual(parsed.scheme, "https")

        actual_expiration_timestamp = int(parse_qs(parsed.query)["Expires"][0])
        current_time = int(time.time())
        expected_expiration_timestamp = current_time + expiration_time_seconds
        # Allow some flex in the expiration time in case this test process goes
        # out to lunch for a while on a busy CI machine
        self.assertLessEqual(abs(expected_expiration_timestamp - actual_expiration_timestamp), 120)

    def test_threading_true(self):
        """LSST_S3_USE_THREADS=True enables transfer threading."""
        with mock.patch.dict(os.environ, {"LSST_S3_USE_THREADS": "True"}):
            S3ResourcePath.use_threads = None
            test_resource_path = self.root_uri.join("test_file.dat")
            self.assertTrue(test_resource_path._transfer_config.use_threads)

    def test_implicit_default_threading(self):
        """With no env var set, the boto3 TransferConfig default applies."""
        S3ResourcePath.use_threads = None
        boto_default = signature(boto3.s3.transfer.TransferConfig).parameters["use_threads"].default
        test_resource_path = self.root_uri.join("test_file.dat")
        self.assertEqual(test_resource_path._transfer_config.use_threads, boto_default)

    def test_explicit_default_threading(self):
        """LSST_S3_USE_THREADS=None falls back to the boto3 default."""
        with mock.patch.dict(os.environ, {"LSST_S3_USE_THREADS": "None"}):
            S3ResourcePath.use_threads = None
            boto_default = signature(boto3.s3.transfer.TransferConfig).parameters["use_threads"].default
            test_resource_path = self.root_uri.join("test_file.dat")
            self.assertEqual(test_resource_path._transfer_config.use_threads, boto_default)

    def test_threading_false(self):
        """LSST_S3_USE_THREADS=False disables transfer threading."""
        with mock.patch.dict(os.environ, {"LSST_S3_USE_THREADS": "False"}):
            S3ResourcePath.use_threads = None
            test_resource_path = self.root_uri.join("test_file.dat")
            self.assertFalse(test_resource_path._transfer_config.use_threads)

            # Re-run the inherited local-transfer tests with threading
            # disabled (test_local presumably comes from
            # GenericReadWriteTestCase — not visible in this file).
            self.test_local()

199 

200 

# Allow the test module to be run directly as a script.
if __name__ == "__main__":
    unittest.main()