Coverage for tests/test_s3.py: 23%
138 statements
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-23 10:46 +0000
1# This file is part of lsst-resources.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# Use of this source code is governed by a 3-clause BSD-style
10# license that can be found in the LICENSE file.
12import os
13import time
14import unittest
15from inspect import signature
16from unittest import mock
17from urllib.parse import parse_qs, urlparse
19from lsst.resources import ResourcePath
20from lsst.resources.s3 import S3ResourcePath
21from lsst.resources.s3utils import clean_test_environment_for_s3
22from lsst.resources.tests import GenericReadWriteTestCase, GenericTestCase
try:
    import boto3
    import botocore
    from moto import mock_s3
except ImportError:
    # boto3/botocore/moto are optional test dependencies.  The S3 test
    # cases below are skipped (via @unittest.skipIf) when boto3 is None,
    # so it is safe to run this module without them installed.
    boto3 = None
    # Keep botocore defined as well so any reference to it outside the
    # skipped tests fails cleanly rather than with a NameError.
    botocore = None

    def mock_s3(cls):
        """No-op decorator in case moto mock_s3 can not be imported."""
        return cls
class GenericS3TestCase(GenericTestCase, unittest.TestCase):
    """Generic tests of S3 URIs."""

    # URI scheme and bucket (netloc) used by the generic URI-manipulation
    # tests inherited from GenericTestCase; no real S3 access is needed.
    scheme = "s3"
    netloc = "my_bucket"
@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!")
class S3ReadWriteTestCase(GenericReadWriteTestCase, unittest.TestCase):
    """Tests of reading and writing S3 URIs.

    All S3 access is intercepted by moto's ``mock_s3``, so no real AWS
    credentials or network access are required.
    """

    scheme = "s3"
    netloc = "my_2nd_bucket"

    mock_s3 = mock_s3()
    """The mocked s3 interface from moto."""

    def setUp(self):
        # Scrub any ambient AWS/S3 environment so the mock is hermetic.
        self.enterContext(clean_test_environment_for_s3())
        # Enable S3 mocking of tests.
        self.mock_s3.start()

        # MOTO needs to know that we expect Bucket bucketname to exist
        s3 = boto3.resource("s3")
        s3.create_bucket(Bucket=self.netloc)

        super().setUp()

    def tearDown(self):
        # Empty the bucket before deleting it; S3 refuses to delete a
        # non-empty bucket.
        s3 = boto3.resource("s3")
        bucket = s3.Bucket(self.netloc)
        try:
            bucket.objects.all().delete()
        except botocore.exceptions.ClientError as e:
            if e.response["Error"]["Code"] == "404":
                # the key was not reachable - pass
                pass
            else:
                raise

        bucket.delete()

        # Stop the S3 mock.
        self.mock_s3.stop()

        # Reset class-level threading override so tests do not leak
        # configuration into each other.
        S3ResourcePath.use_threads = None

        super().tearDown()

    def test_bucket_fail(self):
        # Deliberately create URI with unknown bucket.
        uri = ResourcePath("s3://badbucket/something/")

        with self.assertRaises(ValueError):
            uri.mkdir()

        with self.assertRaises(FileNotFoundError):
            uri.remove()

    def test_transfer_progress(self):
        """Test progress bar reporting for upload and download."""
        remote = self.root_uri.join("test.dat")
        remote.write(b"42")
        with ResourcePath.temporary_uri(suffix=".dat") as tmp:
            # Download from S3.
            with self.assertLogs("lsst.resources", level="DEBUG") as cm:
                tmp.transfer_from(remote, transfer="auto")
            self.assertRegex("".join(cm.output), r"test\.dat.*100\%")

            # Upload to S3.
            with self.assertLogs("lsst.resources", level="DEBUG") as cm:
                remote.transfer_from(tmp, transfer="auto", overwrite=True)
            self.assertRegex("".join(cm.output), rf"{tmp.basename()}.*100\%")

    def test_handle(self):
        """Test the file-like handle returned by ResourcePath.open."""
        remote = self.root_uri.join("test_handle.dat")
        with remote.open("wb") as handle:
            self.assertTrue(handle.writable())
            # write 6 megabytes to make sure partial writes work
            handle.write(6 * 1024 * 1024 * b"a")
            self.assertEqual(handle.tell(), 6 * 1024 * 1024)
            # Flushing should have triggered at least one multipart upload.
            handle.flush()
            self.assertGreaterEqual(len(handle._multiPartUpload), 1)

            # verify file can't be seeked back before the flushed region
            with self.assertRaises(OSError):
                handle.seek(0)

            # write more bytes
            handle.write(1024 * b"c")

            # seek back and overwrite
            handle.seek(6 * 1024 * 1024)
            handle.write(1024 * b"b")

        with remote.open("rb") as handle:
            self.assertTrue(handle.readable())
            # read the first 6 megabytes
            result = handle.read(6 * 1024 * 1024)
            self.assertEqual(result, 6 * 1024 * 1024 * b"a")
            self.assertEqual(handle.tell(), 6 * 1024 * 1024)
            # verify additional read gets the next part
            result = handle.read(1024)
            self.assertEqual(result, 1024 * b"b")
            # seek back to the beginning to verify seeking
            handle.seek(0)
            result = handle.read(1024)
            self.assertEqual(result, 1024 * b"a")

    def test_url_signing(self):
        self._test_url_signing_case("url-signing-test.txt", b"test123")
        # A zero byte presigned S3 HTTP URL is a weird edge case, because we
        # emulate HEAD requests using a 1-byte GET.
        self._test_url_signing_case("url-signing-test-zero-bytes.txt", b"")
        # Should be the same as a normal case, but check it for paranoia since
        # it's on the boundary of the read size.
        self._test_url_signing_case("url-signing-test-one-byte.txt", b"t")

    def _test_url_signing_case(self, filename: str, test_data: bytes):
        """Round-trip ``test_data`` through presigned PUT and GET URLs."""
        s3_path = self.root_uri.join(filename)

        put_url = s3_path.generate_presigned_put_url(expiration_time_seconds=1800)
        self._check_presigned_url(put_url, 1800)
        get_url = s3_path.generate_presigned_get_url(expiration_time_seconds=3600)
        self._check_presigned_url(get_url, 3600)

        # Moto monkeypatches the 'requests' library to mock access to presigned
        # URLs, so we are able to use HttpResourcePath to access the URLs in
        # this test.
        ResourcePath(put_url).write(test_data)
        get_path = ResourcePath(get_url)
        retrieved = get_path.read()
        self.assertEqual(retrieved, test_data)
        self.assertTrue(get_path.exists())
        self.assertEqual(get_path.size(), len(test_data))

    def test_nonexistent_presigned_url(self):
        s3_path = self.root_uri.join("this-is-a-missing-file.txt")
        get_url = s3_path.generate_presigned_get_url(expiration_time_seconds=3600)
        get_path = ResourcePath(get_url)
        # Check the HttpResourcePath implementation for presigned S3 urls.
        # Nothing has been uploaded to this URL, so it shouldn't exist.
        self.assertFalse(get_path.exists())
        with self.assertRaises(FileNotFoundError):
            get_path.size()

    def _check_presigned_url(self, url: str, expiration_time_seconds: int):
        """Verify a presigned URL is HTTPS and expires roughly when asked."""
        parsed = urlparse(url)
        self.assertEqual(parsed.scheme, "https")

        actual_expiration_timestamp = int(parse_qs(parsed.query)["Expires"][0])
        current_time = int(time.time())
        expected_expiration_timestamp = current_time + expiration_time_seconds
        # Allow some flex in the expiration time in case this test process goes
        # out to lunch for a while on a busy CI machine
        self.assertLessEqual(abs(expected_expiration_timestamp - actual_expiration_timestamp), 120)

    def test_threading_true(self):
        with mock.patch.dict(os.environ, {"LSST_S3_USE_THREADS": "True"}):
            # Force re-read of the environment variable.
            S3ResourcePath.use_threads = None
            test_resource_path = self.root_uri.join("test_file.dat")
            self.assertTrue(test_resource_path._transfer_config.use_threads)

    def test_implicit_default_threading(self):
        # With no environment override, we should inherit boto3's default.
        S3ResourcePath.use_threads = None
        boto_default = signature(boto3.s3.transfer.TransferConfig).parameters["use_threads"].default
        test_resource_path = self.root_uri.join("test_file.dat")
        self.assertEqual(test_resource_path._transfer_config.use_threads, boto_default)

    def test_explicit_default_threading(self):
        # An explicit "None" in the environment also means boto3's default.
        with mock.patch.dict(os.environ, {"LSST_S3_USE_THREADS": "None"}):
            S3ResourcePath.use_threads = None
            boto_default = signature(boto3.s3.transfer.TransferConfig).parameters["use_threads"].default
            test_resource_path = self.root_uri.join("test_file.dat")
            self.assertEqual(test_resource_path._transfer_config.use_threads, boto_default)

    def test_threading_false(self):
        with mock.patch.dict(os.environ, {"LSST_S3_USE_THREADS": "False"}):
            S3ResourcePath.use_threads = None
            test_resource_path = self.root_uri.join("test_file.dat")
            self.assertFalse(test_resource_path._transfer_config.use_threads)

            # Re-run the generic local-transfer tests with threading disabled.
            self.test_local()
# Allow this test module to be run directly as a script.
if __name__ == "__main__":
    unittest.main()