# This file is part of lsst-resources.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# Use of this source code is governed by a 3-clause BSD-style
# license that can be found in the LICENSE file.

import os
import time
import unittest
from inspect import signature
from unittest import mock
from urllib.parse import parse_qs, urlparse

from lsst.resources import ResourcePath
from lsst.resources.s3 import S3ResourcePath
from lsst.resources.s3utils import clean_test_environment_for_s3
from lsst.resources.tests import GenericReadWriteTestCase, GenericTestCase

try:
    import boto3
    import botocore

    try:
        from moto import mock_aws  # moto v5
    except ImportError:
        from moto import mock_s3 as mock_aws
except ImportError:
    boto3 = None

    def mock_aws(cls):
        """No-op decorator in case moto mock_aws cannot be imported."""
        return cls
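
# If boto3 is unavailable the S3 test classes below are skipped via
# unittest.skipIf, so the no-op mock_aws stand-in above only needs to keep
# this module importable.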


class GenericS3TestCase(GenericTestCase, unittest.TestCase):
    """Generic tests of S3 URIs."""

    scheme = "s3"
    netloc = "my_bucket"


@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!")
class S3ReadWriteTestCase(GenericReadWriteTestCase, unittest.TestCase):
    """Tests of reading and writing S3 URIs."""

    scheme = "s3"
    netloc = "my_2nd_bucket"

    mock_aws = mock_aws()
    """The mocked S3 interface from moto."""
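
    # A single moto mock is created at class scope and started and stopped
    # explicitly in setUp/tearDown rather than applied as a decorator or
    # context manager.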

    def setUp(self):
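        # Scrub AWS settings from the environment so these tests cannot
        # accidentally reach a real S3 endpoint.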
        self.enterContext(clean_test_environment_for_s3())
        # Enable S3 mocking of tests.
        self.mock_aws.start()

        # Moto needs to know that we expect the bucket named by self.netloc
        # to exist, so create it explicitly.
        s3 = boto3.resource("s3")
        s3.create_bucket(Bucket=self.netloc)

        super().setUp()

    def tearDown(self):
        s3 = boto3.resource("s3")
        bucket = s3.Bucket(self.netloc)
        try:
            # A bucket must be empty before it can be deleted.
            bucket.objects.all().delete()
        except botocore.exceptions.ClientError as e:
            if e.response["Error"]["Code"] == "404":
                # The keys were not reachable; nothing to clean up.
                pass
            else:
                raise

        bucket = s3.Bucket(self.netloc)
        bucket.delete()

        # Stop the S3 mock.
        self.mock_aws.stop()

        # Reset the threading override in case a test modified it.
        S3ResourcePath.use_threads = None

        super().tearDown()

    def test_bucket_fail(self):
        # Deliberately create a URI with an unknown bucket.
        uri = ResourcePath("s3://badbucket/something/")

        with self.assertRaises(ValueError):
            uri.mkdir()

        with self.assertRaises(FileNotFoundError):
            uri.remove()

    def test_transfer_progress(self):
        """Test progress bar reporting for upload and download."""
        remote = self.root_uri.join("test.dat")
        remote.write(b"42")
        with ResourcePath.temporary_uri(suffix=".dat") as tmp:
            # Download from S3.
            with self.assertLogs("lsst.resources", level="DEBUG") as cm:
                tmp.transfer_from(remote, transfer="auto")
            self.assertRegex("".join(cm.output), r"test\.dat.*100\%")

            # Upload to S3.
            with self.assertLogs("lsst.resources", level="DEBUG") as cm:
                remote.transfer_from(tmp, transfer="auto", overwrite=True)
            self.assertRegex("".join(cm.output), rf"{tmp.basename()}.*100\%")

    def test_handle(self):
        remote = self.root_uri.join("test_handle.dat")
        with remote.open("wb") as handle:
            self.assertTrue(handle.writable())
            # Write 6 MiB to make sure partial writes work.
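            # (S3 multipart upload parts must be at least 5 MiB, so this is
            # enough to guarantee at least one complete part is flushed.)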
            handle.write(6 * 1024 * 1024 * b"a")
            self.assertEqual(handle.tell(), 6 * 1024 * 1024)
            handle.flush()
            self.assertGreaterEqual(len(handle._multiPartUpload), 1)

            # Verify that the handle cannot seek back into data that has
            # already been flushed to S3.
            with self.assertRaises(OSError):
                handle.seek(0)

            # Write more bytes.
            handle.write(1024 * b"c")

            # Seek back within the unflushed buffer and overwrite.
            handle.seek(6 * 1024 * 1024)
            handle.write(1024 * b"b")

        with remote.open("rb") as handle:
            self.assertTrue(handle.readable())
            # Read the first 6 MiB.
            result = handle.read(6 * 1024 * 1024)
            self.assertEqual(result, 6 * 1024 * 1024 * b"a")
            self.assertEqual(handle.tell(), 6 * 1024 * 1024)
            # Verify that an additional read gets the next part.
            result = handle.read(1024)
            self.assertEqual(result, 1024 * b"b")
            # Seek back to the beginning to verify seeking on reads.
            handle.seek(0)
            result = handle.read(1024)
            self.assertEqual(result, 1024 * b"a")

    def test_url_signing(self):
        self._test_url_signing_case("url-signing-test.txt", b"test123")
        # A zero-byte presigned S3 HTTP URL is a weird edge case, because we
        # emulate HEAD requests using a 1-byte GET.
        self._test_url_signing_case("url-signing-test-zero-bytes.txt", b"")
        # Should be the same as the normal case, but check it for paranoia
        # since it is on the boundary of the read size.
        self._test_url_signing_case("url-signing-test-one-byte.txt", b"t")

    def _test_url_signing_case(self, filename: str, test_data: bytes):
        s3_path = self.root_uri.join(filename)

        put_url = s3_path.generate_presigned_put_url(expiration_time_seconds=1800)
        self._check_presigned_url(put_url, 1800)
        get_url = s3_path.generate_presigned_get_url(expiration_time_seconds=3600)
        self._check_presigned_url(get_url, 3600)

        # Moto monkeypatches the 'requests' library to mock access to
        # presigned URLs, so we are able to use HttpResourcePath to access
        # the URLs in this test.
        ResourcePath(put_url).write(test_data)
        get_path = ResourcePath(get_url)
        retrieved = get_path.read()
        self.assertEqual(retrieved, test_data)
        self.assertTrue(get_path.exists())
        self.assertEqual(get_path.size(), len(test_data))

    def test_nonexistent_presigned_url(self):
        s3_path = self.root_uri.join("this-is-a-missing-file.txt")
        get_url = s3_path.generate_presigned_get_url(expiration_time_seconds=3600)
        get_path = ResourcePath(get_url)
        # Check the HttpResourcePath implementation for presigned S3 URLs.
        # Nothing has been uploaded to this URL, so it shouldn't exist.
        self.assertFalse(get_path.exists())
        with self.assertRaises(FileNotFoundError):
            get_path.size()

    def _check_presigned_url(self, url: str, expiration_time_seconds: int):
        parsed = urlparse(url)
        self.assertEqual(parsed.scheme, "https")
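
        # The "Expires" query parameter of the presigned URL carries the
        # absolute POSIX timestamp after which the signature stops working.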
        actual_expiration_timestamp = int(parse_qs(parsed.query)["Expires"][0])
        current_time = int(time.time())
        expected_expiration_timestamp = current_time + expiration_time_seconds
        # Allow some flex in the expiration time in case this test process
        # goes out to lunch for a while on a busy CI machine.
        self.assertLessEqual(abs(expected_expiration_timestamp - actual_expiration_timestamp), 120)

    def test_threading_true(self):
        with mock.patch.dict(os.environ, {"LSST_S3_USE_THREADS": "True"}):
            S3ResourcePath.use_threads = None
            test_resource_path = self.root_uri.join("test_file.dat")
            self.assertTrue(test_resource_path._transfer_config.use_threads)

    def test_implicit_default_threading(self):
        S3ResourcePath.use_threads = None
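        # Look up boto3's own default for use_threads via introspection
        # rather than hard-coding the expected value.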
        boto_default = signature(boto3.s3.transfer.TransferConfig).parameters["use_threads"].default
        test_resource_path = self.root_uri.join("test_file.dat")
        self.assertEqual(test_resource_path._transfer_config.use_threads, boto_default)

    def test_explicit_default_threading(self):
        with mock.patch.dict(os.environ, {"LSST_S3_USE_THREADS": "None"}):
            S3ResourcePath.use_threads = None
            boto_default = signature(boto3.s3.transfer.TransferConfig).parameters["use_threads"].default
            test_resource_path = self.root_uri.join("test_file.dat")
            self.assertEqual(test_resource_path._transfer_config.use_threads, boto_default)

    def test_threading_false(self):
        with mock.patch.dict(os.environ, {"LSST_S3_USE_THREADS": "False"}):
            S3ResourcePath.use_threads = None
            test_resource_path = self.root_uri.join("test_file.dat")
            self.assertFalse(test_resource_path._transfer_config.use_threads)
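
            # Re-run the local transfer tests while threading is disabled.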
            self.test_local()


if __name__ == "__main__":
    unittest.main()