Coverage for tests/test_s3.py: 26%
173 statements
« prev ^ index » next coverage.py v7.5.1, created at 2024-05-16 02:51 -0700
« prev ^ index » next coverage.py v7.5.1, created at 2024-05-16 02:51 -0700
1# This file is part of lsst-resources.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# Use of this source code is governed by a 3-clause BSD-style
10# license that can be found in the LICENSE file.
12import os
13import time
14import unittest
15from inspect import signature
16from unittest import mock
17from urllib.parse import parse_qs, urlparse
19from lsst.resources import ResourcePath
20from lsst.resources.s3 import S3ResourcePath
21from lsst.resources.s3utils import clean_test_environment_for_s3
22from lsst.resources.tests import GenericReadWriteTestCase, GenericTestCase
try:
    import boto3
    import botocore

    try:
        # moto v5 exposes a single ``mock_aws`` entry point.
        from moto import mock_aws  # v5
    except ImportError:
        # Fall back to the S3-specific mock and give it the same name.
        from moto import mock_s3 as mock_aws
except ImportError:
    # Falsy marker checked by the ``skipIf`` decorators on the S3 test classes.
    boto3 = None

    def mock_aws(cls=None):
        """Stand in for moto's ``mock_aws`` when it can not be imported.

        Parameters
        ----------
        cls : `object`, optional
            Object being decorated. When given it is returned unchanged, so
            the function still works as a no-op decorator.

        Returns
        -------
        result : `object`
            ``cls`` itself when one is supplied. When called with no
            argument (the form used with ``enterContext`` in this file), a
            context manager that does nothing.
        """
        if cls is None:
            # Imported here so the fallback adds no module-level dependency.
            from contextlib import nullcontext

            return nullcontext()
        return cls
class GenericS3TestCase(GenericTestCase, unittest.TestCase):
    """Run the generic URI test suite against the ``s3`` scheme."""

    # URI scheme and network location handed to the generic test suite.
    scheme = "s3"
    netloc = "my_bucket"
class S3ReadWriteTestCaseBase(GenericReadWriteTestCase):
    """Read/write tests for S3 URIs, run against a mocked S3 service.

    The methods here read ``self.bucket``, which this class does not define;
    the concrete test classes in this file set it together with ``netloc``.
    """

    scheme = "s3"
    # Optional explicit endpoint passed to boto3 when creating the bucket.
    s3_endpoint_url: str | None = None

    def setUp(self):
        """Start S3 mocking and create the bucket used by the tests."""
        self.enterContext(clean_test_environment_for_s3())

        # Turn on S3 mocking for the duration of the test.
        self.enterContext(mock_aws())

        # MOTO has to be told that the bucket exists before it is used.
        resource = boto3.resource("s3", endpoint_url=self.s3_endpoint_url)
        resource.create_bucket(Bucket=self.bucket)

        super().setUp()

    def tearDown(self):
        """Empty and delete the test bucket, then reset the threading flag."""
        resource = boto3.resource("s3")
        try:
            resource.Bucket(self.bucket).objects.all().delete()
        except botocore.exceptions.ClientError as err:
            # A 404 only means the key was not reachable; anything else is
            # unexpected and is propagated.
            if err.response["Error"]["Code"] != "404":
                raise

        resource.Bucket(self.bucket).delete()

        S3ResourcePath.use_threads = None

        super().tearDown()

    def test_bucket_fail(self):
        """Check the errors raised when the bucket is unknown."""
        # This bucket is deliberately never created.
        missing = ResourcePath("s3://badbucket/something/")

        with self.assertRaises(ValueError):
            missing.mkdir()

        with self.assertRaises(FileNotFoundError):
            missing.remove()

    def test_transfer_progress(self):
        """Test progress bar reporting for upload and download."""
        s3_file = self.root_uri.join("test.dat")
        s3_file.write(b"42")
        with ResourcePath.temporary_uri(suffix=".dat") as local_file:
            # S3 -> local file.
            with self.assertLogs("lsst.resources", level="DEBUG") as captured:
                local_file.transfer_from(s3_file, transfer="auto")
            download_log = "".join(captured.output)
            self.assertRegex(download_log, r"test\.dat.*100\%")

            # Local file -> S3.
            with self.assertLogs("lsst.resources", level="DEBUG") as captured:
                s3_file.transfer_from(local_file, transfer="auto", overwrite=True)
            upload_log = "".join(captured.output)
            self.assertRegex(upload_log, rf"{local_file.basename()}.*100\%")

    def test_handle(self):
        """Exercise the file handle returned by ``open`` for write and read."""
        one_kb = 1024
        six_mb = 6 * 1024 * 1024

        target = self.root_uri.join("test_handle.dat")
        with target.open("wb") as writer:
            self.assertTrue(writer.writable())
            # Write 6 megabytes to make sure partial writes work.
            writer.write(six_mb * b"a")
            self.assertEqual(writer.tell(), six_mb)
            writer.flush()
            self.assertGreaterEqual(len(writer._multiPartUpload), 1)

            # Seeking back to the start is expected to fail at this point.
            with self.assertRaises(OSError):
                writer.seek(0)

            # Append some more bytes.
            writer.write(one_kb * b"c")

            # Seek back to the 6 megabyte mark and overwrite them.
            writer.seek(six_mb)
            writer.write(one_kb * b"b")

        with target.open("rb") as reader:
            self.assertTrue(reader.readable())
            # Read the first 6 megabytes.
            chunk = reader.read(six_mb)
            self.assertEqual(chunk, six_mb * b"a")
            self.assertEqual(reader.tell(), six_mb)
            # An additional read should return the next part.
            chunk = reader.read(one_kb)
            self.assertEqual(chunk, one_kb * b"b")
            # Seek back to the beginning to verify seeking.
            reader.seek(0)
            chunk = reader.read(one_kb)
            self.assertEqual(chunk, one_kb * b"a")

    def test_url_signing(self):
        """Round-trip several payload sizes through presigned URLs."""
        cases = (
            ("url-signing-test.txt", b"test123"),
            # A zero byte presigned S3 HTTP URL is a weird edge case, because
            # we emulate HEAD requests using a 1-byte GET.
            ("url-signing-test-zero-bytes.txt", b""),
            # Should be the same as a normal case, but check it for paranoia
            # since it's on the boundary of the read size.
            ("url-signing-test-one-byte.txt", b"t"),
        )
        for filename, payload in cases:
            self._test_url_signing_case(filename, payload)

    def _test_url_signing_case(self, filename: str, test_data: bytes):
        """Write ``test_data`` via a presigned PUT URL and read it back.

        Parameters
        ----------
        filename : `str`
            Name of the file, relative to the test root URI.
        test_data : `bytes`
            Content to upload and expect back on download.
        """
        target = self.root_uri.join(filename)

        upload_url = target.generate_presigned_put_url(expiration_time_seconds=1800)
        self._check_presigned_url(upload_url, 1800)
        download_url = target.generate_presigned_get_url(expiration_time_seconds=3600)
        self._check_presigned_url(download_url, 3600)

        # Moto monkeypatches the 'requests' library to mock access to
        # presigned URLs, so HttpResourcePath can be used on them here.
        ResourcePath(upload_url).write(test_data)
        download_path = ResourcePath(download_url)
        self.assertEqual(download_path.read(), test_data)
        self.assertTrue(download_path.exists())
        self.assertEqual(download_path.size(), len(test_data))

    def test_nonexistent_presigned_url(self):
        """Check a presigned GET URL for a file that was never uploaded."""
        absent = self.root_uri.join("this-is-a-missing-file.txt")
        download_path = ResourcePath(absent.generate_presigned_get_url(expiration_time_seconds=3600))
        # This goes through the HttpResourcePath implementation for presigned
        # S3 URLs. Nothing was uploaded, so the resource must not exist.
        self.assertFalse(download_path.exists())
        with self.assertRaises(FileNotFoundError):
            download_path.size()

    def _check_presigned_url(self, url: str, expiration_time_seconds: int):
        """Assert that ``url`` is HTTPS and expires at about the right time.

        Parameters
        ----------
        url : `str`
            Presigned URL to inspect.
        expiration_time_seconds : `int`
            Lifetime that was requested when the URL was generated.
        """
        pieces = urlparse(url)
        self.assertEqual(pieces.scheme, "https")

        expires_at = int(parse_qs(pieces.query)["Expires"][0])
        expected_expiry = int(time.time()) + expiration_time_seconds
        # Allow some flex in the expiration time in case this test process
        # goes out to lunch for a while on a busy CI machine.
        drift = abs(expected_expiry - expires_at)
        self.assertLessEqual(drift, 120)

    def test_threading_true(self):
        """``LSST_S3_USE_THREADS=True`` enables threads in the transfer config."""
        with mock.patch.dict(os.environ, {"LSST_S3_USE_THREADS": "True"}):
            S3ResourcePath.use_threads = None
            path = self.root_uri.join("test_file.dat")
            self.assertTrue(path._transfer_config.use_threads)

    def test_implicit_default_threading(self):
        """With no override the transfer config matches the boto3 default."""
        S3ResourcePath.use_threads = None
        transfer_params = signature(boto3.s3.transfer.TransferConfig).parameters
        expected = transfer_params["use_threads"].default
        path = self.root_uri.join("test_file.dat")
        self.assertEqual(path._transfer_config.use_threads, expected)

    def test_explicit_default_threading(self):
        """``LSST_S3_USE_THREADS=None`` also gives the boto3 default."""
        with mock.patch.dict(os.environ, {"LSST_S3_USE_THREADS": "None"}):
            S3ResourcePath.use_threads = None
            transfer_params = signature(boto3.s3.transfer.TransferConfig).parameters
            expected = transfer_params["use_threads"].default
            path = self.root_uri.join("test_file.dat")
            self.assertEqual(path._transfer_config.use_threads, expected)

    def test_threading_false(self):
        """``LSST_S3_USE_THREADS=False`` disables threads in the transfer config."""
        with mock.patch.dict(os.environ, {"LSST_S3_USE_THREADS": "False"}):
            S3ResourcePath.use_threads = None
            path = self.root_uri.join("test_file.dat")
            self.assertFalse(path._transfer_config.use_threads)

            # Re-run the local test while the variable is still patched.
            self.test_local()
@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!")
class S3ReadWriteTestCase(S3ReadWriteTestCaseBase, unittest.TestCase):
    """Run the S3 read/write tests with a plain bucket URI.

    No profile or endpoint is given (``s3://bucketname/...``).
    """

    # The network location is just the bucket name in this variant.
    bucket = "my_2nd_bucket"
    netloc = bucket
@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!")
class S3WithProfileReadWriteTestCase(S3ReadWriteTestCaseBase, unittest.TestCase):
    """Run the S3 read/write tests with a profile in the URI.

    The profile is given explicitly (``s3://profile@bucketname/...``).
    """

    bucket = "3rd_bucket"
    netloc = f"myprofile@{bucket}"
    s3_endpoint_url = "https://endpoint1.test.example"

    def setUp(self):
        """Patch the environment for the custom endpoint, then set up."""
        # Configure custom S3 endpoints that can be targeted from tests
        # using the non-default profile.
        profile_env = {
            "MOTO_S3_CUSTOM_ENDPOINTS": self.s3_endpoint_url,
            "LSST_RESOURCES_S3_PROFILE_myprofile": "https://access_key:security_key@endpoint1.test.example",
        }
        self.enterContext(mock.patch.dict(os.environ, profile_env))

        super().setUp()

    def test_missing_profile(self):
        """Reading through an unconfigured profile raises ProfileNotFound."""
        with self.assertRaises(botocore.exceptions.ProfileNotFound):
            ResourcePath("s3://otherprofile@bucket").read()

    def test_s3_endpoint_url(self):
        """Check a profile-less URI while ``S3_ENDPOINT_URL`` is set."""
        endpoint_env = {"S3_ENDPOINT_URL": self.s3_endpoint_url}
        with mock.patch.dict(os.environ, endpoint_env):
            target = ResourcePath(f"s3://{self.bucket}/test-s3-endpoint-url.txt")
            payload = b"123"
            target.write(payload)
            self.assertEqual(target.read(), payload)
            # The presigned URL should contain the configured endpoint.
            presigned = target.generate_presigned_get_url(expiration_time_seconds=3600)
            self.assertIn("https://endpoint1.test.example", presigned)

    def test_uri_syntax(self):
        """Check how the netloc is split into profile and bucket."""
        with_profile = ResourcePath("s3://profile@bucket/path")
        self.assertEqual(with_profile._bucket, "bucket")
        self.assertEqual(with_profile._profile, "profile")

        bucket_only = ResourcePath("s3://bucket2/path")
        self.assertEqual(bucket_only._bucket, "bucket2")
        self.assertIsNone(bucket_only._profile)

    def test_ceph_uri_syntax(self):
        """Check that bucket names containing a colon are kept whole."""
        # The Ceph S3 'multi-tenant' syntax for buckets can include colons.
        with_profile = ResourcePath("s3://profile@ceph:bucket/path")
        self.assertEqual(with_profile._bucket, "ceph:bucket")
        self.assertEqual(with_profile._profile, "profile")

        bucket_only = ResourcePath("s3://ceph:bucket2/path")
        self.assertEqual(bucket_only._bucket, "ceph:bucket2")
        self.assertIsNone(bucket_only._profile)
if __name__ == "__main__":
    # Run the tests in this module when it is executed as a script.
    unittest.main()