Coverage for tests/test_uri.py: 12%
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of lsst-resources.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# Use of this source code is governed by a 3-clause BSD-style
10# license that can be found in the LICENSE file.
import glob
import os
import pathlib
import shutil
import unittest
import unittest.mock
import urllib.parse
import uuid

import responses

# boto3/botocore/moto are optional test dependencies; the S3 tests are skipped
# when they are missing (boto3 is then None).
try:
    import boto3
    import botocore
    from moto import mock_s3
except ImportError:
    boto3 = None

    def mock_s3(cls):
        """Return ``cls`` unchanged; stand-in used when moto is unavailable."""
        return cls


34from lsst.resources import ResourcePath
35from lsst.resources.s3utils import setAwsEnvCredentials, unsetAwsEnvCredentials
36from lsst.resources.utils import makeTestTempDir, removeTestTempDir
38TESTDIR = os.path.abspath(os.path.dirname(__file__))
def _check_open(test_case, uri, *, mode_suffixes=("", "t", "b"), **kwargs) -> None:
    """Exercise the ``open`` method of a URI in every read/write mode.

    Parameters
    ----------
    test_case : `unittest.TestCase`
        Test case to use for assertions.
    uri : `ResourcePath`
        URI to use for tests. Must point to a writeable location that is not
        yet occupied by a file. On return, the location may point to a file
        only if the test fails.
    mode_suffixes : `Iterable` of `str`
        Suffixes to pass as part of the ``mode`` argument to ``uri.open``,
        indicating whether to open as binary or as text; the only permitted
        elements are ``""``, ``"t"``, and ``"b"``.
    **kwargs
        Additional keyword arguments to forward to all calls to `open`.
    """
    text_content = "wxyz🙂"
    bytes_content = uuid.uuid4().bytes
    content_by_mode_suffix = {
        "": text_content,
        "t": text_content,
        "b": bytes_content,
    }
    empty_content_by_mode_suffix = {
        "": "",
        "t": "",
        "b": b"",
    }
    # Payload for the write that must be rejected. It has to match the buffer
    # type so that a broken exclusive-create is reported as a missing
    # FileExistsError rather than as an unrelated TypeError.
    rejected_content_by_mode_suffix = {
        "": "bad",
        "t": "bad",
        "b": b"bad",
    }
    for mode_suffix in mode_suffixes:
        content = content_by_mode_suffix[mode_suffix]
        # Create file with mode='x', which prohibits overwriting.
        with uri.open("x" + mode_suffix, **kwargs) as write_buffer:
            write_buffer.write(content)
        test_case.assertTrue(uri.exists())
        # Check that opening with 'x' now raises, and does not modify content.
        with test_case.assertRaises(FileExistsError):
            with uri.open("x" + mode_suffix, **kwargs) as write_buffer:
                write_buffer.write(rejected_content_by_mode_suffix[mode_suffix])
        # Read the file we created and check the contents.
        with uri.open("r" + mode_suffix, **kwargs) as read_buffer:
            test_case.assertEqual(read_buffer.read(), content)
        # Write two copies of the content, overwriting the single copy there.
        with uri.open("w" + mode_suffix, **kwargs) as write_buffer:
            write_buffer.write(content + content)
        # Read again, this time use mode='r+', which reads what is there and
        # then lets us write more; we'll use that to reset the file to one
        # copy of the content.
        with uri.open("r+" + mode_suffix, **kwargs) as rw_buffer:
            test_case.assertEqual(rw_buffer.read(), content + content)
            rw_buffer.seek(0)
            rw_buffer.truncate()
            rw_buffer.write(content)
            rw_buffer.seek(0)
            test_case.assertEqual(rw_buffer.read(), content)
        with uri.open("r" + mode_suffix, **kwargs) as read_buffer:
            test_case.assertEqual(read_buffer.read(), content)
        # Append some more content to the file; should now have two copies.
        with uri.open("a" + mode_suffix, **kwargs) as append_buffer:
            append_buffer.write(content)
        with uri.open("r" + mode_suffix, **kwargs) as read_buffer:
            test_case.assertEqual(read_buffer.read(), content + content)
        # Final mode to check is w+, which does read/write but truncates first.
        with uri.open("w+" + mode_suffix, **kwargs) as rw_buffer:
            test_case.assertEqual(rw_buffer.read(), empty_content_by_mode_suffix[mode_suffix])
            rw_buffer.write(content)
            rw_buffer.seek(0)
            test_case.assertEqual(rw_buffer.read(), content)
        with uri.open("r" + mode_suffix, **kwargs) as read_buffer:
            test_case.assertEqual(read_buffer.read(), content)
        # Remove file to make room for the next loop of tests with this URI.
        uri.remove()


class FileURITestCase(unittest.TestCase):
    """Concrete tests for local files."""

    def setUp(self):
        """Create a scratch directory next to this test file."""
        # Use a local tempdir because on macOS the temp dirs use symlinks
        # so relsymlink gets quite confused.
        self.tmpdir = makeTestTempDir(TESTDIR)

    def tearDown(self):
        """Remove the scratch directory created in `setUp`."""
        removeTestTempDir(self.tmpdir)

    def testFile(self):
        """Check construction, write, read and size for a local file."""
        filename = os.path.join(self.tmpdir, "test.txt")
        uri = ResourcePath(filename)
        self.assertFalse(uri.exists(), f"{uri} should not exist")
        self.assertEqual(uri.ospath, filename)

        path = pathlib.Path(filename)
        uri = ResourcePath(path)
        self.assertEqual(uri.ospath, filename)

        content = "abcdefghijklmnopqrstuv\n"
        uri.write(content.encode())
        self.assertTrue(os.path.exists(filename), "File should exist locally")
        self.assertTrue(uri.exists(), f"{uri} should now exist")
        self.assertEqual(uri.read().decode(), content)
        self.assertEqual(uri.size(), len(content.encode()))

        with self.assertRaises(FileNotFoundError):
            ResourcePath("file/not/there.txt").size()

        # Check that creating a URI from a URI returns the same thing
        uri2 = ResourcePath(uri)
        self.assertEqual(uri, uri2)
        self.assertIs(uri, uri2)

        with self.assertRaises(ValueError):
            # Scheme-less URIs are not allowed to support non-file roots
            # at the present time. This may change in the future to become
            # equivalent to ResourcePath.join()
            ResourcePath("a/b.txt", root=ResourcePath("s3://bucket/a/b/"))

    def testExtension(self):
        """Check that file extensions can be replaced."""
        file_uri = ResourcePath(os.path.join(self.tmpdir, "test.txt"))
        self.assertEqual(file_uri.updatedExtension(None), file_uri)
        self.assertEqual(file_uri.updatedExtension(".txt"), file_uri)
        # An unchanged extension must hand back the very same object.
        self.assertIs(file_uri.updatedExtension(".txt"), file_uri)

        fits = file_uri.updatedExtension(".fits.gz")
        self.assertEqual(fits.basename(), "test.fits.gz")
        self.assertEqual(fits.updatedExtension(".jpeg").basename(), "test.jpeg")

    def testRelative(self):
        """Check that we can get subpaths back from two URIs"""
        parent = ResourcePath(self.tmpdir, forceDirectory=True, forceAbsolute=True)
        self.assertTrue(parent.isdir())
        child = ResourcePath(os.path.join(self.tmpdir, "dir1", "file.txt"), forceAbsolute=True)

        self.assertEqual(child.relative_to(parent), "dir1/file.txt")

        not_child = ResourcePath("/a/b/dir1/file.txt")
        self.assertIsNone(not_child.relative_to(parent))
        self.assertFalse(not_child.isdir())

        not_directory = ResourcePath(os.path.join(self.tmpdir, "dir1", "file2.txt"))
        self.assertIsNone(child.relative_to(not_directory))

        # Relative URIs
        parent = ResourcePath("a/b/", forceAbsolute=False)
        child = ResourcePath("a/b/c/d.txt", forceAbsolute=False)
        self.assertFalse(child.scheme)
        self.assertEqual(child.relative_to(parent), "c/d.txt")

        # forceAbsolute=True should work even on an existing ResourcePath
        self.assertTrue(pathlib.Path(ResourcePath(child, forceAbsolute=True).ospath).is_absolute())

        # File URI and schemeless URI
        parent = ResourcePath("file:/a/b/c/")
        child = ResourcePath("e/f/g.txt", forceAbsolute=False)

        # If the child is relative and the parent is absolute we assume
        # that the child is a child of the parent unless it uses ".."
        self.assertEqual(child.relative_to(parent), "e/f/g.txt")

        child = ResourcePath("../e/f/g.txt", forceAbsolute=False)
        self.assertIsNone(child.relative_to(parent))

        child = ResourcePath("../c/e/f/g.txt", forceAbsolute=False)
        self.assertEqual(child.relative_to(parent), "e/f/g.txt")

        # Test non-file root with relative path.
        child = ResourcePath("e/f/g.txt", forceAbsolute=False)
        parent = ResourcePath("s3://hello/a/b/c/")
        self.assertEqual(child.relative_to(parent), "e/f/g.txt")

        # Test with different netloc
        child = ResourcePath("http://my.host/a/b/c.txt")
        parent = ResourcePath("http://other.host/a/")
        self.assertIsNone(child.relative_to(parent), f"{child}.relative_to({parent})")

        # Schemeless absolute child.
        # Schemeless absolute URI is constructed using root= parameter.
        parent = ResourcePath("file:///a/b/c/")
        child = ResourcePath("d/e.txt", root=parent)
        self.assertEqual(child.relative_to(parent), "d/e.txt", f"{child}.relative_to({parent})")

        parent = ResourcePath("c/", root="/a/b/")
        self.assertEqual(child.relative_to(parent), "d/e.txt", f"{child}.relative_to({parent})")

        # Absolute schemeless child with relative parent will always fail.
        parent = ResourcePath("d/e.txt", forceAbsolute=False)
        self.assertIsNone(child.relative_to(parent), f"{child}.relative_to({parent})")

    def testParents(self):
        """Test of splitting and parent walking."""
        parent = ResourcePath(self.tmpdir, forceDirectory=True, forceAbsolute=True)
        child_file = parent.join("subdir/file.txt")
        self.assertFalse(child_file.isdir())
        child_subdir, filename = child_file.split()
        self.assertEqual(filename, "file.txt")
        self.assertTrue(child_subdir.isdir())
        self.assertEqual(child_file.dirname(), child_subdir)
        self.assertEqual(child_file.basename(), filename)
        self.assertEqual(child_file.parent(), child_subdir)
        derived_parent = child_subdir.parent()
        self.assertEqual(derived_parent, parent)
        self.assertTrue(derived_parent.isdir())
        self.assertEqual(child_file.parent().parent(), parent)

    def testEnvVar(self):
        """Test that environment variables are expanded."""

        with unittest.mock.patch.dict(os.environ, {"MY_TEST_DIR": "/a/b/c"}):
            uri = ResourcePath("${MY_TEST_DIR}/d.txt")
        self.assertEqual(uri.path, "/a/b/c/d.txt")
        self.assertEqual(uri.scheme, "file")

        # This will not expand
        uri = ResourcePath("${MY_TEST_DIR}/d.txt", forceAbsolute=False)
        self.assertEqual(uri.path, "${MY_TEST_DIR}/d.txt")
        self.assertFalse(uri.scheme)

    def testMkdir(self):
        """Check that nested directories can be created and written into."""
        tmpdir = ResourcePath(self.tmpdir)
        newdir = tmpdir.join("newdir/seconddir")
        newdir.mkdir()
        self.assertTrue(newdir.exists())
        newfile = newdir.join("temp.txt")
        newfile.write("Data".encode())
        self.assertTrue(newfile.exists())

    def testTransfer(self):
        """Check every local transfer mode, with and without overwrite."""
        src = ResourcePath(os.path.join(self.tmpdir, "test.txt"))
        content = "Content is some content\nwith something to say\n\n"
        src.write(content.encode())

        for mode in ("copy", "link", "hardlink", "symlink", "relsymlink"):
            dest = ResourcePath(os.path.join(self.tmpdir, f"dest_{mode}.txt"))
            dest.transfer_from(src, transfer=mode)
            self.assertTrue(dest.exists(), f"Check that {dest} exists (transfer={mode})")

            with open(dest.ospath, "r") as fh:
                new_content = fh.read()
            self.assertEqual(new_content, content)

            if mode in ("symlink", "relsymlink"):
                self.assertTrue(os.path.islink(dest.ospath), f"Check that {dest} is symlink")

            # If the source and destination are hardlinks of each other
            # the transfer should work even if overwrite=False.
            if mode in ("link", "hardlink"):
                dest.transfer_from(src, transfer=mode)
            else:
                with self.assertRaises(
                    FileExistsError, msg=f"Overwrite of {dest} should not be allowed ({mode})"
                ):
                    dest.transfer_from(src, transfer=mode)

            dest.transfer_from(src, transfer=mode, overwrite=True)

            os.remove(dest.ospath)

        # new_content still holds what was read back in the last iteration.
        b = src.read()
        self.assertEqual(b.decode(), new_content)

        nbytes = 10
        subset = src.read(size=nbytes)
        self.assertEqual(len(subset), nbytes)
        self.assertEqual(subset.decode(), content[:nbytes])

        with self.assertRaises(ValueError):
            src.transfer_from(src, transfer="unknown")

    def testTransferIdentical(self):
        """Test overwrite of identical files."""
        dir1 = ResourcePath(os.path.join(self.tmpdir, "dir1"), forceDirectory=True)
        dir1.mkdir()
        dir2 = os.path.join(self.tmpdir, "dir2")
        os.symlink(dir1.ospath, dir2)

        # Write a test file.
        src_file = dir1.join("test.txt")
        content = "0123456"
        src_file.write(content.encode())

        # Construct URI to destination that should be identical.
        dest_file = ResourcePath(os.path.join(dir2), forceDirectory=True).join("test.txt")
        self.assertTrue(dest_file.exists())
        self.assertNotEqual(src_file, dest_file)

        # Transfer it over itself.
        dest_file.transfer_from(src_file, transfer="symlink", overwrite=True)
        new_content = dest_file.read().decode()
        self.assertEqual(content, new_content)

    def testResource(self):
        """Check read and existence queries on ``resource://`` URIs."""
        # No resources in this package so need a resource in the main
        # python distribution.
        u = ResourcePath("resource://idlelib/Icons/README.txt")
        self.assertTrue(u.exists(), f"Check {u} exists")

        content = u.read().decode()
        self.assertIn("IDLE", content)

        truncated = u.read(size=9).decode()
        self.assertEqual(truncated, content[:9])

        d = ResourcePath("resource://idlelib/Icons", forceDirectory=True)
        # NOTE(review): the message talks about directory ``d`` but the
        # assertion re-checks the file ``u``; confirm whether ``d.exists()``
        # was intended before changing it.
        self.assertTrue(u.exists(), f"Check directory {d} exists")

        j = d.join("README.txt")
        self.assertEqual(u, j)
        self.assertFalse(j.dirLike)
        self.assertFalse(j.isdir())
        not_there = d.join("not-there.yaml")
        self.assertFalse(not_there.exists())

        bad = ResourcePath("resource://bad.module/not.yaml")
        multi = ResourcePath.mexists([u, bad, not_there])
        self.assertTrue(multi[u])
        self.assertFalse(multi[bad])
        self.assertFalse(multi[not_there])

    def testEscapes(self):
        """Special characters in file paths"""
        src = ResourcePath("bbb/???/test.txt", root=self.tmpdir, forceAbsolute=True)
        self.assertFalse(src.scheme)
        src.write(b"Some content")
        self.assertTrue(src.exists())

        # abspath always returns a file scheme
        file_uri = src.abspath()
        self.assertTrue(file_uri.exists())
        self.assertIn("???", file_uri.ospath)
        self.assertNotIn("???", file_uri.path)

        file_uri = file_uri.updatedFile("tests??.txt")
        self.assertNotIn("??.txt", file_uri.path)
        file_uri.write(b"Other content")
        self.assertEqual(file_uri.read(), b"Other content")

        src = src.updatedFile("tests??.txt")
        self.assertIn("??.txt", src.path)
        self.assertEqual(
            file_uri.read(), src.read(), f"reading from {file_uri.ospath} and {src.ospath}"
        )

        # File URI and schemeless URI
        parent = ResourcePath("file:" + urllib.parse.quote("/a/b/c/de/??/"))
        child = ResourcePath("e/f/g.txt", forceAbsolute=False)
        self.assertEqual(child.relative_to(parent), "e/f/g.txt")

        child = ResourcePath("e/f??#/g.txt", forceAbsolute=False)
        self.assertEqual(child.relative_to(parent), "e/f??#/g.txt")

        child = ResourcePath("file:" + urllib.parse.quote("/a/b/c/de/??/e/f??#/g.txt"))
        self.assertEqual(child.relative_to(parent), "e/f??#/g.txt")

        self.assertEqual(child.relativeToPathRoot, "a/b/c/de/??/e/f??#/g.txt")

        # Schemeless so should not quote
        dir_uri = ResourcePath("bbb/???/", root=self.tmpdir, forceAbsolute=True, forceDirectory=True)
        self.assertIn("???", dir_uri.ospath)
        self.assertIn("???", dir_uri.path)
        self.assertFalse(dir_uri.scheme)

        # dir.join() morphs into a file scheme
        new = dir_uri.join("test_j.txt")
        self.assertIn("???", new.ospath, f"Checking {new}")
        new.write(b"Content")

        new2name = "###/test??.txt"
        new2 = dir_uri.join(new2name)
        self.assertIn("???", new2.ospath)
        new2.write(b"Content")
        self.assertTrue(new2.ospath.endswith(new2name))
        self.assertEqual(new.read(), new2.read())

        fdir = dir_uri.abspath()
        self.assertNotIn("???", fdir.path)
        self.assertIn("???", fdir.ospath)
        self.assertEqual(fdir.scheme, "file")
        fnew = dir_uri.join("test_jf.txt")
        fnew.write(b"Content")

        fnew2 = fdir.join(new2name)
        fnew2.write(b"Content")
        self.assertTrue(fnew2.ospath.endswith(new2name))
        self.assertNotIn("###", fnew2.path)

        self.assertEqual(fnew.read(), fnew2.read())

        # Test that children relative to schemeless and file schemes
        # still return the same unquoted name
        self.assertEqual(fnew2.relative_to(fdir), new2name, f"{fnew2}.relative_to({fdir})")
        self.assertEqual(fnew2.relative_to(dir_uri), new2name, f"{fnew2}.relative_to({dir_uri})")
        self.assertEqual(new2.relative_to(fdir), new2name, f"{new2}.relative_to({fdir})")
        self.assertEqual(new2.relative_to(dir_uri), new2name, f"{new2}.relative_to({dir_uri})")

        # Check for double quoting
        plus_path = "/a/b/c+d/"
        with self.assertLogs(level="WARNING"):
            uri = ResourcePath(urllib.parse.quote(plus_path), forceDirectory=True)
        self.assertEqual(uri.ospath, plus_path)

        # Check that # is not escaped for schemeless URIs
        hash_path = "/a/b#/c&d#xyz"
        hpos = hash_path.rfind("#")
        uri = ResourcePath(hash_path)
        self.assertEqual(uri.ospath, hash_path[:hpos])
        self.assertEqual(uri.fragment, hash_path[hpos + 1 :])

    def testHash(self):
        """Test that we can store URIs in sets and as keys."""
        uri1 = ResourcePath(TESTDIR)
        uri2 = uri1.join("test/")
        s = {uri1, uri2}
        self.assertIn(uri1, s)

        d = {uri1: "1", uri2: "2"}
        self.assertEqual(d[uri2], "2")

    def testWalk(self):
        """Test ResourcePath.walk()."""
        test_dir_uri = ResourcePath(TESTDIR)

        # Look for a file that is not there
        file_uri = test_dir_uri.join("config/basic/butler.yaml")
        found = list(ResourcePath.findFileResources([file_uri]))
        self.assertEqual(found[0], file_uri)

        # Compare against the full local paths
        expected = {
            p for p in glob.glob(os.path.join(TESTDIR, "data", "**"), recursive=True) if os.path.isfile(p)
        }
        found = {u.ospath for u in ResourcePath.findFileResources([test_dir_uri.join("data")])}
        self.assertEqual(found, expected)

        # Now solely the YAML files
        expected_yaml = set(glob.glob(os.path.join(TESTDIR, "data", "**", "*.yaml"), recursive=True))
        found = {
            u.ospath
            for u in ResourcePath.findFileResources([test_dir_uri.join("data")], file_filter=r".*\.yaml$")
        }
        self.assertEqual(found, expected_yaml)

        # Now two explicit directories and a file
        expected = set(glob.glob(os.path.join(TESTDIR, "data", "dir1", "*.yaml"), recursive=True))
        expected.update(set(glob.glob(os.path.join(TESTDIR, "data", "dir2", "*.yaml"), recursive=True)))
        expected.add(file_uri.ospath)

        found = {
            u.ospath
            for u in ResourcePath.findFileResources(
                [file_uri, test_dir_uri.join("data/dir1"), test_dir_uri.join("data/dir2")],
                file_filter=r".*\.yaml$",
            )
        }
        self.assertEqual(found, expected)

        # Group by directory -- find everything and compare it with what
        # we expected to be there in total.
        found_yaml = set()
        counter = 0
        for uris in ResourcePath.findFileResources(
            [file_uri, test_dir_uri.join("data/")], file_filter=r".*\.yaml$", grouped=True
        ):
            found = {u.ospath for u in uris}
            if found:
                counter += 1

            found_yaml.update(found)

        # Build a separate set (not an alias of expected_yaml) that also
        # contains the explicitly requested file.
        expected_yaml_2 = expected_yaml | {file_uri.ospath}
        self.assertEqual(found_yaml, expected_yaml_2)
        self.assertEqual(counter, 3)

        # Grouping but check that single files are returned in a single group
        # at the end
        file2 = test_dir_uri.join("config/templates/templates-bad.yaml")
        found = list(
            ResourcePath.findFileResources([file_uri, file2, test_dir_uri.join("data/dir2")], grouped=True)
        )
        self.assertEqual(len(found), 2)
        self.assertEqual(list(found[1]), [file_uri, file2])

        with self.assertRaises(ValueError):
            list(file_uri.walk())

    def testRootURI(self):
        """Test ResourcePath.root_uri()."""
        uri = ResourcePath("https://www.notexist.com:8080/file/test")
        uri2 = ResourcePath("s3://www.notexist.com/file/test")
        self.assertEqual(uri.root_uri().geturl(), "https://www.notexist.com:8080/")
        self.assertEqual(uri2.root_uri().geturl(), "s3://www.notexist.com/")

    def testJoin(self):
        """Test .join method."""

        root_str = "s3://bucket/hsc/payload/"
        root = ResourcePath(root_str)

        self.assertEqual(root.join("b/test.txt").geturl(), f"{root_str}b/test.txt")
        add_dir = root.join("b/c/d/")
        self.assertTrue(add_dir.isdir())
        self.assertEqual(add_dir.geturl(), f"{root_str}b/c/d/")

        up_relative = root.join("../b/c.txt")
        self.assertFalse(up_relative.isdir())
        self.assertEqual(up_relative.geturl(), "s3://bucket/hsc/b/c.txt")

        quote_example = "b&c.t@x#t"
        needs_quote = root.join(quote_example)
        self.assertEqual(needs_quote.unquoted_path, f"/hsc/payload/{quote_example}")

        other = ResourcePath("file://localhost/test.txt")
        self.assertEqual(root.join(other), other)
        self.assertEqual(other.join("b/new.txt").geturl(), "file://localhost/b/new.txt")

        joined = ResourcePath("s3://bucket/hsc/payload/").join(
            ResourcePath("test.qgraph", forceAbsolute=False)
        )
        self.assertEqual(joined, ResourcePath("s3://bucket/hsc/payload/test.qgraph"))

        with self.assertRaises(ValueError):
            ResourcePath("s3://bucket/hsc/payload/").join(ResourcePath("test.qgraph"))

    def testTemporary(self):
        """Check that temporary URIs are removed when the context exits."""
        with ResourcePath.temporary_uri(suffix=".json") as tmp:
            self.assertEqual(tmp.getExtension(), ".json", f"uri: {tmp}")
            self.assertTrue(tmp.isabs(), f"uri: {tmp}")
            self.assertFalse(tmp.exists(), f"uri: {tmp}")
            tmp.write(b"abcd")
            self.assertTrue(tmp.exists(), f"uri: {tmp}")
            self.assertTrue(tmp.isTemporary)
        self.assertFalse(tmp.exists(), f"uri: {tmp}")

        tmpdir = ResourcePath(self.tmpdir, forceDirectory=True)
        with ResourcePath.temporary_uri(prefix=tmpdir, suffix=".yaml") as tmp:
            # Use a specified tmpdir and check it is okay for the file
            # to not be created.
            self.assertFalse(tmp.exists(), f"uri: {tmp}")
        self.assertTrue(tmpdir.exists(), f"uri: {tmpdir} still exists")

    def test_open(self):
        """Run the shared open() checks against local temporary files."""
        tmpdir = ResourcePath(self.tmpdir, forceDirectory=True)
        with ResourcePath.temporary_uri(prefix=tmpdir, suffix=".txt") as tmp:
            _check_open(self, tmp, mode_suffixes=("", "t"))
            _check_open(self, tmp, mode_suffixes=("t",), encoding="utf-16")
            _check_open(self, tmp, mode_suffixes=("t",), prefer_file_temporary=True)
            _check_open(self, tmp, mode_suffixes=("t",), encoding="utf-16", prefer_file_temporary=True)
        with ResourcePath.temporary_uri(prefix=tmpdir, suffix=".dat") as tmp:
            _check_open(self, tmp, mode_suffixes=("b",))
            # One-element tuple: the trailing comma matters, ("b") is just "b".
            _check_open(self, tmp, mode_suffixes=("b",), prefer_file_temporary=True)


@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!")
@mock_s3
class S3URITestCase(unittest.TestCase):
    """Tests involving S3"""

    # Bucket name to use in tests
    bucketName = "any_bucket"

    def setUp(self):
        """Create a scratch directory, credentials and the test bucket."""
        # Local test directory
        self.tmpdir = makeTestTempDir(TESTDIR)

        # set up some fake credentials if they do not exist
        self.usingDummyCredentials = setAwsEnvCredentials()

        # MOTO needs to know that we expect Bucket bucketname to exist
        s3 = boto3.resource("s3")
        s3.create_bucket(Bucket=self.bucketName)

    def tearDown(self):
        """Empty and delete the bucket, then undo what `setUp` created.

        The credential and scratch-directory cleanup runs in a ``finally``
        clause so that a failure while deleting the bucket does not leak
        them into later tests.
        """
        try:
            s3 = boto3.resource("s3")
            bucket = s3.Bucket(self.bucketName)
            try:
                bucket.objects.all().delete()
            except botocore.exceptions.ClientError as e:
                # A 404 means the key was not reachable, which is fine here;
                # anything else is a genuine failure.
                if e.response["Error"]["Code"] != "404":
                    raise

            bucket.delete()
        finally:
            # unset any potentially set dummy credentials
            if self.usingDummyCredentials:
                unsetAwsEnvCredentials()

            shutil.rmtree(self.tmpdir, ignore_errors=True)

    def makeS3Uri(self, path):
        """Return the ``s3://`` URI string for ``path`` in the test bucket."""
        return f"s3://{self.bucketName}/{path}"

    def testTransfer(self):
        """Check copies between local files and S3 in both directions."""
        src = ResourcePath(os.path.join(self.tmpdir, "test.txt"))
        content = "Content is some content\nwith something to say\n\n"
        src.write(content.encode())
        self.assertTrue(src.exists())
        self.assertEqual(src.size(), len(content.encode()))

        dest = ResourcePath(self.makeS3Uri("test.txt"))
        self.assertFalse(dest.exists())

        with self.assertRaises(FileNotFoundError):
            dest.size()

        dest.transfer_from(src, transfer="copy")
        self.assertTrue(dest.exists())

        dest2 = ResourcePath(self.makeS3Uri("copied.txt"))
        dest2.transfer_from(dest, transfer="copy")
        self.assertTrue(dest2.exists())

        local = ResourcePath(os.path.join(self.tmpdir, "copied.txt"))
        local.transfer_from(dest2, transfer="copy")
        with open(local.ospath, "r") as fd:
            new_content = fd.read()
        self.assertEqual(new_content, content)

        with self.assertRaises(ValueError):
            dest2.transfer_from(local, transfer="symlink")

        b = dest.read()
        self.assertEqual(b.decode(), new_content)

        nbytes = 10
        subset = dest.read(size=nbytes)
        self.assertEqual(len(subset), nbytes)
        self.assertEqual(subset.decode(), content[:nbytes])

        with self.assertRaises(FileExistsError):
            dest.transfer_from(src, transfer="copy")

        dest.transfer_from(src, transfer="copy", overwrite=True)

    def testWalk(self):
        """Test that we can list an S3 bucket"""
        # Files we want to create
        expected = ("a/x.txt", "a/y.txt", "a/z.json", "a/b/w.txt", "a/b/c/d/v.json")
        expected_uris = [ResourcePath(self.makeS3Uri(path)) for path in expected]
        for uri in expected_uris:
            # Doesn't matter what we write
            uri.write("123".encode())

        # Find all the files in the a/ tree
        found = {uri.path for uri in ResourcePath.findFileResources([ResourcePath(self.makeS3Uri("a/"))])}
        self.assertEqual(found, {uri.path for uri in expected_uris})

        # Find all the files in the a/ tree but group by folder
        found = ResourcePath.findFileResources([ResourcePath(self.makeS3Uri("a/"))], grouped=True)
        expected = (("/a/x.txt", "/a/y.txt", "/a/z.json"), ("/a/b/w.txt",), ("/a/b/c/d/v.json",))

        # NOTE(review): zip() stops at the shorter input, so missing groups
        # would go unnoticed here -- consider also asserting the group count.
        for got, expect in zip(found, expected):
            self.assertEqual(tuple(u.path for u in got), expect)

        # Find only JSON files
        found = {
            uri.path
            for uri in ResourcePath.findFileResources(
                [ResourcePath(self.makeS3Uri("a/"))], file_filter=r"\.json$"
            )
        }
        self.assertEqual(found, {uri.path for uri in expected_uris if uri.path.endswith(".json")})

        # JSON files grouped by directory
        found = ResourcePath.findFileResources(
            [ResourcePath(self.makeS3Uri("a/"))], file_filter=r"\.json$", grouped=True
        )
        expected = (("/a/z.json",), ("/a/b/c/d/v.json",))

        for got, expect in zip(found, expected):
            self.assertEqual(tuple(u.path for u in got), expect)

        # Check pagination works with large numbers of files. S3 API limits
        # us to 1000 response per list_objects call so create lots of files
        created = set()
        n_dir1 = 1100
        for counter in range(1, n_dir1 + 1):
            new = ResourcePath(self.makeS3Uri(f"test/file{counter:04d}.txt"))
            new.write(f"{counter}".encode())
            created.add(str(new))
        # Put some in a subdirectory to make sure we are looking in a
        # hierarchy.
        n_dir2 = 100
        for counter in range(1, n_dir2 + 1):
            new = ResourcePath(self.makeS3Uri(f"test/subdir/file{counter:04d}.txt"))
            new.write(f"{counter}".encode())
            created.add(str(new))

        found = ResourcePath.findFileResources([ResourcePath(self.makeS3Uri("test/"))])
        self.assertEqual({str(u) for u in found}, created)

        # Again with grouping.
        found = list(ResourcePath.findFileResources([ResourcePath(self.makeS3Uri("test/"))], grouped=True))
        self.assertEqual(len(found), 2)
        dir_1 = list(found[0])
        dir_2 = list(found[1])
        self.assertEqual(len(dir_1), n_dir1)
        self.assertEqual(len(dir_2), n_dir2)

    def testWrite(self):
        """Check that content written to S3 can be read back."""
        s3write = ResourcePath(self.makeS3Uri("created.txt"))
        content = "abcdefghijklmnopqrstuv\n"
        s3write.write(content.encode())
        self.assertEqual(s3write.read().decode(), content)

    def testTemporary(self):
        """Check that temporary S3 URIs are unique and removed on exit."""
        s3root = ResourcePath(self.makeS3Uri("rootdir"), forceDirectory=True)
        with ResourcePath.temporary_uri(prefix=s3root, suffix=".json") as tmp:
            self.assertEqual(tmp.getExtension(), ".json", f"uri: {tmp}")
            self.assertEqual(tmp.scheme, "s3", f"uri: {tmp}")
            self.assertEqual(tmp.parent(), s3root)
            basename = tmp.basename()
            content = "abcd"
            tmp.write(content.encode())
            self.assertTrue(tmp.exists(), f"uri: {tmp}")
        self.assertFalse(tmp.exists())

        # Again without writing anything, to check that there is no complaint
        # on exit of context manager.
        with ResourcePath.temporary_uri(prefix=s3root, suffix=".json") as tmp:
            self.assertFalse(tmp.exists())
            # Check that the file has a different name than before.
            self.assertNotEqual(tmp.basename(), basename, f"uri: {tmp}")
        self.assertFalse(tmp.exists())

    def testRelative(self):
        """Check that we can get subpaths back from two URIs"""
        parent = ResourcePath(self.makeS3Uri("rootdir"), forceDirectory=True)
        child = ResourcePath(self.makeS3Uri("rootdir/dir1/file.txt"))

        self.assertEqual(child.relative_to(parent), "dir1/file.txt")

        not_child = ResourcePath(self.makeS3Uri("/a/b/dir1/file.txt"))
        self.assertFalse(not_child.relative_to(parent))

        not_s3 = ResourcePath(os.path.join(self.tmpdir, "dir1", "file2.txt"))
        self.assertFalse(child.relative_to(not_s3))

    def testQuoting(self):
        """Check that quoting works."""
        parent = ResourcePath(self.makeS3Uri("rootdir"), forceDirectory=True)
        subpath = "rootdir/dir1+/file?.txt"
        child = ResourcePath(self.makeS3Uri(urllib.parse.quote(subpath)))

        self.assertEqual(child.relative_to(parent), "dir1+/file?.txt")
        self.assertEqual(child.basename(), "file?.txt")
        self.assertEqual(child.relativeToPathRoot, subpath)
        self.assertIn("%", child.path)
        self.assertEqual(child.unquoted_path, "/" + subpath)

    def test_open(self):
        """Run the shared open() checks against S3 objects."""
        text_uri = ResourcePath(self.makeS3Uri("file.txt"))
        _check_open(self, text_uri, mode_suffixes=("", "t"))
        _check_open(self, text_uri, mode_suffixes=("t",), encoding="utf-16")
        _check_open(self, text_uri, mode_suffixes=("t",), prefer_file_temporary=True)
        _check_open(self, text_uri, mode_suffixes=("t",), prefer_file_temporary=True, encoding="utf-16")
        binary_uri = ResourcePath(self.makeS3Uri("file.dat"))
        _check_open(self, binary_uri, mode_suffixes=("b",))
        _check_open(self, binary_uri, mode_suffixes=("b",), prefer_file_temporary=True)


807# Mock required environment variables during tests
808@unittest.mock.patch.dict(
809 os.environ,
810 {
811 "LSST_BUTLER_WEBDAV_AUTH": "TOKEN",
812 "LSST_BUTLER_WEBDAV_TOKEN_FILE": os.path.join(TESTDIR, "data/webdav/token"),
813 "LSST_BUTLER_WEBDAV_CA_BUNDLE": "/path/to/ca/certs",
814 },
815)
816class WebdavURITestCase(unittest.TestCase):
def setUp(self):
    """Build the test URIs and register the canned HTTP responses."""
    server_root = "www.not-exists.orgx"
    folder_name = "existingFolder"
    file_name = "existingFile"
    missing_name = "notExistingFile"

    self.baseURL = ResourcePath(f"https://{server_root}", forceDirectory=True)
    self.existingFileResourcePath = ResourcePath(f"https://{server_root}/{folder_name}/{file_name}")
    self.notExistingFileResourcePath = ResourcePath(f"https://{server_root}/{folder_name}/{missing_name}")
    self.existingFolderResourcePath = ResourcePath(
        f"https://{server_root}/{folder_name}", forceDirectory=True
    )
    self.notExistingFolderResourcePath = ResourcePath(
        f"https://{server_root}/{missing_name}", forceDirectory=True
    )

    # Resolve each URL once; the registrations below reuse them.
    base_url = self.baseURL.geturl()
    file_url = self.existingFileResourcePath.geturl()
    missing_file_url = self.notExistingFileResourcePath.geturl()
    folder_url = self.existingFolderResourcePath.geturl()
    missing_folder_url = self.notExistingFolderResourcePath.geturl()
    size_header = {"Content-Length": "1024"}

    # Need to declare the options
    responses.add(responses.OPTIONS, base_url, status=200, headers={"DAV": "1,2,3"})

    # Used by HttpResourcePath.exists()
    responses.add(responses.HEAD, file_url, status=200, headers=dict(size_header))
    responses.add(responses.HEAD, missing_file_url, status=404)

    # Used by HttpResourcePath.read()
    responses.add(responses.GET, file_url, status=200, body="It works!".encode())
    responses.add(responses.GET, missing_file_url, status=404)

    # Used by HttpResourcePath.write()
    responses.add(responses.PUT, file_url, status=201)

    # Used by HttpResourcePath.transfer_from()
    for method, destination in (
        ("COPY", file_url),
        ("COPY", missing_file_url),
        ("MOVE", missing_file_url),
    ):
        responses.add(
            responses.Response(
                url=file_url,
                method=method,
                headers={"Destination": destination},
                status=201,
            )
        )

    # Used by HttpResourcePath.remove()
    responses.add(responses.DELETE, file_url, status=200)
    responses.add(responses.DELETE, missing_file_url, status=404)

    # Used by HttpResourcePath.mkdir()
    responses.add(responses.HEAD, folder_url, status=200, headers=dict(size_header))
    responses.add(responses.HEAD, base_url, status=200, headers=dict(size_header))
    responses.add(responses.HEAD, missing_folder_url, status=404)
    responses.add(responses.Response(url=missing_folder_url, method="MKCOL", status=201))
    responses.add(responses.Response(url=folder_url, method="MKCOL", status=403))

904 @responses.activate
905 def testExists(self):
907 self.assertTrue(self.existingFileResourcePath.exists())
908 self.assertFalse(self.notExistingFileResourcePath.exists())
910 self.assertEqual(self.existingFileResourcePath.size(), 1024)
911 with self.assertRaises(FileNotFoundError):
912 self.notExistingFileResourcePath.size()
914 @responses.activate
915 def testRemove(self):
917 self.assertIsNone(self.existingFileResourcePath.remove())
918 with self.assertRaises(FileNotFoundError):
919 self.notExistingFileResourcePath.remove()
921 @responses.activate
922 def testMkdir(self):
924 # The mock means that we can't check this now exists
925 self.notExistingFolderResourcePath.mkdir()
927 # This should do nothing
928 self.existingFolderResourcePath.mkdir()
930 with self.assertRaises(ValueError):
931 self.notExistingFileResourcePath.mkdir()
933 @responses.activate
934 def testRead(self):
936 self.assertEqual(self.existingFileResourcePath.read().decode(), "It works!")
937 self.assertNotEqual(self.existingFileResourcePath.read().decode(), "Nope.")
938 with self.assertRaises(FileNotFoundError):
939 self.notExistingFileResourcePath.read()
941 @responses.activate
942 def testWrite(self):
944 self.assertIsNone(self.existingFileResourcePath.write(data=str.encode("Some content.")))
945 with self.assertRaises(FileExistsError):
946 self.existingFileResourcePath.write(data=str.encode("Some content."), overwrite=False)
948 @responses.activate
949 def testTransfer(self):
951 self.assertIsNone(self.notExistingFileResourcePath.transfer_from(src=self.existingFileResourcePath))
952 self.assertIsNone(
953 self.notExistingFileResourcePath.transfer_from(src=self.existingFileResourcePath, transfer="move")
954 )
955 with self.assertRaises(FileExistsError):
956 self.existingFileResourcePath.transfer_from(src=self.existingFileResourcePath)
957 with self.assertRaises(ValueError):
958 self.notExistingFileResourcePath.transfer_from(
959 src=self.existingFileResourcePath, transfer="unsupported"
960 )
962 def testParent(self):
964 self.assertEqual(
965 self.existingFolderResourcePath.geturl(), self.notExistingFileResourcePath.parent().geturl()
966 )
967 self.assertEqual(self.baseURL.geturl(), self.baseURL.parent().geturl())
968 self.assertEqual(
969 self.existingFileResourcePath.parent().geturl(), self.existingFileResourcePath.dirname().geturl()
970 )
# Run the test suite when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()