Coverage for tests/test_uri.py: 12%
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
import glob
import os
import pathlib
import shutil
import unittest
import unittest.mock
import urllib.parse

import responses
try:
    import boto3
    import botocore
    from moto import mock_s3
except ImportError:
    # Any of the three imports failing disables the S3 tests.  Both module
    # names are reset so neither is left unbound (or half-imported) for code
    # that checks them later.
    boto3 = None
    botocore = None

    def mock_s3(cls):
        """A no-op decorator in case moto mock_s3 can not be imported.

        Returns the decorated object unchanged.
        """
        return cls
42from lsst.daf.butler import ButlerURI
43from lsst.resources.s3utils import setAwsEnvCredentials, unsetAwsEnvCredentials
44from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir
46TESTDIR = os.path.abspath(os.path.dirname(__file__))
class FileURITestCase(unittest.TestCase):
    """Concrete tests for local files"""

    def setUp(self):
        """Create a per-test scratch directory below ``TESTDIR``."""
        # Use a local tempdir because on macOS the temp dirs use symlinks
        # so relsymlink gets quite confused.
        self.tmpdir = makeTestTempDir(TESTDIR)

    def tearDown(self):
        """Remove the scratch directory created by ``setUp``."""
        removeTestTempDir(self.tmpdir)

    def testFile(self):
        """Check construction, write/read round trip and size of a file URI."""
        file = os.path.join(self.tmpdir, "test.txt")
        uri = ButlerURI(file)
        self.assertFalse(uri.exists(), f"{uri} should not exist")
        self.assertEqual(uri.ospath, file)

        path = pathlib.Path(file)
        uri = ButlerURI(path)
        self.assertEqual(uri.ospath, file)

        content = "abcdefghijklmnopqrstuv\n"
        uri.write(content.encode())
        self.assertTrue(os.path.exists(file), "File should exist locally")
        self.assertTrue(uri.exists(), f"{uri} should now exist")
        self.assertEqual(uri.read().decode(), content)
        self.assertEqual(uri.size(), len(content.encode()))

        with self.assertRaises(FileNotFoundError):
            ButlerURI("file/not/there.txt").size()

        # Check that creating a URI from a URI returns the same thing
        uri2 = ButlerURI(uri)
        self.assertEqual(uri, uri2)
        # Identity, not just equality: the very same object must come back.
        self.assertIs(uri, uri2)

        with self.assertRaises(ValueError):
            # Scheme-less URIs are not allowed to support non-file roots
            # at the present time. This may change in the future to become
            # equivalent to ButlerURI.join()
            ButlerURI("a/b.txt", root=ButlerURI("s3://bucket/a/b/"))

    def testExtension(self):
        """Check that ``updatedExtension`` replaces the file extension."""
        file = ButlerURI(os.path.join(self.tmpdir, "test.txt"))
        self.assertEqual(file.updatedExtension(None), file)
        self.assertEqual(file.updatedExtension(".txt"), file)
        # An unchanged extension must hand back the same object.
        self.assertIs(file.updatedExtension(".txt"), file)

        fits = file.updatedExtension(".fits.gz")
        self.assertEqual(fits.basename(), "test.fits.gz")
        self.assertEqual(fits.updatedExtension(".jpeg").basename(), "test.jpeg")

    def testRelative(self):
        """Check that we can get subpaths back from two URIs"""
        parent = ButlerURI(self.tmpdir, forceDirectory=True, forceAbsolute=True)
        self.assertTrue(parent.isdir())
        child = ButlerURI(os.path.join(self.tmpdir, "dir1", "file.txt"), forceAbsolute=True)

        self.assertEqual(child.relative_to(parent), "dir1/file.txt")

        not_child = ButlerURI("/a/b/dir1/file.txt")
        self.assertIsNone(not_child.relative_to(parent))
        self.assertFalse(not_child.isdir())

        not_directory = ButlerURI(os.path.join(self.tmpdir, "dir1", "file2.txt"))
        self.assertIsNone(child.relative_to(not_directory))

        # Relative URIs
        parent = ButlerURI("a/b/", forceAbsolute=False)
        child = ButlerURI("a/b/c/d.txt", forceAbsolute=False)
        self.assertFalse(child.scheme)
        self.assertEqual(child.relative_to(parent), "c/d.txt")

        # File URI and schemeless URI
        parent = ButlerURI("file:/a/b/c/")
        child = ButlerURI("e/f/g.txt", forceAbsolute=False)

        # If the child is relative and the parent is absolute we assume
        # that the child is a child of the parent unless it uses ".."
        self.assertEqual(child.relative_to(parent), "e/f/g.txt")

        child = ButlerURI("../e/f/g.txt", forceAbsolute=False)
        self.assertIsNone(child.relative_to(parent))

        child = ButlerURI("../c/e/f/g.txt", forceAbsolute=False)
        self.assertEqual(child.relative_to(parent), "e/f/g.txt")

        # Test non-file root with relative path.
        child = ButlerURI("e/f/g.txt", forceAbsolute=False)
        parent = ButlerURI("s3://hello/a/b/c/")
        self.assertEqual(child.relative_to(parent), "e/f/g.txt")

        # Test with different netloc
        child = ButlerURI("http://my.host/a/b/c.txt")
        parent = ButlerURI("http://other.host/a/")
        self.assertIsNone(child.relative_to(parent), f"{child}.relative_to({parent})")

        # Schemeless absolute child.
        # Schemeless absolute URI is constructed using root= parameter.
        parent = ButlerURI("file:///a/b/c/")
        child = ButlerURI("d/e.txt", root=parent)
        self.assertEqual(child.relative_to(parent), "d/e.txt", f"{child}.relative_to({parent})")

        parent = ButlerURI("c/", root="/a/b/")
        self.assertEqual(child.relative_to(parent), "d/e.txt", f"{child}.relative_to({parent})")

        # Absolute schemeless child with relative parent will always fail.
        parent = ButlerURI("d/e.txt", forceAbsolute=False)
        self.assertIsNone(child.relative_to(parent), f"{child}.relative_to({parent})")

    def testParents(self):
        """Test of splitting and parent walking."""
        parent = ButlerURI(self.tmpdir, forceDirectory=True, forceAbsolute=True)
        child_file = parent.join("subdir/file.txt")
        self.assertFalse(child_file.isdir())
        child_subdir, file = child_file.split()
        self.assertEqual(file, "file.txt")
        self.assertTrue(child_subdir.isdir())
        self.assertEqual(child_file.dirname(), child_subdir)
        self.assertEqual(child_file.basename(), file)
        self.assertEqual(child_file.parent(), child_subdir)
        derived_parent = child_subdir.parent()
        self.assertEqual(derived_parent, parent)
        self.assertTrue(derived_parent.isdir())
        self.assertEqual(child_file.parent().parent(), parent)

    def testEnvVar(self):
        """Test that environment variables are expanded."""

        with unittest.mock.patch.dict(os.environ, {"MY_TEST_DIR": "/a/b/c"}):
            uri = ButlerURI("${MY_TEST_DIR}/d.txt")
        self.assertEqual(uri.path, "/a/b/c/d.txt")
        self.assertEqual(uri.scheme, "file")

        # This will not expand
        uri = ButlerURI("${MY_TEST_DIR}/d.txt", forceAbsolute=False)
        self.assertEqual(uri.path, "${MY_TEST_DIR}/d.txt")
        self.assertFalse(uri.scheme)

    def testMkdir(self):
        """Check that nested directories can be created and written into."""
        tmpdir = ButlerURI(self.tmpdir)
        newdir = tmpdir.join("newdir/seconddir")
        newdir.mkdir()
        self.assertTrue(newdir.exists())
        newfile = newdir.join("temp.txt")
        newfile.write("Data".encode())
        self.assertTrue(newfile.exists())

    def testTransfer(self):
        """Exercise every local transfer mode, including overwrite rules."""
        src = ButlerURI(os.path.join(self.tmpdir, "test.txt"))
        content = "Content is some content\nwith something to say\n\n"
        src.write(content.encode())

        for mode in ("copy", "link", "hardlink", "symlink", "relsymlink"):
            dest = ButlerURI(os.path.join(self.tmpdir, f"dest_{mode}.txt"))
            dest.transfer_from(src, transfer=mode)
            self.assertTrue(dest.exists(), f"Check that {dest} exists (transfer={mode})")

            with open(dest.ospath, "r") as fh:
                new_content = fh.read()
            self.assertEqual(new_content, content)

            if mode in ("symlink", "relsymlink"):
                self.assertTrue(os.path.islink(dest.ospath), f"Check that {dest} is symlink")

            # If the source and destination are hardlinks of each other
            # the transfer should work even if overwrite=False.
            if mode in ("link", "hardlink"):
                dest.transfer_from(src, transfer=mode)
            else:
                with self.assertRaises(FileExistsError,
                                       msg=f"Overwrite of {dest} should not be allowed ({mode})"):
                    dest.transfer_from(src, transfer=mode)

            dest.transfer_from(src, transfer=mode, overwrite=True)

            os.remove(dest.ospath)

        b = src.read()
        self.assertEqual(b.decode(), new_content)

        nbytes = 10
        subset = src.read(size=nbytes)
        self.assertEqual(len(subset), nbytes)
        self.assertEqual(subset.decode(), content[:nbytes])

        with self.assertRaises(ValueError):
            src.transfer_from(src, transfer="unknown")

    def testTransferIdentical(self):
        """Test overwrite of identical files."""
        dir1 = ButlerURI(os.path.join(self.tmpdir, "dir1"), forceDirectory=True)
        dir1.mkdir()
        # dir2 is a symlink to dir1, so both names reach the same files.
        dir2 = os.path.join(self.tmpdir, "dir2")
        os.symlink(dir1.ospath, dir2)

        # Write a test file.
        src_file = dir1.join("test.txt")
        content = "0123456"
        src_file.write(content.encode())

        # Construct URI to destination that should be identical.
        dest_file = ButlerURI(dir2, forceDirectory=True).join("test.txt")
        self.assertTrue(dest_file.exists())
        self.assertNotEqual(src_file, dest_file)

        # Transfer it over itself.
        dest_file.transfer_from(src_file, transfer="symlink", overwrite=True)
        new_content = dest_file.read().decode()
        self.assertEqual(content, new_content)

    def testResource(self):
        """Check reading and existence tests for ``resource://`` URIs."""
        u = ButlerURI("resource://lsst.daf.butler/configs/datastore.yaml")
        self.assertTrue(u.exists(), f"Check {u} exists")

        content = u.read().decode()
        self.assertTrue(content.startswith("datastore:"))

        truncated = u.read(size=9).decode()
        self.assertEqual(truncated, "datastore")

        d = ButlerURI("resource://lsst.daf.butler/configs", forceDirectory=True)
        # Check the directory itself; the file ``u`` was already checked above.
        self.assertTrue(d.exists(), f"Check directory {d} exists")

        j = d.join("datastore.yaml")
        self.assertEqual(u, j)
        self.assertFalse(j.dirLike)
        self.assertFalse(j.isdir())
        not_there = d.join("not-there.yaml")
        self.assertFalse(not_there.exists())

        bad = ButlerURI("resource://bad.module/not.yaml")
        multi = ButlerURI.mexists([u, bad, not_there])
        self.assertTrue(multi[u])
        self.assertFalse(multi[bad])
        self.assertFalse(multi[not_there])

    def testEscapes(self):
        """Special characters in file paths"""
        src = ButlerURI("bbb/???/test.txt", root=self.tmpdir, forceAbsolute=True)
        self.assertFalse(src.scheme)
        src.write(b"Some content")
        self.assertTrue(src.exists())

        # abspath always returns a file scheme
        file = src.abspath()
        self.assertTrue(file.exists())
        self.assertIn("???", file.ospath)
        self.assertNotIn("???", file.path)

        file = file.updatedFile("tests??.txt")
        self.assertNotIn("??.txt", file.path)
        file.write(b"Other content")
        self.assertEqual(file.read(), b"Other content")

        src = src.updatedFile("tests??.txt")
        self.assertIn("??.txt", src.path)
        self.assertEqual(file.read(), src.read(), f"reading from {file.ospath} and {src.ospath}")

        # File URI and schemeless URI
        parent = ButlerURI("file:" + urllib.parse.quote("/a/b/c/de/??/"))
        child = ButlerURI("e/f/g.txt", forceAbsolute=False)
        self.assertEqual(child.relative_to(parent), "e/f/g.txt")

        child = ButlerURI("e/f??#/g.txt", forceAbsolute=False)
        self.assertEqual(child.relative_to(parent), "e/f??#/g.txt")

        child = ButlerURI("file:" + urllib.parse.quote("/a/b/c/de/??/e/f??#/g.txt"))
        self.assertEqual(child.relative_to(parent), "e/f??#/g.txt")

        self.assertEqual(child.relativeToPathRoot, "a/b/c/de/??/e/f??#/g.txt")

        # Schemeless so should not quote
        dir_uri = ButlerURI("bbb/???/", root=self.tmpdir, forceAbsolute=True, forceDirectory=True)
        self.assertIn("???", dir_uri.ospath)
        self.assertIn("???", dir_uri.path)
        self.assertFalse(dir_uri.scheme)

        # dir.join() morphs into a file scheme
        new = dir_uri.join("test_j.txt")
        self.assertIn("???", new.ospath, f"Checking {new}")
        new.write(b"Content")

        new2name = "###/test??.txt"
        new2 = dir_uri.join(new2name)
        self.assertIn("???", new2.ospath)
        new2.write(b"Content")
        self.assertTrue(new2.ospath.endswith(new2name))
        self.assertEqual(new.read(), new2.read())

        fdir = dir_uri.abspath()
        self.assertNotIn("???", fdir.path)
        self.assertIn("???", fdir.ospath)
        self.assertEqual(fdir.scheme, "file")
        fnew = dir_uri.join("test_jf.txt")
        fnew.write(b"Content")

        fnew2 = fdir.join(new2name)
        fnew2.write(b"Content")
        self.assertTrue(fnew2.ospath.endswith(new2name))
        self.assertNotIn("###", fnew2.path)

        self.assertEqual(fnew.read(), fnew2.read())

        # Test that children relative to schemeless and file schemes
        # still return the same unquoted name
        self.assertEqual(fnew2.relative_to(fdir), new2name, f"{fnew2}.relative_to({fdir})")
        self.assertEqual(fnew2.relative_to(dir_uri), new2name, f"{fnew2}.relative_to({dir_uri})")
        self.assertEqual(new2.relative_to(fdir), new2name, f"{new2}.relative_to({fdir})")
        self.assertEqual(new2.relative_to(dir_uri), new2name, f"{new2}.relative_to({dir_uri})")

        # Check for double quoting
        plus_path = "/a/b/c+d/"
        with self.assertLogs(level="WARNING"):
            uri = ButlerURI(urllib.parse.quote(plus_path), forceDirectory=True)
        self.assertEqual(uri.ospath, plus_path)

        # Check that # is not escaped for schemeless URIs
        hash_path = "/a/b#/c&d#xyz"
        hpos = hash_path.rfind("#")
        uri = ButlerURI(hash_path)
        self.assertEqual(uri.ospath, hash_path[:hpos])
        self.assertEqual(uri.fragment, hash_path[hpos + 1:])

    def testHash(self):
        """Test that we can store URIs in sets and as keys."""
        uri1 = ButlerURI(TESTDIR)
        uri2 = uri1.join("test/")
        s = {uri1, uri2}
        self.assertIn(uri1, s)

        d = {uri1: "1", uri2: "2"}
        self.assertEqual(d[uri2], "2")

    def testWalk(self):
        """Test ButlerURI.walk()."""
        test_dir_uri = ButlerURI(TESTDIR)

        file = test_dir_uri.join("config/basic/butler.yaml")
        found = list(ButlerURI.findFileResources([file]))
        self.assertEqual(found[0], file)

        # Compare against the full local paths
        expected = {p for p in glob.glob(os.path.join(TESTDIR, "config", "**"), recursive=True)
                    if os.path.isfile(p)}
        found = {u.ospath for u in ButlerURI.findFileResources([test_dir_uri.join("config")])}
        self.assertEqual(found, expected)

        # Now solely the YAML files
        expected_yaml = set(glob.glob(os.path.join(TESTDIR, "config", "**", "*.yaml"), recursive=True))
        found = {u.ospath for u in ButlerURI.findFileResources([test_dir_uri.join("config")],
                                                               file_filter=r".*\.yaml$")}
        self.assertEqual(found, expected_yaml)

        # Now two explicit directories and a file
        expected = set(glob.glob(os.path.join(TESTDIR, "config", "**", "basic", "*.yaml"), recursive=True))
        expected.update(set(glob.glob(os.path.join(TESTDIR, "config", "**", "templates", "*.yaml"),
                                      recursive=True)))
        expected.add(file.ospath)

        found = {u.ospath for u in ButlerURI.findFileResources([file, test_dir_uri.join("config/basic"),
                                                                test_dir_uri.join("config/templates")],
                                                               file_filter=r".*\.yaml$")}
        self.assertEqual(found, expected)

        # Group by directory -- find everything and compare it with what
        # we expected to be there in total. We expect to find 9 directories
        # containing yaml files so make sure we only iterate 9 times.
        found_yaml = set()
        counter = 0
        for uris in ButlerURI.findFileResources([file, test_dir_uri.join("config/")],
                                                file_filter=r".*\.yaml$", grouped=True):
            found = {u.ospath for u in uris}
            # Only groups that actually contain matches are counted.
            if found:
                counter += 1

            found_yaml.update(found)

        self.assertEqual(found_yaml, expected_yaml)
        self.assertEqual(counter, 9)

        # Grouping but check that single files are returned in a single group
        # at the end
        file2 = test_dir_uri.join("config/templates/templates-bad.yaml")
        found = list(ButlerURI.findFileResources([file, file2, test_dir_uri.join("config/dbAuth")],
                                                 grouped=True))
        self.assertEqual(len(found), 2)
        self.assertEqual(list(found[1]), [file, file2])

        with self.assertRaises(ValueError):
            list(file.walk())

    def testRootURI(self):
        """Test ButlerURI.root_uri()."""
        uri = ButlerURI("https://www.notexist.com:8080/file/test")
        uri2 = ButlerURI("s3://www.notexist.com/file/test")
        self.assertEqual(uri.root_uri().geturl(), "https://www.notexist.com:8080/")
        self.assertEqual(uri2.root_uri().geturl(), "s3://www.notexist.com/")

    def testJoin(self):
        """Test .join method."""

        root_str = "s3://bucket/hsc/payload/"
        root = ButlerURI(root_str)

        self.assertEqual(root.join("b/test.txt").geturl(), f"{root_str}b/test.txt")
        add_dir = root.join("b/c/d/")
        self.assertTrue(add_dir.isdir())
        self.assertEqual(add_dir.geturl(), f"{root_str}b/c/d/")

        quote_example = "b&c.t@x#t"
        needs_quote = root.join(quote_example)
        self.assertEqual(needs_quote.unquoted_path, f"/hsc/payload/{quote_example}")

        other = ButlerURI("file://localhost/test.txt")
        self.assertEqual(root.join(other), other)
        self.assertEqual(other.join("b/new.txt").geturl(), "file://localhost/b/new.txt")

        joined = ButlerURI("s3://bucket/hsc/payload/").join(ButlerURI("test.qgraph", forceAbsolute=False))
        self.assertEqual(joined, ButlerURI("s3://bucket/hsc/payload/test.qgraph"))

        with self.assertRaises(ValueError):
            ButlerURI("s3://bucket/hsc/payload/").join(ButlerURI("test.qgraph"))

    def testTemporary(self):
        """Check that temporary URIs are removed when the context exits."""
        with ButlerURI.temporary_uri(suffix=".json") as tmp:
            self.assertEqual(tmp.getExtension(), ".json", f"uri: {tmp}")
            self.assertTrue(tmp.isabs(), f"uri: {tmp}")
            self.assertFalse(tmp.exists(), f"uri: {tmp}")
            tmp.write(b"abcd")
            self.assertTrue(tmp.exists(), f"uri: {tmp}")
            self.assertTrue(tmp.isTemporary)
        self.assertFalse(tmp.exists(), f"uri: {tmp}")

        tmpdir = ButlerURI(self.tmpdir, forceDirectory=True)
        with ButlerURI.temporary_uri(prefix=tmpdir, suffix=".yaml") as tmp:
            # Use a specified tmpdir and check it is okay for the file
            # to not be created.
            self.assertFalse(tmp.exists(), f"uri: {tmp}")
        self.assertTrue(tmpdir.exists(), f"uri: {tmpdir} still exists")
@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!")
@mock_s3
class S3URITestCase(unittest.TestCase):
    """Tests involving S3"""

    bucketName = "any_bucket"
    """Bucket name to use in tests"""

    def setUp(self):
        """Create a scratch directory, credentials and the test bucket."""
        # Local test directory
        self.tmpdir = makeTestTempDir(TESTDIR)

        # set up some fake credentials if they do not exist
        self.usingDummyCredentials = setAwsEnvCredentials()

        # MOTO needs to know that we expect Bucket bucketname to exist
        s3 = boto3.resource("s3")
        s3.create_bucket(Bucket=self.bucketName)

    def tearDown(self):
        """Empty and delete the bucket, then undo everything ``setUp`` did.

        Credential and scratch-directory cleanup run in a ``finally`` clause
        so that a failure while deleting the bucket cannot leave the dummy
        credentials in the environment or the directory on disk.
        """
        try:
            s3 = boto3.resource("s3")
            bucket = s3.Bucket(self.bucketName)
            try:
                bucket.objects.all().delete()
            except botocore.exceptions.ClientError as e:
                # A 404 means the key was not reachable; anything else is
                # a real failure and is propagated.
                if e.response["Error"]["Code"] != "404":
                    raise

            bucket.delete()
        finally:
            # unset any potentially set dummy credentials
            if self.usingDummyCredentials:
                unsetAwsEnvCredentials()

            shutil.rmtree(self.tmpdir, ignore_errors=True)

    def makeS3Uri(self, path):
        """Return an ``s3://`` URI string for ``path`` in the test bucket."""
        return f"s3://{self.bucketName}/{path}"

    def testTransfer(self):
        """Copy a file local -> S3 -> S3 -> local and check the content."""
        src = ButlerURI(os.path.join(self.tmpdir, "test.txt"))
        content = "Content is some content\nwith something to say\n\n"
        src.write(content.encode())
        self.assertTrue(src.exists())
        self.assertEqual(src.size(), len(content.encode()))

        dest = ButlerURI(self.makeS3Uri("test.txt"))
        self.assertFalse(dest.exists())

        with self.assertRaises(FileNotFoundError):
            dest.size()

        dest.transfer_from(src, transfer="copy")
        self.assertTrue(dest.exists())

        dest2 = ButlerURI(self.makeS3Uri("copied.txt"))
        dest2.transfer_from(dest, transfer="copy")
        self.assertTrue(dest2.exists())

        local = ButlerURI(os.path.join(self.tmpdir, "copied.txt"))
        local.transfer_from(dest2, transfer="copy")
        with open(local.ospath, "r") as fd:
            new_content = fd.read()
        self.assertEqual(new_content, content)

        with self.assertRaises(ValueError):
            dest2.transfer_from(local, transfer="symlink")

        b = dest.read()
        self.assertEqual(b.decode(), new_content)

        nbytes = 10
        subset = dest.read(size=nbytes)
        # Exactly the requested number of bytes must come back.
        self.assertEqual(len(subset), nbytes)
        self.assertEqual(subset.decode(), content[:nbytes])

        with self.assertRaises(FileExistsError):
            dest.transfer_from(src, transfer="copy")

        dest.transfer_from(src, transfer="copy", overwrite=True)

    def testWalk(self):
        """Test that we can list an S3 bucket"""
        # Files we want to create
        expected = ("a/x.txt", "a/y.txt", "a/z.json", "a/b/w.txt", "a/b/c/d/v.json")
        expected_uris = [ButlerURI(self.makeS3Uri(path)) for path in expected]
        for uri in expected_uris:
            # Doesn't matter what we write
            uri.write("123".encode())

        # Find all the files in the a/ tree
        found = {uri.path for uri in ButlerURI.findFileResources([ButlerURI(self.makeS3Uri("a/"))])}
        self.assertEqual(found, {uri.path for uri in expected_uris})

        # Find all the files in the a/ tree but group by folder
        found = ButlerURI.findFileResources([ButlerURI(self.makeS3Uri("a/"))],
                                            grouped=True)
        expected = (("/a/x.txt", "/a/y.txt", "/a/z.json"), ("/a/b/w.txt",), ("/a/b/c/d/v.json",))

        # NOTE(review): zip() stops at the shorter input, so this loop cannot
        # detect missing or surplus groups -- confirm whether that is intended.
        for got, expect in zip(found, expected):
            self.assertEqual(tuple(u.path for u in got), expect)

        # Find only JSON files
        found = {uri.path for uri in ButlerURI.findFileResources([ButlerURI(self.makeS3Uri("a/"))],
                                                                 file_filter=r"\.json$")}
        self.assertEqual(found, {uri.path for uri in expected_uris if uri.path.endswith(".json")})

        # JSON files grouped by directory
        found = ButlerURI.findFileResources([ButlerURI(self.makeS3Uri("a/"))],
                                            file_filter=r"\.json$", grouped=True)
        expected = (("/a/z.json",), ("/a/b/c/d/v.json",))

        for got, expect in zip(found, expected):
            self.assertEqual(tuple(u.path for u in got), expect)

        # Check pagination works with large numbers of files. S3 API limits
        # us to 1000 response per list_objects call so create lots of files
        created = set()
        n_dir1 = 1100
        for counter in range(1, n_dir1 + 1):
            new = ButlerURI(self.makeS3Uri(f"test/file{counter:04d}.txt"))
            new.write(f"{counter}".encode())
            created.add(str(new))

        # Put some in a subdirectory to make sure we are looking in a
        # hierarchy.
        n_dir2 = 100
        for counter in range(1, n_dir2 + 1):
            new = ButlerURI(self.makeS3Uri(f"test/subdir/file{counter:04d}.txt"))
            new.write(f"{counter}".encode())
            created.add(str(new))

        found = ButlerURI.findFileResources([ButlerURI(self.makeS3Uri("test/"))])
        self.assertEqual({str(u) for u in found}, created)

        # Again with grouping.
        found = list(ButlerURI.findFileResources([ButlerURI(self.makeS3Uri("test/"))], grouped=True))
        self.assertEqual(len(found), 2)
        dir_1 = list(found[0])
        dir_2 = list(found[1])
        self.assertEqual(len(dir_1), n_dir1)
        self.assertEqual(len(dir_2), n_dir2)

    def testWrite(self):
        """Check that bytes written to an S3 URI can be read back."""
        s3write = ButlerURI(self.makeS3Uri("created.txt"))
        content = "abcdefghijklmnopqrstuv\n"
        s3write.write(content.encode())
        self.assertEqual(s3write.read().decode(), content)

    def testTemporary(self):
        """Check temporary URIs created below an S3 prefix."""
        s3root = ButlerURI(self.makeS3Uri("rootdir"), forceDirectory=True)
        with ButlerURI.temporary_uri(prefix=s3root, suffix=".json") as tmp:
            self.assertEqual(tmp.getExtension(), ".json", f"uri: {tmp}")
            self.assertEqual(tmp.scheme, "s3", f"uri: {tmp}")
            self.assertEqual(tmp.parent(), s3root)
            basename = tmp.basename()
            content = "abcd"
            tmp.write(content.encode())
            self.assertTrue(tmp.exists(), f"uri: {tmp}")
        self.assertFalse(tmp.exists())

        # Again without writing anything, to check that there is no complaint
        # on exit of context manager.
        with ButlerURI.temporary_uri(prefix=s3root, suffix=".json") as tmp:
            self.assertFalse(tmp.exists())
            # Check that the file has a different name than before.
            self.assertNotEqual(tmp.basename(), basename, f"uri: {tmp}")
        self.assertFalse(tmp.exists())

    def testRelative(self):
        """Check that we can get subpaths back from two URIs"""
        parent = ButlerURI(self.makeS3Uri("rootdir"), forceDirectory=True)
        child = ButlerURI(self.makeS3Uri("rootdir/dir1/file.txt"))

        self.assertEqual(child.relative_to(parent), "dir1/file.txt")

        not_child = ButlerURI(self.makeS3Uri("/a/b/dir1/file.txt"))
        self.assertFalse(not_child.relative_to(parent))

        not_s3 = ButlerURI(os.path.join(self.tmpdir, "dir1", "file2.txt"))
        self.assertFalse(child.relative_to(not_s3))

    def testQuoting(self):
        """Check that quoting works."""
        parent = ButlerURI(self.makeS3Uri("rootdir"), forceDirectory=True)
        subpath = "rootdir/dir1+/file?.txt"
        child = ButlerURI(self.makeS3Uri(urllib.parse.quote(subpath)))

        self.assertEqual(child.relative_to(parent), "dir1+/file?.txt")
        self.assertEqual(child.basename(), "file?.txt")
        self.assertEqual(child.relativeToPathRoot, subpath)
        self.assertIn("%", child.path)
        self.assertEqual(child.unquoted_path, "/" + subpath)
# Mock required environment variables during tests
@unittest.mock.patch.dict(
    os.environ,
    {
        "LSST_BUTLER_WEBDAV_AUTH": "TOKEN",
        "LSST_BUTLER_WEBDAV_TOKEN_FILE": os.path.join(TESTDIR, "config/testConfigs/webdav/token"),
        "LSST_BUTLER_WEBDAV_CA_BUNDLE": "/path/to/ca/certs",
    },
)
class WebdavURITestCase(unittest.TestCase):
    """Tests of HTTPS URIs answered by canned ``responses`` replies."""

    def setUp(self):
        """Build the URIs under test and register a reply for each request."""
        host = "www.not-exists.orgx"
        folder = "existingFolder"
        present_name = "existingFile"
        absent_name = "notExistingFile"

        self.baseURL = ButlerURI(f"https://{host}", forceDirectory=True)
        self.existingFileButlerURI = ButlerURI(f"https://{host}/{folder}/{present_name}")
        self.notExistingFileButlerURI = ButlerURI(f"https://{host}/{folder}/{absent_name}")
        self.existingFolderButlerURI = ButlerURI(f"https://{host}/{folder}", forceDirectory=True)
        self.notExistingFolderButlerURI = ButlerURI(f"https://{host}/{absent_name}", forceDirectory=True)

        # URL strings shared by the registrations below.
        base_url = self.baseURL.geturl()
        file_url = self.existingFileButlerURI.geturl()
        missing_file_url = self.notExistingFileButlerURI.geturl()
        folder_url = self.existingFolderButlerURI.geturl()
        missing_folder_url = self.notExistingFolderButlerURI.geturl()

        # Need to declare the options
        responses.add(responses.OPTIONS, base_url, status=200, headers={"DAV": "1,2,3"})

        # Used by ButlerHttpURI.exists()
        responses.add(responses.HEAD, file_url, status=200, headers={"Content-Length": "1024"})
        responses.add(responses.HEAD, missing_file_url, status=404)

        # Used by ButlerHttpURI.read()
        responses.add(responses.GET, file_url, status=200, body="It works!".encode())
        responses.add(responses.GET, missing_file_url, status=404)

        # Used by ButlerHttpURI.write()
        responses.add(responses.PUT, file_url, status=201)

        # Used by ButlerHttpURI.transfer_from()
        responses.add(responses.Response(url=file_url, method="COPY",
                                         headers={"Destination": file_url}, status=201))
        responses.add(responses.Response(url=file_url, method="COPY",
                                         headers={"Destination": missing_file_url}, status=201))
        responses.add(responses.Response(url=file_url, method="MOVE",
                                         headers={"Destination": missing_file_url}, status=201))

        # Used by ButlerHttpURI.remove()
        responses.add(responses.DELETE, file_url, status=200)
        responses.add(responses.DELETE, missing_file_url, status=404)

        # Used by ButlerHttpURI.mkdir()
        responses.add(responses.HEAD, folder_url, status=200, headers={"Content-Length": "1024"})
        responses.add(responses.HEAD, base_url, status=200, headers={"Content-Length": "1024"})
        responses.add(responses.HEAD, missing_folder_url, status=404)
        responses.add(responses.Response(url=missing_folder_url, method="MKCOL", status=201))
        responses.add(responses.Response(url=folder_url, method="MKCOL", status=403))

    @responses.activate
    def testExists(self):
        """Check ``exists()`` and ``size()`` for present and missing files."""
        present = self.existingFileButlerURI
        absent = self.notExistingFileButlerURI

        self.assertTrue(present.exists())
        self.assertFalse(absent.exists())

        self.assertEqual(present.size(), 1024)
        with self.assertRaises(FileNotFoundError):
            absent.size()

    @responses.activate
    def testRemove(self):
        """Check ``remove()`` for present and missing files."""
        outcome = self.existingFileButlerURI.remove()
        self.assertIsNone(outcome)
        with self.assertRaises(FileNotFoundError):
            self.notExistingFileButlerURI.remove()

    @responses.activate
    def testMkdir(self):
        """Check ``mkdir()`` on new, existing and file-like URIs."""
        # The mock means that we can't check this now exists
        self.notExistingFolderButlerURI.mkdir()

        # This should do nothing
        self.existingFolderButlerURI.mkdir()

        with self.assertRaises(ValueError):
            self.notExistingFileButlerURI.mkdir()

    @responses.activate
    def testRead(self):
        """Check ``read()`` for present and missing files."""
        present = self.existingFileButlerURI

        self.assertEqual(present.read().decode(), "It works!")
        self.assertNotEqual(present.read().decode(), "Nope.")
        with self.assertRaises(FileNotFoundError):
            self.notExistingFileButlerURI.read()

    @responses.activate
    def testWrite(self):
        """Check ``write()`` with and without overwrite permission."""
        target = self.existingFileButlerURI

        self.assertIsNone(target.write(data="Some content.".encode()))
        with self.assertRaises(FileExistsError):
            target.write(data="Some content.".encode(), overwrite=False)

    @responses.activate
    def testTransfer(self):
        """Check ``transfer_from()`` default, move, clash and bad-mode cases."""
        source = self.existingFileButlerURI
        target = self.notExistingFileButlerURI

        self.assertIsNone(target.transfer_from(src=source))
        self.assertIsNone(target.transfer_from(src=source, transfer="move"))
        with self.assertRaises(FileExistsError):
            source.transfer_from(src=source)
        with self.assertRaises(ValueError):
            target.transfer_from(src=source, transfer="unsupported")

    def testParent(self):
        """Check ``parent()`` against the folder, the root and ``dirname()``."""
        folder_url = self.existingFolderButlerURI.geturl()
        self.assertEqual(folder_url, self.notExistingFileButlerURI.parent().geturl())

        root_url = self.baseURL.geturl()
        self.assertEqual(root_url, self.baseURL.parent().geturl())

        via_parent = self.existingFileButlerURI.parent().geturl()
        self.assertEqual(via_parent, self.existingFileButlerURI.dirname().geturl())
# Run the test suite when this module is executed as a script.
if __name__ == "__main__":
    unittest.main()