Coverage for tests/test_uri.py: 12%
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
import glob
import os
import pathlib
import shutil
import unittest
import unittest.mock
import urllib.parse

import responses
try:
    import boto3
    import botocore
    from moto import mock_s3
except ImportError:
    boto3 = None

    def mock_s3(cls):
        """Return ``cls`` unchanged.

        Stand-in decorator used when the real ``moto.mock_s3`` could not
        be imported, so that decorated classes can still be defined.
        """
        return cls
42from lsst.daf.butler import ButlerURI
43from lsst.daf.butler.core._butlerUri.s3utils import (setAwsEnvCredentials,
44 unsetAwsEnvCredentials)
45from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir
47TESTDIR = os.path.abspath(os.path.dirname(__file__))
class FileURITestCase(unittest.TestCase):
    """Concrete tests for local files."""

    def setUp(self):
        # Use a local tempdir because on macOS the temp dirs use symlinks
        # so relsymlink gets quite confused.
        self.tmpdir = makeTestTempDir(TESTDIR)

    def tearDown(self):
        removeTestTempDir(self.tmpdir)

    def testFile(self):
        """Test existence, writing, reading and size of a local file."""
        file = os.path.join(self.tmpdir, "test.txt")
        uri = ButlerURI(file)
        self.assertFalse(uri.exists(), f"{uri} should not exist")
        self.assertEqual(uri.ospath, file)

        path = pathlib.Path(file)
        uri = ButlerURI(path)
        self.assertEqual(uri.ospath, file)

        content = "abcdefghijklmnopqrstuv\n"
        uri.write(content.encode())
        self.assertTrue(os.path.exists(file), "File should exist locally")
        self.assertTrue(uri.exists(), f"{uri} should now exist")
        self.assertEqual(uri.read().decode(), content)
        self.assertEqual(uri.size(), len(content.encode()))

        with self.assertRaises(FileNotFoundError):
            ButlerURI("file/not/there.txt").size()

        # Check that creating a URI from a URI returns the same thing
        uri2 = ButlerURI(uri)
        self.assertEqual(uri, uri2)
        self.assertIs(uri, uri2)

        with self.assertRaises(ValueError):
            # Scheme-less URIs are not allowed to support non-file roots
            # at the present time. This may change in the future to become
            # equivalent to ButlerURI.join()
            ButlerURI("a/b.txt", root=ButlerURI("s3://bucket/a/b/"))

    def testExtension(self):
        """Test replacing the extension of a file URI."""
        file = ButlerURI(os.path.join(self.tmpdir, "test.txt"))
        self.assertEqual(file.updatedExtension(None), file)
        self.assertEqual(file.updatedExtension(".txt"), file)
        # An unchanged extension must hand back the very same object.
        self.assertIs(file.updatedExtension(".txt"), file)

        fits = file.updatedExtension(".fits.gz")
        self.assertEqual(fits.basename(), "test.fits.gz")
        self.assertEqual(fits.updatedExtension(".jpeg").basename(), "test.jpeg")

    def testRelative(self):
        """Check that we can get subpaths back from two URIs"""
        parent = ButlerURI(self.tmpdir, forceDirectory=True, forceAbsolute=True)
        self.assertTrue(parent.isdir())
        child = ButlerURI(os.path.join(self.tmpdir, "dir1", "file.txt"), forceAbsolute=True)

        self.assertEqual(child.relative_to(parent), "dir1/file.txt")

        not_child = ButlerURI("/a/b/dir1/file.txt")
        self.assertIsNone(not_child.relative_to(parent))
        self.assertFalse(not_child.isdir())

        not_directory = ButlerURI(os.path.join(self.tmpdir, "dir1", "file2.txt"))
        self.assertIsNone(child.relative_to(not_directory))

        # Relative URIs
        parent = ButlerURI("a/b/", forceAbsolute=False)
        child = ButlerURI("a/b/c/d.txt", forceAbsolute=False)
        self.assertFalse(child.scheme)
        self.assertEqual(child.relative_to(parent), "c/d.txt")

        # File URI and schemeless URI
        parent = ButlerURI("file:/a/b/c/")
        child = ButlerURI("e/f/g.txt", forceAbsolute=False)

        # If the child is relative and the parent is absolute we assume
        # that the child is a child of the parent unless it uses ".."
        self.assertEqual(child.relative_to(parent), "e/f/g.txt")

        child = ButlerURI("../e/f/g.txt", forceAbsolute=False)
        self.assertIsNone(child.relative_to(parent))

        child = ButlerURI("../c/e/f/g.txt", forceAbsolute=False)
        self.assertEqual(child.relative_to(parent), "e/f/g.txt")

        # Test non-file root with relative path.
        child = ButlerURI("e/f/g.txt", forceAbsolute=False)
        parent = ButlerURI("s3://hello/a/b/c/")
        self.assertEqual(child.relative_to(parent), "e/f/g.txt")

        # Test with different netloc
        child = ButlerURI("http://my.host/a/b/c.txt")
        parent = ButlerURI("http://other.host/a/")
        self.assertIsNone(child.relative_to(parent), f"{child}.relative_to({parent})")

        # Schemeless absolute child.
        # Schemeless absolute URI is constructed using root= parameter.
        parent = ButlerURI("file:///a/b/c/")
        child = ButlerURI("d/e.txt", root=parent)
        self.assertEqual(child.relative_to(parent), "d/e.txt", f"{child}.relative_to({parent})")

        parent = ButlerURI("c/", root="/a/b/")
        self.assertEqual(child.relative_to(parent), "d/e.txt", f"{child}.relative_to({parent})")

        # Absolute schemeless child with relative parent will always fail.
        parent = ButlerURI("d/e.txt", forceAbsolute=False)
        self.assertIsNone(child.relative_to(parent), f"{child}.relative_to({parent})")

    def testParents(self):
        """Test of splitting and parent walking."""
        parent = ButlerURI(self.tmpdir, forceDirectory=True, forceAbsolute=True)
        child_file = parent.join("subdir/file.txt")
        self.assertFalse(child_file.isdir())
        child_subdir, file = child_file.split()
        self.assertEqual(file, "file.txt")
        self.assertTrue(child_subdir.isdir())
        self.assertEqual(child_file.dirname(), child_subdir)
        self.assertEqual(child_file.basename(), file)
        self.assertEqual(child_file.parent(), child_subdir)
        derived_parent = child_subdir.parent()
        self.assertEqual(derived_parent, parent)
        self.assertTrue(derived_parent.isdir())
        self.assertEqual(child_file.parent().parent(), parent)

    def testEnvVar(self):
        """Test that environment variables are expanded."""

        with unittest.mock.patch.dict(os.environ, {"MY_TEST_DIR": "/a/b/c"}):
            uri = ButlerURI("${MY_TEST_DIR}/d.txt")
            self.assertEqual(uri.path, "/a/b/c/d.txt")
            self.assertEqual(uri.scheme, "file")

            # This will not expand, even though the variable is defined.
            uri = ButlerURI("${MY_TEST_DIR}/d.txt", forceAbsolute=False)
            self.assertEqual(uri.path, "${MY_TEST_DIR}/d.txt")
            self.assertFalse(uri.scheme)

    def testMkdir(self):
        """Test creating nested directories and writing a file into them."""
        tmpdir = ButlerURI(self.tmpdir)
        newdir = tmpdir.join("newdir/seconddir")
        newdir.mkdir()
        self.assertTrue(newdir.exists())
        newfile = newdir.join("temp.txt")
        newfile.write("Data".encode())
        self.assertTrue(newfile.exists())

    def testTransfer(self):
        """Test local-to-local transfers in every supported mode."""
        src = ButlerURI(os.path.join(self.tmpdir, "test.txt"))
        content = "Content is some content\nwith something to say\n\n"
        src.write(content.encode())

        for mode in ("copy", "link", "hardlink", "symlink", "relsymlink"):
            dest = ButlerURI(os.path.join(self.tmpdir, f"dest_{mode}.txt"))
            dest.transfer_from(src, transfer=mode)
            self.assertTrue(dest.exists(), f"Check that {dest} exists (transfer={mode})")

            with open(dest.ospath, "r") as fh:
                new_content = fh.read()
            self.assertEqual(new_content, content)

            if mode in ("symlink", "relsymlink"):
                self.assertTrue(os.path.islink(dest.ospath), f"Check that {dest} is symlink")

            # If the source and destination are hardlinks of each other
            # the transfer should work even if overwrite=False.
            if mode in ("link", "hardlink"):
                dest.transfer_from(src, transfer=mode)
            else:
                with self.assertRaises(FileExistsError,
                                       msg=f"Overwrite of {dest} should not be allowed ({mode})"):
                    dest.transfer_from(src, transfer=mode)

            dest.transfer_from(src, transfer=mode, overwrite=True)

            os.remove(dest.ospath)

        # Compare against the known content rather than a variable that
        # happens to be left over from the last loop iteration.
        b = src.read()
        self.assertEqual(b.decode(), content)

        nbytes = 10
        subset = src.read(size=nbytes)
        self.assertEqual(len(subset), nbytes)
        self.assertEqual(subset.decode(), content[:nbytes])

        with self.assertRaises(ValueError):
            src.transfer_from(src, transfer="unknown")

    def testTransferIdentical(self):
        """Test overwrite of identical files."""
        dir1 = ButlerURI(os.path.join(self.tmpdir, "dir1"), forceDirectory=True)
        dir1.mkdir()
        dir2 = os.path.join(self.tmpdir, "dir2")
        os.symlink(dir1.ospath, dir2)

        # Write a test file.
        src_file = dir1.join("test.txt")
        content = "0123456"
        src_file.write(content.encode())

        # Construct URI to destination that should be identical.
        dest_file = ButlerURI(os.path.join(dir2), forceDirectory=True).join("test.txt")
        self.assertTrue(dest_file.exists())
        self.assertNotEqual(src_file, dest_file)

        # Transfer it over itself.
        dest_file.transfer_from(src_file, transfer="symlink", overwrite=True)
        new_content = dest_file.read().decode()
        self.assertEqual(content, new_content)

    def testResource(self):
        """Test URIs that use the ``resource`` scheme."""
        u = ButlerURI("resource://lsst.daf.butler/configs/datastore.yaml")
        self.assertTrue(u.exists(), f"Check {u} exists")

        content = u.read().decode()
        self.assertTrue(content.startswith("datastore:"))

        truncated = u.read(size=9).decode()
        self.assertEqual(truncated, "datastore")

        d = ButlerURI("resource://lsst.daf.butler/configs", forceDirectory=True)
        # Check the directory itself, not the file checked above.
        self.assertTrue(d.exists(), f"Check directory {d} exists")

        j = d.join("datastore.yaml")
        self.assertEqual(u, j)
        self.assertFalse(j.dirLike)
        self.assertFalse(j.isdir())
        not_there = d.join("not-there.yaml")
        self.assertFalse(not_there.exists())

        bad = ButlerURI("resource://bad.module/not.yaml")
        multi = ButlerURI.mexists([u, bad, not_there])
        self.assertTrue(multi[u])
        self.assertFalse(multi[bad])
        self.assertFalse(multi[not_there])

    def testEscapes(self):
        """Special characters in file paths"""
        src = ButlerURI("bbb/???/test.txt", root=self.tmpdir, forceAbsolute=True)
        self.assertFalse(src.scheme)
        src.write(b"Some content")
        self.assertTrue(src.exists())

        # abspath always returns a file scheme
        file = src.abspath()
        self.assertTrue(file.exists())
        self.assertIn("???", file.ospath)
        self.assertNotIn("???", file.path)

        file = file.updatedFile("tests??.txt")
        self.assertNotIn("??.txt", file.path)
        file.write(b"Other content")
        self.assertEqual(file.read(), b"Other content")

        src = src.updatedFile("tests??.txt")
        self.assertIn("??.txt", src.path)
        self.assertEqual(file.read(), src.read(), f"reading from {file.ospath} and {src.ospath}")

        # File URI and schemeless URI
        parent = ButlerURI("file:" + urllib.parse.quote("/a/b/c/de/??/"))
        child = ButlerURI("e/f/g.txt", forceAbsolute=False)
        self.assertEqual(child.relative_to(parent), "e/f/g.txt")

        child = ButlerURI("e/f??#/g.txt", forceAbsolute=False)
        self.assertEqual(child.relative_to(parent), "e/f??#/g.txt")

        child = ButlerURI("file:" + urllib.parse.quote("/a/b/c/de/??/e/f??#/g.txt"))
        self.assertEqual(child.relative_to(parent), "e/f??#/g.txt")

        self.assertEqual(child.relativeToPathRoot, "a/b/c/de/??/e/f??#/g.txt")

        # Schemeless so should not quote
        schemeless_dir = ButlerURI("bbb/???/", root=self.tmpdir, forceAbsolute=True, forceDirectory=True)
        self.assertIn("???", schemeless_dir.ospath)
        self.assertIn("???", schemeless_dir.path)
        self.assertFalse(schemeless_dir.scheme)

        # dir.join() morphs into a file scheme
        new = schemeless_dir.join("test_j.txt")
        self.assertIn("???", new.ospath, f"Checking {new}")
        new.write(b"Content")

        new2name = "###/test??.txt"
        new2 = schemeless_dir.join(new2name)
        self.assertIn("???", new2.ospath)
        new2.write(b"Content")
        self.assertTrue(new2.ospath.endswith(new2name))
        self.assertEqual(new.read(), new2.read())

        fdir = schemeless_dir.abspath()
        self.assertNotIn("???", fdir.path)
        self.assertIn("???", fdir.ospath)
        self.assertEqual(fdir.scheme, "file")
        fnew = schemeless_dir.join("test_jf.txt")
        fnew.write(b"Content")

        fnew2 = fdir.join(new2name)
        fnew2.write(b"Content")
        self.assertTrue(fnew2.ospath.endswith(new2name))
        self.assertNotIn("###", fnew2.path)

        self.assertEqual(fnew.read(), fnew2.read())

        # Test that children relative to schemeless and file schemes
        # still return the same unquoted name
        self.assertEqual(fnew2.relative_to(fdir), new2name, f"{fnew2}.relative_to({fdir})")
        self.assertEqual(fnew2.relative_to(schemeless_dir), new2name,
                         f"{fnew2}.relative_to({schemeless_dir})")
        self.assertEqual(new2.relative_to(fdir), new2name, f"{new2}.relative_to({fdir})")
        self.assertEqual(new2.relative_to(schemeless_dir), new2name,
                         f"{new2}.relative_to({schemeless_dir})")

        # Check for double quoting
        plus_path = "/a/b/c+d/"
        with self.assertLogs(level="WARNING"):
            uri = ButlerURI(urllib.parse.quote(plus_path), forceDirectory=True)
        self.assertEqual(uri.ospath, plus_path)

        # Check that # is not escaped for schemeless URIs
        hash_path = "/a/b#/c&d#xyz"
        hpos = hash_path.rfind("#")
        uri = ButlerURI(hash_path)
        self.assertEqual(uri.ospath, hash_path[:hpos])
        self.assertEqual(uri.fragment, hash_path[hpos + 1:])

    def testHash(self):
        """Test that we can store URIs in sets and as keys."""
        uri1 = ButlerURI(TESTDIR)
        uri2 = uri1.join("test/")
        s = {uri1, uri2}
        self.assertIn(uri1, s)

        d = {uri1: "1", uri2: "2"}
        self.assertEqual(d[uri2], "2")

    def testWalk(self):
        """Test ButlerURI.walk()."""
        test_dir_uri = ButlerURI(TESTDIR)

        file = test_dir_uri.join("config/basic/butler.yaml")
        found = list(ButlerURI.findFileResources([file]))
        self.assertEqual(found[0], file)

        # Compare against the full local paths
        expected = {p for p in glob.glob(os.path.join(TESTDIR, "config", "**"), recursive=True)
                    if os.path.isfile(p)}
        found = {u.ospath for u in ButlerURI.findFileResources([test_dir_uri.join("config")])}
        self.assertEqual(found, expected)

        # Now solely the YAML files
        expected_yaml = set(glob.glob(os.path.join(TESTDIR, "config", "**", "*.yaml"), recursive=True))
        found = {u.ospath for u in ButlerURI.findFileResources([test_dir_uri.join("config")],
                                                               file_filter=r".*\.yaml$")}
        self.assertEqual(found, expected_yaml)

        # Now two explicit directories and a file
        expected = set(glob.glob(os.path.join(TESTDIR, "config", "**", "basic", "*.yaml"), recursive=True))
        expected.update(glob.glob(os.path.join(TESTDIR, "config", "**", "templates", "*.yaml"),
                                  recursive=True))
        expected.add(file.ospath)

        found = {u.ospath for u in ButlerURI.findFileResources([file, test_dir_uri.join("config/basic"),
                                                                test_dir_uri.join("config/templates")],
                                                               file_filter=r".*\.yaml$")}
        self.assertEqual(found, expected)

        # Group by directory -- find everything and compare it with what
        # we expected to be there in total. We expect to find 9 directories
        # containing yaml files so make sure we only iterate 9 times.
        found_yaml = set()
        counter = 0
        for uris in ButlerURI.findFileResources([file, test_dir_uri.join("config/")],
                                                file_filter=r".*\.yaml$", grouped=True):
            found = {u.ospath for u in uris}
            if found:
                counter += 1

            found_yaml.update(found)

        self.assertEqual(found_yaml, expected_yaml)
        self.assertEqual(counter, 9)

        # Grouping but check that single files are returned in a single group
        # at the end
        file2 = test_dir_uri.join("config/templates/templates-bad.yaml")
        found = list(ButlerURI.findFileResources([file, file2, test_dir_uri.join("config/dbAuth")],
                                                 grouped=True))
        self.assertEqual(len(found), 2)
        self.assertEqual(list(found[1]), [file, file2])

        with self.assertRaises(ValueError):
            list(file.walk())

    def testRootURI(self):
        """Test ButlerURI.root_uri()."""
        uri = ButlerURI("https://www.notexist.com:8080/file/test")
        uri2 = ButlerURI("s3://www.notexist.com/file/test")
        self.assertEqual(uri.root_uri().geturl(), "https://www.notexist.com:8080/")
        self.assertEqual(uri2.root_uri().geturl(), "s3://www.notexist.com/")

    def testJoin(self):
        """Test .join method."""

        root_str = "s3://bucket/hsc/payload/"
        root = ButlerURI(root_str)

        self.assertEqual(root.join("b/test.txt").geturl(), f"{root_str}b/test.txt")
        add_dir = root.join("b/c/d/")
        self.assertTrue(add_dir.isdir())
        self.assertEqual(add_dir.geturl(), f"{root_str}b/c/d/")

        quote_example = "b&c.t@x#t"
        needs_quote = root.join(quote_example)
        self.assertEqual(needs_quote.unquoted_path, f"/hsc/payload/{quote_example}")

        other = ButlerURI("file://localhost/test.txt")
        self.assertEqual(root.join(other), other)
        self.assertEqual(other.join("b/new.txt").geturl(), "file://localhost/b/new.txt")

        joined = ButlerURI("s3://bucket/hsc/payload/").join(ButlerURI("test.qgraph", forceAbsolute=False))
        self.assertEqual(joined, ButlerURI("s3://bucket/hsc/payload/test.qgraph"))

        with self.assertRaises(ValueError):
            ButlerURI("s3://bucket/hsc/payload/").join(ButlerURI("test.qgraph"))

    def testTemporary(self):
        """Test that temporary URIs are removed when the context exits."""
        with ButlerURI.temporary_uri(suffix=".json") as tmp:
            self.assertEqual(tmp.getExtension(), ".json", f"uri: {tmp}")
            self.assertTrue(tmp.isabs(), f"uri: {tmp}")
            self.assertFalse(tmp.exists(), f"uri: {tmp}")
            tmp.write(b"abcd")
            self.assertTrue(tmp.exists(), f"uri: {tmp}")
            self.assertTrue(tmp.isTemporary)
        self.assertFalse(tmp.exists(), f"uri: {tmp}")

        tmpdir = ButlerURI(self.tmpdir, forceDirectory=True)
        with ButlerURI.temporary_uri(prefix=tmpdir, suffix=".yaml") as tmp:
            # Use a specified tmpdir and check it is okay for the file
            # to not be created.
            self.assertFalse(tmp.exists(), f"uri: {tmp}")
        self.assertTrue(tmpdir.exists(), f"uri: {tmpdir} still exists")
@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!")
@mock_s3
class S3URITestCase(unittest.TestCase):
    """Tests involving S3"""

    # Bucket name to use in tests
    bucketName = "any_bucket"

    def setUp(self):
        # Local test directory
        self.tmpdir = makeTestTempDir(TESTDIR)

        # set up some fake credentials if they do not exist
        self.usingDummyCredentials = setAwsEnvCredentials()

        # MOTO needs to know that we expect Bucket bucketname to exist
        s3 = boto3.resource("s3")
        s3.create_bucket(Bucket=self.bucketName)

    def tearDown(self):
        try:
            bucket = boto3.resource("s3").Bucket(self.bucketName)
            try:
                bucket.objects.all().delete()
            except botocore.exceptions.ClientError as e:
                # A 404 means the key was not reachable, which is fine
                # during cleanup; anything else is a real failure.
                if e.response["Error"]["Code"] != "404":
                    raise
            bucket.delete()
        finally:
            # Always restore the environment and remove local files, even
            # when the bucket cleanup above raised.
            # unset any potentially set dummy credentials
            if self.usingDummyCredentials:
                unsetAwsEnvCredentials()

            removeTestTempDir(self.tmpdir)

    def makeS3Uri(self, path):
        """Return the ``s3://`` URI string for ``path`` in the test bucket."""
        return f"s3://{self.bucketName}/{path}"

    def _assertGroupedPaths(self, groups, expected):
        """Check grouped results against the expected path tuples.

        Each group is consumed as soon as it is produced, in the same
        order as a plain ``zip`` loop would. Unlike a bare ``zip``, a
        result that yields fewer groups than expected is a failure.
        """
        n_checked = 0
        for got, expect in zip(groups, expected):
            self.assertEqual(tuple(u.path for u in got), expect)
            n_checked += 1
        self.assertEqual(n_checked, len(expected), "Fewer groups were found than expected")

    def testTransfer(self):
        """Test transfers between local files and S3 and within S3."""
        src = ButlerURI(os.path.join(self.tmpdir, "test.txt"))
        content = "Content is some content\nwith something to say\n\n"
        src.write(content.encode())
        self.assertTrue(src.exists())
        self.assertEqual(src.size(), len(content.encode()))

        dest = ButlerURI(self.makeS3Uri("test.txt"))
        self.assertFalse(dest.exists())

        with self.assertRaises(FileNotFoundError):
            dest.size()

        dest.transfer_from(src, transfer="copy")
        self.assertTrue(dest.exists())

        dest2 = ButlerURI(self.makeS3Uri("copied.txt"))
        dest2.transfer_from(dest, transfer="copy")
        self.assertTrue(dest2.exists())

        local = ButlerURI(os.path.join(self.tmpdir, "copied.txt"))
        local.transfer_from(dest2, transfer="copy")
        with open(local.ospath, "r") as fd:
            new_content = fd.read()
        self.assertEqual(new_content, content)

        with self.assertRaises(ValueError):
            dest2.transfer_from(local, transfer="symlink")

        b = dest.read()
        self.assertEqual(b.decode(), new_content)

        # A sized read must return exactly the requested number of bytes.
        nbytes = 10
        subset = dest.read(size=nbytes)
        self.assertEqual(len(subset), nbytes)
        self.assertEqual(subset.decode(), content[:nbytes])

        with self.assertRaises(FileExistsError):
            dest.transfer_from(src, transfer="copy")

        dest.transfer_from(src, transfer="copy", overwrite=True)

    def testWalk(self):
        """Test that we can list an S3 bucket"""
        # Files we want to create
        expected = ("a/x.txt", "a/y.txt", "a/z.json", "a/b/w.txt", "a/b/c/d/v.json")
        expected_uris = [ButlerURI(self.makeS3Uri(path)) for path in expected]
        for uri in expected_uris:
            # Doesn't matter what we write
            uri.write("123".encode())

        # Find all the files in the a/ tree
        found = {uri.path for uri in ButlerURI.findFileResources([ButlerURI(self.makeS3Uri("a/"))])}
        self.assertEqual(found, {uri.path for uri in expected_uris})

        # Find all the files in the a/ tree but group by folder
        found = ButlerURI.findFileResources([ButlerURI(self.makeS3Uri("a/"))],
                                            grouped=True)
        expected = (("/a/x.txt", "/a/y.txt", "/a/z.json"), ("/a/b/w.txt",), ("/a/b/c/d/v.json",))
        self._assertGroupedPaths(found, expected)

        # Find only JSON files
        found = {uri.path for uri in ButlerURI.findFileResources([ButlerURI(self.makeS3Uri("a/"))],
                                                                 file_filter=r"\.json$")}
        self.assertEqual(found, {uri.path for uri in expected_uris if uri.path.endswith(".json")})

        # JSON files grouped by directory
        found = ButlerURI.findFileResources([ButlerURI(self.makeS3Uri("a/"))],
                                            file_filter=r"\.json$", grouped=True)
        expected = (("/a/z.json",), ("/a/b/c/d/v.json",))
        self._assertGroupedPaths(found, expected)

        # Check pagination works with large numbers of files. S3 API limits
        # us to 1000 response per list_objects call so create lots of files
        created = set()
        n_dir1 = 1100
        for counter in range(1, n_dir1 + 1):
            new = ButlerURI(self.makeS3Uri(f"test/file{counter:04d}.txt"))
            new.write(f"{counter}".encode())
            created.add(str(new))

        # Put some in a subdirectory to make sure we are looking in a
        # hierarchy.
        n_dir2 = 100
        for counter in range(1, n_dir2 + 1):
            new = ButlerURI(self.makeS3Uri(f"test/subdir/file{counter:04d}.txt"))
            new.write(f"{counter}".encode())
            created.add(str(new))

        found = ButlerURI.findFileResources([ButlerURI(self.makeS3Uri("test/"))])
        self.assertEqual({str(u) for u in found}, created)

        # Again with grouping.
        found = list(ButlerURI.findFileResources([ButlerURI(self.makeS3Uri("test/"))], grouped=True))
        self.assertEqual(len(found), 2)
        dir_1 = list(found[0])
        dir_2 = list(found[1])
        self.assertEqual(len(dir_1), n_dir1)
        self.assertEqual(len(dir_2), n_dir2)

    def testWrite(self):
        """Test that content written to S3 can be read back."""
        s3write = ButlerURI(self.makeS3Uri("created.txt"))
        content = "abcdefghijklmnopqrstuv\n"
        s3write.write(content.encode())
        self.assertEqual(s3write.read().decode(), content)

    def testTemporary(self):
        """Test temporary URIs created under an S3 prefix."""
        s3root = ButlerURI(self.makeS3Uri("rootdir"), forceDirectory=True)
        with ButlerURI.temporary_uri(prefix=s3root, suffix=".json") as tmp:
            self.assertEqual(tmp.getExtension(), ".json", f"uri: {tmp}")
            self.assertEqual(tmp.scheme, "s3", f"uri: {tmp}")
            self.assertEqual(tmp.parent(), s3root)
            basename = tmp.basename()
            content = "abcd"
            tmp.write(content.encode())
            self.assertTrue(tmp.exists(), f"uri: {tmp}")
        self.assertFalse(tmp.exists())

        # Again without writing anything, to check that there is no complaint
        # on exit of context manager.
        with ButlerURI.temporary_uri(prefix=s3root, suffix=".json") as tmp:
            self.assertFalse(tmp.exists())
            # Check that the file has a different name than before.
            self.assertNotEqual(tmp.basename(), basename, f"uri: {tmp}")
        self.assertFalse(tmp.exists())

    def testRelative(self):
        """Check that we can get subpaths back from two URIs"""
        parent = ButlerURI(self.makeS3Uri("rootdir"), forceDirectory=True)
        child = ButlerURI(self.makeS3Uri("rootdir/dir1/file.txt"))

        self.assertEqual(child.relative_to(parent), "dir1/file.txt")

        not_child = ButlerURI(self.makeS3Uri("/a/b/dir1/file.txt"))
        self.assertIsNone(not_child.relative_to(parent))

        not_s3 = ButlerURI(os.path.join(self.tmpdir, "dir1", "file2.txt"))
        self.assertIsNone(child.relative_to(not_s3))

    def testQuoting(self):
        """Check that quoting works."""
        parent = ButlerURI(self.makeS3Uri("rootdir"), forceDirectory=True)
        subpath = "rootdir/dir1+/file?.txt"
        child = ButlerURI(self.makeS3Uri(urllib.parse.quote(subpath)))

        self.assertEqual(child.relative_to(parent), "dir1+/file?.txt")
        self.assertEqual(child.basename(), "file?.txt")
        self.assertEqual(child.relativeToPathRoot, subpath)
        self.assertIn("%", child.path)
        self.assertEqual(child.unquoted_path, "/" + subpath)
# Mock required environment variables during tests
@unittest.mock.patch.dict(
    os.environ,
    {
        "LSST_BUTLER_WEBDAV_AUTH": "TOKEN",
        "LSST_BUTLER_WEBDAV_TOKEN_FILE": os.path.join(TESTDIR, "config/testConfigs/webdav/token"),
        "LSST_BUTLER_WEBDAV_CA_BUNDLE": "/path/to/ca/certs",
    },
)
class WebdavURITestCase(unittest.TestCase):
    """Tests for https URIs, with all HTTP traffic served by ``responses``."""

    def setUp(self):
        host = "www.not-exists.orgx"
        folder = "existingFolder"
        present = "existingFile"
        absent = "notExistingFile"

        self.baseURL = ButlerURI(f"https://{host}", forceDirectory=True)
        self.existingFileButlerURI = ButlerURI(f"https://{host}/{folder}/{present}")
        self.notExistingFileButlerURI = ButlerURI(f"https://{host}/{folder}/{absent}")
        self.existingFolderButlerURI = ButlerURI(f"https://{host}/{folder}", forceDirectory=True)
        self.notExistingFolderButlerURI = ButlerURI(f"https://{host}/{absent}", forceDirectory=True)

        base_url = self.baseURL.geturl()
        file_url = self.existingFileButlerURI.geturl()
        missing_file_url = self.notExistingFileButlerURI.geturl()
        folder_url = self.existingFolderButlerURI.geturl()
        missing_folder_url = self.notExistingFolderButlerURI.geturl()
        sized = {"Content-Length": "1024"}

        # Need to declare the options
        responses.add(responses.OPTIONS, base_url, status=200, headers={"DAV": "1,2,3"})

        # Used by ButlerHttpURI.exists()
        responses.add(responses.HEAD, file_url, status=200, headers=dict(sized))
        responses.add(responses.HEAD, missing_file_url, status=404)

        # Used by ButlerHttpURI.read()
        responses.add(responses.GET, file_url, status=200, body=b"It works!")
        responses.add(responses.GET, missing_file_url, status=404)

        # Used by ButlerHttpURI.write()
        responses.add(responses.PUT, file_url, status=201)

        # Used by ButlerHttpURI.transfer_from()
        # Registration order is kept: COPY to self, COPY to the missing
        # file, then MOVE to the missing file.
        for verb, destination in (("COPY", file_url),
                                  ("COPY", missing_file_url),
                                  ("MOVE", missing_file_url)):
            responses.add(responses.Response(url=file_url,
                                             method=verb,
                                             headers={"Destination": destination},
                                             status=201))

        # Used by ButlerHttpURI.remove()
        responses.add(responses.DELETE, file_url, status=200)
        responses.add(responses.DELETE, missing_file_url, status=404)

        # Used by ButlerHttpURI.mkdir()
        responses.add(responses.HEAD, folder_url, status=200, headers=dict(sized))
        responses.add(responses.HEAD, base_url, status=200, headers=dict(sized))
        responses.add(responses.HEAD, missing_folder_url, status=404)
        responses.add(responses.Response(url=missing_folder_url, method="MKCOL", status=201))
        responses.add(responses.Response(url=folder_url, method="MKCOL", status=403))

    @responses.activate
    def testExists(self):
        """Check exists() and size() for a present and an absent file."""
        present = self.existingFileButlerURI
        absent = self.notExistingFileButlerURI

        self.assertTrue(present.exists())
        self.assertFalse(absent.exists())

        self.assertEqual(present.size(), 1024)
        with self.assertRaises(FileNotFoundError):
            absent.size()

    @responses.activate
    def testRemove(self):
        """Check remove() for a present and an absent file."""
        outcome = self.existingFileButlerURI.remove()
        self.assertIsNone(outcome)
        with self.assertRaises(FileNotFoundError):
            self.notExistingFileButlerURI.remove()

    @responses.activate
    def testMkdir(self):
        """Check mkdir() for new, existing and file-like URIs."""
        # The mock means that we can't check this now exists
        self.notExistingFolderButlerURI.mkdir()

        # This should do nothing
        self.existingFolderButlerURI.mkdir()

        with self.assertRaises(ValueError):
            self.notExistingFileButlerURI.mkdir()

    @responses.activate
    def testRead(self):
        """Check read() for a present and an absent file."""
        present = self.existingFileButlerURI
        self.assertEqual(present.read().decode(), "It works!")
        self.assertNotEqual(present.read().decode(), "Nope.")
        with self.assertRaises(FileNotFoundError):
            self.notExistingFileButlerURI.read()

    @responses.activate
    def testWrite(self):
        """Check write() with and without permission to overwrite."""
        payload = "Some content.".encode()
        self.assertIsNone(self.existingFileButlerURI.write(data=payload))
        with self.assertRaises(FileExistsError):
            self.existingFileButlerURI.write(data="Some content.".encode(), overwrite=False)

    @responses.activate
    def testTransfer(self):
        """Check transfer_from() for default, move and invalid modes."""
        source = self.existingFileButlerURI
        target = self.notExistingFileButlerURI

        self.assertIsNone(target.transfer_from(src=source))
        self.assertIsNone(target.transfer_from(src=source, transfer="move"))
        with self.assertRaises(FileExistsError):
            source.transfer_from(src=source)
        with self.assertRaises(ValueError):
            target.transfer_from(src=source, transfer="unsupported")

    def testParent(self):
        """Check parent() and dirname() using URL strings only."""
        self.assertEqual(self.existingFolderButlerURI.geturl(),
                         self.notExistingFileButlerURI.parent().geturl())
        self.assertEqual(self.baseURL.geturl(),
                         self.baseURL.parent().geturl())
        self.assertEqual(self.existingFileButlerURI.parent().geturl(),
                         self.existingFileButlerURI.dirname().geturl())
# Run the test suite when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()