Coverage for tests/test_uri.py: 12%
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
import glob
import os
import pathlib
import shutil
import unittest
import unittest.mock
import urllib.parse

import responses
try:
    import boto3
    import botocore
    from moto import mock_s3
except ImportError:
    # Any of the optional AWS packages is missing: flag that through a
    # falsy ``boto3`` and provide a stand-in decorator so that class
    # definitions decorated with ``@mock_s3`` still import cleanly.
    boto3 = None

    def mock_s3(cls):
        """Return ``cls`` unchanged; used when moto's mock_s3 is unavailable."""
        return cls
43from lsst.daf.butler import ButlerURI
44from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir
45from lsst.resources.s3utils import setAwsEnvCredentials, unsetAwsEnvCredentials
# Absolute path of the directory that contains this test module.
TESTDIR = os.path.dirname(os.path.abspath(__file__))
50class FileURITestCase(unittest.TestCase):
51 """Concrete tests for local files"""
53 def setUp(self):
54 # Use a local tempdir because on macOS the temp dirs use symlinks
55 # so relsymlink gets quite confused.
56 self.tmpdir = makeTestTempDir(TESTDIR)
58 def tearDown(self):
59 removeTestTempDir(self.tmpdir)
61 def testFile(self):
62 file = os.path.join(self.tmpdir, "test.txt")
63 uri = ButlerURI(file)
64 self.assertFalse(uri.exists(), f"{uri} should not exist")
65 self.assertEqual(uri.ospath, file)
67 path = pathlib.Path(file)
68 uri = ButlerURI(path)
69 self.assertEqual(uri.ospath, file)
71 content = "abcdefghijklmnopqrstuv\n"
72 uri.write(content.encode())
73 self.assertTrue(os.path.exists(file), "File should exist locally")
74 self.assertTrue(uri.exists(), f"{uri} should now exist")
75 self.assertEqual(uri.read().decode(), content)
76 self.assertEqual(uri.size(), len(content.encode()))
78 with self.assertRaises(FileNotFoundError):
79 ButlerURI("file/not/there.txt").size()
81 # Check that creating a URI from a URI returns the same thing
82 uri2 = ButlerURI(uri)
83 self.assertEqual(uri, uri2)
84 self.assertEqual(id(uri), id(uri2))
86 with self.assertRaises(ValueError):
87 # Scheme-less URIs are not allowed to support non-file roots
88 # at the present time. This may change in the future to become
89 # equivalent to ButlerURI.join()
90 ButlerURI("a/b.txt", root=ButlerURI("s3://bucket/a/b/"))
92 def testExtension(self):
93 file = ButlerURI(os.path.join(self.tmpdir, "test.txt"))
94 self.assertEqual(file.updatedExtension(None), file)
95 self.assertEqual(file.updatedExtension(".txt"), file)
96 self.assertEqual(id(file.updatedExtension(".txt")), id(file))
98 fits = file.updatedExtension(".fits.gz")
99 self.assertEqual(fits.basename(), "test.fits.gz")
100 self.assertEqual(fits.updatedExtension(".jpeg").basename(), "test.jpeg")
102 def testRelative(self):
103 """Check that we can get subpaths back from two URIs"""
104 parent = ButlerURI(self.tmpdir, forceDirectory=True, forceAbsolute=True)
105 self.assertTrue(parent.isdir())
106 child = ButlerURI(os.path.join(self.tmpdir, "dir1", "file.txt"), forceAbsolute=True)
108 self.assertEqual(child.relative_to(parent), "dir1/file.txt")
110 not_child = ButlerURI("/a/b/dir1/file.txt")
111 self.assertIsNone(not_child.relative_to(parent))
112 self.assertFalse(not_child.isdir())
114 not_directory = ButlerURI(os.path.join(self.tmpdir, "dir1", "file2.txt"))
115 self.assertIsNone(child.relative_to(not_directory))
117 # Relative URIs
118 parent = ButlerURI("a/b/", forceAbsolute=False)
119 child = ButlerURI("a/b/c/d.txt", forceAbsolute=False)
120 self.assertFalse(child.scheme)
121 self.assertEqual(child.relative_to(parent), "c/d.txt")
123 # File URI and schemeless URI
124 parent = ButlerURI("file:/a/b/c/")
125 child = ButlerURI("e/f/g.txt", forceAbsolute=False)
127 # If the child is relative and the parent is absolute we assume
128 # that the child is a child of the parent unless it uses ".."
129 self.assertEqual(child.relative_to(parent), "e/f/g.txt")
131 child = ButlerURI("../e/f/g.txt", forceAbsolute=False)
132 self.assertIsNone(child.relative_to(parent))
134 child = ButlerURI("../c/e/f/g.txt", forceAbsolute=False)
135 self.assertEqual(child.relative_to(parent), "e/f/g.txt")
137 # Test non-file root with relative path.
138 child = ButlerURI("e/f/g.txt", forceAbsolute=False)
139 parent = ButlerURI("s3://hello/a/b/c/")
140 self.assertEqual(child.relative_to(parent), "e/f/g.txt")
142 # Test with different netloc
143 child = ButlerURI("http://my.host/a/b/c.txt")
144 parent = ButlerURI("http://other.host/a/")
145 self.assertIsNone(child.relative_to(parent), f"{child}.relative_to({parent})")
147 # Schemeless absolute child.
148 # Schemeless absolute URI is constructed using root= parameter.
149 parent = ButlerURI("file:///a/b/c/")
150 child = ButlerURI("d/e.txt", root=parent)
151 self.assertEqual(child.relative_to(parent), "d/e.txt", f"{child}.relative_to({parent})")
153 parent = ButlerURI("c/", root="/a/b/")
154 self.assertEqual(child.relative_to(parent), "d/e.txt", f"{child}.relative_to({parent})")
156 # Absolute schemeless child with relative parent will always fail.
157 parent = ButlerURI("d/e.txt", forceAbsolute=False)
158 self.assertIsNone(child.relative_to(parent), f"{child}.relative_to({parent})")
160 def testParents(self):
161 """Test of splitting and parent walking."""
162 parent = ButlerURI(self.tmpdir, forceDirectory=True, forceAbsolute=True)
163 child_file = parent.join("subdir/file.txt")
164 self.assertFalse(child_file.isdir())
165 child_subdir, file = child_file.split()
166 self.assertEqual(file, "file.txt")
167 self.assertTrue(child_subdir.isdir())
168 self.assertEqual(child_file.dirname(), child_subdir)
169 self.assertEqual(child_file.basename(), file)
170 self.assertEqual(child_file.parent(), child_subdir)
171 derived_parent = child_subdir.parent()
172 self.assertEqual(derived_parent, parent)
173 self.assertTrue(derived_parent.isdir())
174 self.assertEqual(child_file.parent().parent(), parent)
176 def testEnvVar(self):
177 """Test that environment variables are expanded."""
179 with unittest.mock.patch.dict(os.environ, {"MY_TEST_DIR": "/a/b/c"}):
180 uri = ButlerURI("${MY_TEST_DIR}/d.txt")
181 self.assertEqual(uri.path, "/a/b/c/d.txt")
182 self.assertEqual(uri.scheme, "file")
184 # This will not expand
185 uri = ButlerURI("${MY_TEST_DIR}/d.txt", forceAbsolute=False)
186 self.assertEqual(uri.path, "${MY_TEST_DIR}/d.txt")
187 self.assertFalse(uri.scheme)
189 def testMkdir(self):
190 tmpdir = ButlerURI(self.tmpdir)
191 newdir = tmpdir.join("newdir/seconddir")
192 newdir.mkdir()
193 self.assertTrue(newdir.exists())
194 newfile = newdir.join("temp.txt")
195 newfile.write("Data".encode())
196 self.assertTrue(newfile.exists())
198 def testTransfer(self):
199 src = ButlerURI(os.path.join(self.tmpdir, "test.txt"))
200 content = "Content is some content\nwith something to say\n\n"
201 src.write(content.encode())
203 for mode in ("copy", "link", "hardlink", "symlink", "relsymlink"):
204 dest = ButlerURI(os.path.join(self.tmpdir, f"dest_{mode}.txt"))
205 dest.transfer_from(src, transfer=mode)
206 self.assertTrue(dest.exists(), f"Check that {dest} exists (transfer={mode})")
208 with open(dest.ospath, "r") as fh:
209 new_content = fh.read()
210 self.assertEqual(new_content, content)
212 if mode in ("symlink", "relsymlink"):
213 self.assertTrue(os.path.islink(dest.ospath), f"Check that {dest} is symlink")
215 # If the source and destination are hardlinks of each other
216 # the transfer should work even if overwrite=False.
217 if mode in ("link", "hardlink"):
218 dest.transfer_from(src, transfer=mode)
219 else:
220 with self.assertRaises(
221 FileExistsError, msg=f"Overwrite of {dest} should not be allowed ({mode})"
222 ):
223 dest.transfer_from(src, transfer=mode)
225 dest.transfer_from(src, transfer=mode, overwrite=True)
227 os.remove(dest.ospath)
229 b = src.read()
230 self.assertEqual(b.decode(), new_content)
232 nbytes = 10
233 subset = src.read(size=nbytes)
234 self.assertEqual(len(subset), nbytes)
235 self.assertEqual(subset.decode(), content[:nbytes])
237 with self.assertRaises(ValueError):
238 src.transfer_from(src, transfer="unknown")
240 def testTransferIdentical(self):
241 """Test overwrite of identical files."""
242 dir1 = ButlerURI(os.path.join(self.tmpdir, "dir1"), forceDirectory=True)
243 dir1.mkdir()
244 dir2 = os.path.join(self.tmpdir, "dir2")
245 os.symlink(dir1.ospath, dir2)
247 # Write a test file.
248 src_file = dir1.join("test.txt")
249 content = "0123456"
250 src_file.write(content.encode())
252 # Construct URI to destination that should be identical.
253 dest_file = ButlerURI(os.path.join(dir2), forceDirectory=True).join("test.txt")
254 self.assertTrue(dest_file.exists())
255 self.assertNotEqual(src_file, dest_file)
257 # Transfer it over itself.
258 dest_file.transfer_from(src_file, transfer="symlink", overwrite=True)
259 new_content = dest_file.read().decode()
260 self.assertEqual(content, new_content)
262 def testResource(self):
263 u = ButlerURI("resource://lsst.daf.butler/configs/datastore.yaml")
264 self.assertTrue(u.exists(), f"Check {u} exists")
266 content = u.read().decode()
267 self.assertTrue(content.startswith("datastore:"))
269 truncated = u.read(size=9).decode()
270 self.assertEqual(truncated, "datastore")
272 d = ButlerURI("resource://lsst.daf.butler/configs", forceDirectory=True)
273 self.assertTrue(u.exists(), f"Check directory {d} exists")
275 j = d.join("datastore.yaml")
276 self.assertEqual(u, j)
277 self.assertFalse(j.dirLike)
278 self.assertFalse(j.isdir())
279 not_there = d.join("not-there.yaml")
280 self.assertFalse(not_there.exists())
282 bad = ButlerURI("resource://bad.module/not.yaml")
283 multi = ButlerURI.mexists([u, bad, not_there])
284 self.assertTrue(multi[u])
285 self.assertFalse(multi[bad])
286 self.assertFalse(multi[not_there])
288 def testEscapes(self):
289 """Special characters in file paths"""
290 src = ButlerURI("bbb/???/test.txt", root=self.tmpdir, forceAbsolute=True)
291 self.assertFalse(src.scheme)
292 src.write(b"Some content")
293 self.assertTrue(src.exists())
295 # abspath always returns a file scheme
296 file = src.abspath()
297 self.assertTrue(file.exists())
298 self.assertIn("???", file.ospath)
299 self.assertNotIn("???", file.path)
301 file = file.updatedFile("tests??.txt")
302 self.assertNotIn("??.txt", file.path)
303 file.write(b"Other content")
304 self.assertEqual(file.read(), b"Other content")
306 src = src.updatedFile("tests??.txt")
307 self.assertIn("??.txt", src.path)
308 self.assertEqual(file.read(), src.read(), f"reading from {file.ospath} and {src.ospath}")
310 # File URI and schemeless URI
311 parent = ButlerURI("file:" + urllib.parse.quote("/a/b/c/de/??/"))
312 child = ButlerURI("e/f/g.txt", forceAbsolute=False)
313 self.assertEqual(child.relative_to(parent), "e/f/g.txt")
315 child = ButlerURI("e/f??#/g.txt", forceAbsolute=False)
316 self.assertEqual(child.relative_to(parent), "e/f??#/g.txt")
318 child = ButlerURI("file:" + urllib.parse.quote("/a/b/c/de/??/e/f??#/g.txt"))
319 self.assertEqual(child.relative_to(parent), "e/f??#/g.txt")
321 self.assertEqual(child.relativeToPathRoot, "a/b/c/de/??/e/f??#/g.txt")
323 # Schemeless so should not quote
324 dir = ButlerURI("bbb/???/", root=self.tmpdir, forceAbsolute=True, forceDirectory=True)
325 self.assertIn("???", dir.ospath)
326 self.assertIn("???", dir.path)
327 self.assertFalse(dir.scheme)
329 # dir.join() morphs into a file scheme
330 new = dir.join("test_j.txt")
331 self.assertIn("???", new.ospath, f"Checking {new}")
332 new.write(b"Content")
334 new2name = "###/test??.txt"
335 new2 = dir.join(new2name)
336 self.assertIn("???", new2.ospath)
337 new2.write(b"Content")
338 self.assertTrue(new2.ospath.endswith(new2name))
339 self.assertEqual(new.read(), new2.read())
341 fdir = dir.abspath()
342 self.assertNotIn("???", fdir.path)
343 self.assertIn("???", fdir.ospath)
344 self.assertEqual(fdir.scheme, "file")
345 fnew = dir.join("test_jf.txt")
346 fnew.write(b"Content")
348 fnew2 = fdir.join(new2name)
349 fnew2.write(b"Content")
350 self.assertTrue(fnew2.ospath.endswith(new2name))
351 self.assertNotIn("###", fnew2.path)
353 self.assertEqual(fnew.read(), fnew2.read())
355 # Test that children relative to schemeless and file schemes
356 # still return the same unquoted name
357 self.assertEqual(fnew2.relative_to(fdir), new2name, f"{fnew2}.relative_to({fdir})")
358 self.assertEqual(fnew2.relative_to(dir), new2name, f"{fnew2}.relative_to({dir})")
359 self.assertEqual(new2.relative_to(fdir), new2name, f"{new2}.relative_to({fdir})")
360 self.assertEqual(new2.relative_to(dir), new2name, f"{new2}.relative_to({dir})")
362 # Check for double quoting
363 plus_path = "/a/b/c+d/"
364 with self.assertLogs(level="WARNING"):
365 uri = ButlerURI(urllib.parse.quote(plus_path), forceDirectory=True)
366 self.assertEqual(uri.ospath, plus_path)
368 # Check that # is not escaped for schemeless URIs
369 hash_path = "/a/b#/c&d#xyz"
370 hpos = hash_path.rfind("#")
371 uri = ButlerURI(hash_path)
372 self.assertEqual(uri.ospath, hash_path[:hpos])
373 self.assertEqual(uri.fragment, hash_path[hpos + 1 :])
375 def testHash(self):
376 """Test that we can store URIs in sets and as keys."""
377 uri1 = ButlerURI(TESTDIR)
378 uri2 = uri1.join("test/")
379 s = {uri1, uri2}
380 self.assertIn(uri1, s)
382 d = {uri1: "1", uri2: "2"}
383 self.assertEqual(d[uri2], "2")
385 def testWalk(self):
386 """Test ButlerURI.walk()."""
387 test_dir_uri = ButlerURI(TESTDIR)
389 file = test_dir_uri.join("config/basic/butler.yaml")
390 found = list(ButlerURI.findFileResources([file]))
391 self.assertEqual(found[0], file)
393 # Compare against the full local paths
394 expected = set(
395 p for p in glob.glob(os.path.join(TESTDIR, "config", "**"), recursive=True) if os.path.isfile(p)
396 )
397 found = set(u.ospath for u in ButlerURI.findFileResources([test_dir_uri.join("config")]))
398 self.assertEqual(found, expected)
400 # Now solely the YAML files
401 expected_yaml = set(glob.glob(os.path.join(TESTDIR, "config", "**", "*.yaml"), recursive=True))
402 found = set(
403 u.ospath
404 for u in ButlerURI.findFileResources([test_dir_uri.join("config")], file_filter=r".*\.yaml$")
405 )
406 self.assertEqual(found, expected_yaml)
408 # Now two explicit directories and a file
409 expected = set(glob.glob(os.path.join(TESTDIR, "config", "**", "basic", "*.yaml"), recursive=True))
410 expected.update(
411 set(glob.glob(os.path.join(TESTDIR, "config", "**", "templates", "*.yaml"), recursive=True))
412 )
413 expected.add(file.ospath)
415 found = set(
416 u.ospath
417 for u in ButlerURI.findFileResources(
418 [file, test_dir_uri.join("config/basic"), test_dir_uri.join("config/templates")],
419 file_filter=r".*\.yaml$",
420 )
421 )
422 self.assertEqual(found, expected)
424 # Group by directory -- find everything and compare it with what
425 # we expected to be there in total. We expect to find 9 directories
426 # containing yaml files so make sure we only iterate 9 times.
427 found_yaml = set()
428 counter = 0
429 for uris in ButlerURI.findFileResources(
430 [file, test_dir_uri.join("config/")], file_filter=r".*\.yaml$", grouped=True
431 ):
432 found = set(u.ospath for u in uris)
433 if found:
434 counter += 1
436 found_yaml.update(found)
438 self.assertEqual(found_yaml, expected_yaml)
439 self.assertEqual(counter, 9)
441 # Grouping but check that single files are returned in a single group
442 # at the end
443 file2 = test_dir_uri.join("config/templates/templates-bad.yaml")
444 found = list(
445 ButlerURI.findFileResources([file, file2, test_dir_uri.join("config/dbAuth")], grouped=True)
446 )
447 self.assertEqual(len(found), 2)
448 self.assertEqual(list(found[1]), [file, file2])
450 with self.assertRaises(ValueError):
451 list(file.walk())
453 def testRootURI(self):
454 """Test ButlerURI.root_uri()."""
455 uri = ButlerURI("https://www.notexist.com:8080/file/test")
456 uri2 = ButlerURI("s3://www.notexist.com/file/test")
457 self.assertEqual(uri.root_uri().geturl(), "https://www.notexist.com:8080/")
458 self.assertEqual(uri2.root_uri().geturl(), "s3://www.notexist.com/")
460 def testJoin(self):
461 """Test .join method."""
463 root_str = "s3://bucket/hsc/payload/"
464 root = ButlerURI(root_str)
466 self.assertEqual(root.join("b/test.txt").geturl(), f"{root_str}b/test.txt")
467 add_dir = root.join("b/c/d/")
468 self.assertTrue(add_dir.isdir())
469 self.assertEqual(add_dir.geturl(), f"{root_str}b/c/d/")
471 quote_example = "b&c.t@x#t"
472 needs_quote = root.join(quote_example)
473 self.assertEqual(needs_quote.unquoted_path, f"/hsc/payload/{quote_example}")
475 other = ButlerURI("file://localhost/test.txt")
476 self.assertEqual(root.join(other), other)
477 self.assertEqual(other.join("b/new.txt").geturl(), "file://localhost/b/new.txt")
479 joined = ButlerURI("s3://bucket/hsc/payload/").join(ButlerURI("test.qgraph", forceAbsolute=False))
480 self.assertEqual(joined, ButlerURI("s3://bucket/hsc/payload/test.qgraph"))
482 with self.assertRaises(ValueError):
483 ButlerURI("s3://bucket/hsc/payload/").join(ButlerURI("test.qgraph"))
485 def testTemporary(self):
486 with ButlerURI.temporary_uri(suffix=".json") as tmp:
487 self.assertEqual(tmp.getExtension(), ".json", f"uri: {tmp}")
488 self.assertTrue(tmp.isabs(), f"uri: {tmp}")
489 self.assertFalse(tmp.exists(), f"uri: {tmp}")
490 tmp.write(b"abcd")
491 self.assertTrue(tmp.exists(), f"uri: {tmp}")
492 self.assertTrue(tmp.isTemporary)
493 self.assertFalse(tmp.exists(), f"uri: {tmp}")
495 tmpdir = ButlerURI(self.tmpdir, forceDirectory=True)
496 with ButlerURI.temporary_uri(prefix=tmpdir, suffix=".yaml") as tmp:
497 # Use a specified tmpdir and check it is okay for the file
498 # to not be created.
499 self.assertFalse(tmp.exists(), f"uri: {tmp}")
500 self.assertTrue(tmpdir.exists(), f"uri: {tmpdir} still exists")
@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!")
@mock_s3
class S3URITestCase(unittest.TestCase):
    """Tests involving S3"""

    bucketName = "any_bucket"
    """Bucket name to use in tests"""

    def setUp(self):
        """Create a local temp dir, ensure credentials exist and create the bucket."""
        # Local test directory
        self.tmpdir = makeTestTempDir(TESTDIR)

        # set up some fake credentials if they do not exist
        self.usingDummyCredentials = setAwsEnvCredentials()

        # MOTO needs to know that we expect Bucket bucketname to exist
        s3 = boto3.resource("s3")
        s3.create_bucket(Bucket=self.bucketName)

    def tearDown(self):
        """Empty and delete the bucket, then always undo the ``setUp`` state.

        The credential and temporary-directory cleanup runs in a ``finally``
        clause so that a failure while deleting the bucket does not leave
        dummy credentials or the temp dir behind for later tests.
        """
        try:
            s3 = boto3.resource("s3")
            bucket = s3.Bucket(self.bucketName)
            try:
                bucket.objects.all().delete()
            except botocore.exceptions.ClientError as e:
                # A 404 means the key was not reachable, which is fine here;
                # anything else is a genuine error.
                if e.response["Error"]["Code"] != "404":
                    raise

            bucket.delete()
        finally:
            # unset any potentially set dummy credentials
            if self.usingDummyCredentials:
                unsetAwsEnvCredentials()

            removeTestTempDir(self.tmpdir)

    def makeS3Uri(self, path):
        """Return the ``s3://`` URI string for ``path`` inside the test bucket."""
        return f"s3://{self.bucketName}/{path}"

    def testTransfer(self):
        """Test copies between local files and S3 and between S3 keys."""
        src = ButlerURI(os.path.join(self.tmpdir, "test.txt"))
        content = "Content is some content\nwith something to say\n\n"
        src.write(content.encode())
        self.assertTrue(src.exists())
        self.assertEqual(src.size(), len(content.encode()))

        dest = ButlerURI(self.makeS3Uri("test.txt"))
        self.assertFalse(dest.exists())

        with self.assertRaises(FileNotFoundError):
            dest.size()

        dest.transfer_from(src, transfer="copy")
        self.assertTrue(dest.exists())

        dest2 = ButlerURI(self.makeS3Uri("copied.txt"))
        dest2.transfer_from(dest, transfer="copy")
        self.assertTrue(dest2.exists())

        local = ButlerURI(os.path.join(self.tmpdir, "copied.txt"))
        local.transfer_from(dest2, transfer="copy")
        with open(local.ospath, "r") as fd:
            new_content = fd.read()
        self.assertEqual(new_content, content)

        with self.assertRaises(ValueError):
            dest2.transfer_from(local, transfer="symlink")

        b = dest.read()
        self.assertEqual(b.decode(), new_content)

        nbytes = 10
        subset = dest.read(size=nbytes)
        self.assertEqual(len(subset), nbytes)
        self.assertEqual(subset.decode(), content[:nbytes])

        with self.assertRaises(FileExistsError):
            dest.transfer_from(src, transfer="copy")

        dest.transfer_from(src, transfer="copy", overwrite=True)

    def testWalk(self):
        """Test that we can list an S3 bucket"""
        # Files we want to create
        expected = ("a/x.txt", "a/y.txt", "a/z.json", "a/b/w.txt", "a/b/c/d/v.json")
        expected_uris = [ButlerURI(self.makeS3Uri(path)) for path in expected]
        for uri in expected_uris:
            # Doesn't matter what we write
            uri.write("123".encode())

        # Find all the files in the a/ tree
        found = set(uri.path for uri in ButlerURI.findFileResources([ButlerURI(self.makeS3Uri("a/"))]))
        self.assertEqual(found, {uri.path for uri in expected_uris})

        # Find all the files in the a/ tree but group by folder
        found = ButlerURI.findFileResources([ButlerURI(self.makeS3Uri("a/"))], grouped=True)
        expected = (("/a/x.txt", "/a/y.txt", "/a/z.json"), ("/a/b/w.txt",), ("/a/b/c/d/v.json",))

        # NOTE(review): zip() stops at the shorter input, so a missing
        # trailing group would not be detected by this loop.
        for got, expect in zip(found, expected):
            self.assertEqual(tuple(u.path for u in got), expect)

        # Find only JSON files
        found = set(
            uri.path
            for uri in ButlerURI.findFileResources([ButlerURI(self.makeS3Uri("a/"))], file_filter=r"\.json$")
        )
        self.assertEqual(found, {uri.path for uri in expected_uris if uri.path.endswith(".json")})

        # JSON files grouped by directory
        found = ButlerURI.findFileResources(
            [ButlerURI(self.makeS3Uri("a/"))], file_filter=r"\.json$", grouped=True
        )
        expected = (("/a/z.json",), ("/a/b/c/d/v.json",))

        for got, expect in zip(found, expected):
            self.assertEqual(tuple(u.path for u in got), expect)

        # Check pagination works with large numbers of files. S3 API limits
        # us to 1000 response per list_objects call so create lots of files
        created = set()
        n_dir1 = 1100
        for counter in range(1, n_dir1 + 1):
            new = ButlerURI(self.makeS3Uri(f"test/file{counter:04d}.txt"))
            new.write(f"{counter}".encode())
            created.add(str(new))

        # Put some in a subdirectory to make sure we are looking in a
        # hierarchy.
        n_dir2 = 100
        for counter in range(1, n_dir2 + 1):
            new = ButlerURI(self.makeS3Uri(f"test/subdir/file{counter:04d}.txt"))
            new.write(f"{counter}".encode())
            created.add(str(new))

        found = ButlerURI.findFileResources([ButlerURI(self.makeS3Uri("test/"))])
        self.assertEqual({str(u) for u in found}, created)

        # Again with grouping.
        found = list(ButlerURI.findFileResources([ButlerURI(self.makeS3Uri("test/"))], grouped=True))
        self.assertEqual(len(found), 2)
        dir_1 = list(found[0])
        dir_2 = list(found[1])
        self.assertEqual(len(dir_1), n_dir1)
        self.assertEqual(len(dir_2), n_dir2)

    def testWrite(self):
        """Test that content written to an S3 URI can be read back."""
        s3write = ButlerURI(self.makeS3Uri("created.txt"))
        content = "abcdefghijklmnopqrstuv\n"
        s3write.write(content.encode())
        self.assertEqual(s3write.read().decode(), content)

    def testTemporary(self):
        """Test temporary URIs created below an S3 prefix."""
        s3root = ButlerURI(self.makeS3Uri("rootdir"), forceDirectory=True)
        with ButlerURI.temporary_uri(prefix=s3root, suffix=".json") as tmp:
            self.assertEqual(tmp.getExtension(), ".json", f"uri: {tmp}")
            self.assertEqual(tmp.scheme, "s3", f"uri: {tmp}")
            self.assertEqual(tmp.parent(), s3root)
            basename = tmp.basename()
            content = "abcd"
            tmp.write(content.encode())
            self.assertTrue(tmp.exists(), f"uri: {tmp}")
        self.assertFalse(tmp.exists())

        # Again without writing anything, to check that there is no complaint
        # on exit of context manager.
        with ButlerURI.temporary_uri(prefix=s3root, suffix=".json") as tmp:
            self.assertFalse(tmp.exists())
            # Check that the file has a different name than before.
            self.assertNotEqual(tmp.basename(), basename, f"uri: {tmp}")
        self.assertFalse(tmp.exists())

    def testRelative(self):
        """Check that we can get subpaths back from two URIs"""
        parent = ButlerURI(self.makeS3Uri("rootdir"), forceDirectory=True)
        child = ButlerURI(self.makeS3Uri("rootdir/dir1/file.txt"))

        self.assertEqual(child.relative_to(parent), "dir1/file.txt")

        not_child = ButlerURI(self.makeS3Uri("/a/b/dir1/file.txt"))
        self.assertFalse(not_child.relative_to(parent))

        not_s3 = ButlerURI(os.path.join(self.tmpdir, "dir1", "file2.txt"))
        self.assertFalse(child.relative_to(not_s3))

    def testQuoting(self):
        """Check that quoting works."""
        parent = ButlerURI(self.makeS3Uri("rootdir"), forceDirectory=True)
        subpath = "rootdir/dir1+/file?.txt"
        child = ButlerURI(self.makeS3Uri(urllib.parse.quote(subpath)))

        self.assertEqual(child.relative_to(parent), "dir1+/file?.txt")
        self.assertEqual(child.basename(), "file?.txt")
        self.assertEqual(child.relativeToPathRoot, subpath)
        self.assertIn("%", child.path)
        self.assertEqual(child.unquoted_path, "/" + subpath)
# Mock required environment variables during tests
@unittest.mock.patch.dict(
    os.environ,
    {
        "LSST_BUTLER_WEBDAV_AUTH": "TOKEN",
        "LSST_BUTLER_WEBDAV_TOKEN_FILE": os.path.join(TESTDIR, "config/testConfigs/webdav/token"),
        "LSST_BUTLER_WEBDAV_CA_BUNDLE": "/path/to/ca/certs",
    },
)
class WebdavURITestCase(unittest.TestCase):
    """Tests of HTTPS URIs run against mocked HTTP responses."""

    def setUp(self):
        """Build the URIs used by the tests and register mocked responses."""
        # The registrations below go into the module-level ``responses``
        # registry. Tests not wrapped in ``@responses.activate`` never reset
        # it, so always clear it afterwards to keep tests independent.
        self.addCleanup(responses.reset)

        serverRoot = "www.not-exists.orgx"
        existingFolderName = "existingFolder"
        existingFileName = "existingFile"
        notExistingFileName = "notExistingFile"

        self.baseURL = ButlerURI(f"https://{serverRoot}", forceDirectory=True)
        self.existingFileButlerURI = ButlerURI(
            f"https://{serverRoot}/{existingFolderName}/{existingFileName}"
        )
        self.notExistingFileButlerURI = ButlerURI(
            f"https://{serverRoot}/{existingFolderName}/{notExistingFileName}"
        )
        self.existingFolderButlerURI = ButlerURI(
            f"https://{serverRoot}/{existingFolderName}", forceDirectory=True
        )
        self.notExistingFolderButlerURI = ButlerURI(
            f"https://{serverRoot}/{notExistingFileName}", forceDirectory=True
        )

        # Need to declare the options
        responses.add(responses.OPTIONS, self.baseURL.geturl(), status=200, headers={"DAV": "1,2,3"})

        # Used by ButlerHttpURI.exists()
        responses.add(
            responses.HEAD,
            self.existingFileButlerURI.geturl(),
            status=200,
            headers={"Content-Length": "1024"},
        )
        responses.add(responses.HEAD, self.notExistingFileButlerURI.geturl(), status=404)

        # Used by ButlerHttpURI.read()
        responses.add(
            responses.GET, self.existingFileButlerURI.geturl(), status=200, body=str.encode("It works!")
        )
        responses.add(responses.GET, self.notExistingFileButlerURI.geturl(), status=404)

        # Used by ButlerHttpURI.write()
        responses.add(responses.PUT, self.existingFileButlerURI.geturl(), status=201)

        # Used by ButlerHttpURI.transfer_from()
        responses.add(
            responses.Response(
                url=self.existingFileButlerURI.geturl(),
                method="COPY",
                headers={"Destination": self.existingFileButlerURI.geturl()},
                status=201,
            )
        )
        responses.add(
            responses.Response(
                url=self.existingFileButlerURI.geturl(),
                method="COPY",
                headers={"Destination": self.notExistingFileButlerURI.geturl()},
                status=201,
            )
        )
        responses.add(
            responses.Response(
                url=self.existingFileButlerURI.geturl(),
                method="MOVE",
                headers={"Destination": self.notExistingFileButlerURI.geturl()},
                status=201,
            )
        )

        # Used by ButlerHttpURI.remove()
        responses.add(responses.DELETE, self.existingFileButlerURI.geturl(), status=200)
        responses.add(responses.DELETE, self.notExistingFileButlerURI.geturl(), status=404)

        # Used by ButlerHttpURI.mkdir()
        responses.add(
            responses.HEAD,
            self.existingFolderButlerURI.geturl(),
            status=200,
            headers={"Content-Length": "1024"},
        )
        responses.add(responses.HEAD, self.baseURL.geturl(), status=200, headers={"Content-Length": "1024"})
        responses.add(responses.HEAD, self.notExistingFolderButlerURI.geturl(), status=404)
        responses.add(
            responses.Response(url=self.notExistingFolderButlerURI.geturl(), method="MKCOL", status=201)
        )
        responses.add(
            responses.Response(url=self.existingFolderButlerURI.geturl(), method="MKCOL", status=403)
        )

    @responses.activate
    def testExists(self):
        """Test exists() and size() for present and missing files."""

        self.assertTrue(self.existingFileButlerURI.exists())
        self.assertFalse(self.notExistingFileButlerURI.exists())

        self.assertEqual(self.existingFileButlerURI.size(), 1024)
        with self.assertRaises(FileNotFoundError):
            self.notExistingFileButlerURI.size()

    @responses.activate
    def testRemove(self):
        """Test remove() for present and missing files."""

        self.assertIsNone(self.existingFileButlerURI.remove())
        with self.assertRaises(FileNotFoundError):
            self.notExistingFileButlerURI.remove()

    @responses.activate
    def testMkdir(self):
        """Test mkdir() on new and existing folders and on a file URI."""

        # The mock means that we can't check this now exists
        self.notExistingFolderButlerURI.mkdir()

        # This should do nothing
        self.existingFolderButlerURI.mkdir()

        with self.assertRaises(ValueError):
            self.notExistingFileButlerURI.mkdir()

    @responses.activate
    def testRead(self):
        """Test read() for present and missing files."""

        self.assertEqual(self.existingFileButlerURI.read().decode(), "It works!")
        self.assertNotEqual(self.existingFileButlerURI.read().decode(), "Nope.")
        with self.assertRaises(FileNotFoundError):
            self.notExistingFileButlerURI.read()

    @responses.activate
    def testWrite(self):
        """Test write() with and without permission to overwrite."""

        self.assertIsNone(self.existingFileButlerURI.write(data=str.encode("Some content.")))
        with self.assertRaises(FileExistsError):
            self.existingFileButlerURI.write(data=str.encode("Some content."), overwrite=False)

    @responses.activate
    def testTransfer(self):
        """Test transfer_from() for copy, move, existing target and bad mode."""

        self.assertIsNone(self.notExistingFileButlerURI.transfer_from(src=self.existingFileButlerURI))
        self.assertIsNone(
            self.notExistingFileButlerURI.transfer_from(src=self.existingFileButlerURI, transfer="move")
        )
        with self.assertRaises(FileExistsError):
            self.existingFileButlerURI.transfer_from(src=self.existingFileButlerURI)
        with self.assertRaises(ValueError):
            self.notExistingFileButlerURI.transfer_from(
                src=self.existingFileButlerURI, transfer="unsupported"
            )

    def testParent(self):
        """Test parent() and dirname() of HTTPS URIs."""

        self.assertEqual(
            self.existingFolderButlerURI.geturl(), self.notExistingFileButlerURI.parent().geturl()
        )
        self.assertEqual(self.baseURL.geturl(), self.baseURL.parent().geturl())
        self.assertEqual(
            self.existingFileButlerURI.parent().geturl(), self.existingFileButlerURI.dirname().geturl()
        )
# Run the test suite when this module is executed as a script.
if __name__ == "__main__":
    unittest.main()