Coverage for tests/test_uri.py: 9%
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22import glob
23import os
24import pathlib
25import unittest
26import urllib.parse
28from lsst.daf.butler import ButlerURI
29from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir
# Absolute path of the directory containing this test module.
TESTDIR = os.path.dirname(os.path.abspath(__file__))
class FileURITestCase(unittest.TestCase):
    """Concrete tests for local files.

    Basic tests to show that `lsst.daf.butler.ButlerURI` compatibility
    import still works. Can be removed when deprecation period ends.
    """

    def setUp(self):
        """Create a scratch directory for the test inside ``TESTDIR``."""
        # Use a local tempdir because on macOS the temp dirs use symlinks
        # so relsymlink gets quite confused.
        self.tmpdir = makeTestTempDir(TESTDIR)

    def tearDown(self):
        """Remove the scratch directory created by `setUp`."""
        removeTestTempDir(self.tmpdir)

    def testFile(self):
        """Check existence, write, read and size of a local file URI."""
        file = os.path.join(self.tmpdir, "test.txt")
        uri = ButlerURI(file)
        self.assertFalse(uri.exists(), f"{uri} should not exist")
        self.assertEqual(uri.ospath, file)

        # A pathlib.Path must be accepted as well as a plain string.
        path = pathlib.Path(file)
        uri = ButlerURI(path)
        self.assertEqual(uri.ospath, file)

        content = "abcdefghijklmnopqrstuv\n"
        uri.write(content.encode())
        self.assertTrue(os.path.exists(file), "File should exist locally")
        self.assertTrue(uri.exists(), f"{uri} should now exist")
        self.assertEqual(uri.read().decode(), content)
        self.assertEqual(uri.size(), len(content.encode()))

        with self.assertRaises(FileNotFoundError):
            ButlerURI("file/not/there.txt").size()

        # Check that creating a URI from a URI returns the same thing
        uri2 = ButlerURI(uri)
        self.assertEqual(uri, uri2)
        self.assertIs(uri, uri2)

        with self.assertRaises(ValueError):
            # Scheme-less URIs are not allowed to support non-file roots
            # at the present time. This may change in the future to become
            # equivalent to ButlerURI.join()
            ButlerURI("a/b.txt", root=ButlerURI("s3://bucket/a/b/"))

    def testExtension(self):
        """Check that file extensions can be replaced."""
        file = ButlerURI(os.path.join(self.tmpdir, "test.txt"))
        self.assertEqual(file.updatedExtension(None), file)
        self.assertEqual(file.updatedExtension(".txt"), file)
        # An unchanged extension must hand back the very same object.
        self.assertIs(file.updatedExtension(".txt"), file)

        fits = file.updatedExtension(".fits.gz")
        self.assertEqual(fits.basename(), "test.fits.gz")
        self.assertEqual(fits.updatedExtension(".jpeg").basename(), "test.jpeg")

    def testRelative(self):
        """Check that we can get subpaths back from two URIs"""
        parent = ButlerURI(self.tmpdir, forceDirectory=True, forceAbsolute=True)
        self.assertTrue(parent.isdir())
        child = ButlerURI(os.path.join(self.tmpdir, "dir1", "file.txt"), forceAbsolute=True)

        self.assertEqual(child.relative_to(parent), "dir1/file.txt")

        not_child = ButlerURI("/a/b/dir1/file.txt")
        self.assertIsNone(not_child.relative_to(parent))
        self.assertFalse(not_child.isdir())

        not_directory = ButlerURI(os.path.join(self.tmpdir, "dir1", "file2.txt"))
        self.assertIsNone(child.relative_to(not_directory))

        # Relative URIs
        parent = ButlerURI("a/b/", forceAbsolute=False)
        child = ButlerURI("a/b/c/d.txt", forceAbsolute=False)
        self.assertFalse(child.scheme)
        self.assertEqual(child.relative_to(parent), "c/d.txt")

        # File URI and schemeless URI
        parent = ButlerURI("file:/a/b/c/")
        child = ButlerURI("e/f/g.txt", forceAbsolute=False)

        # If the child is relative and the parent is absolute we assume
        # that the child is a child of the parent unless it uses ".."
        self.assertEqual(child.relative_to(parent), "e/f/g.txt")

        child = ButlerURI("../e/f/g.txt", forceAbsolute=False)
        self.assertIsNone(child.relative_to(parent))

        child = ButlerURI("../c/e/f/g.txt", forceAbsolute=False)
        self.assertEqual(child.relative_to(parent), "e/f/g.txt")

        # Test non-file root with relative path.
        child = ButlerURI("e/f/g.txt", forceAbsolute=False)
        parent = ButlerURI("s3://hello/a/b/c/")
        self.assertEqual(child.relative_to(parent), "e/f/g.txt")

        # Test with different netloc
        child = ButlerURI("http://my.host/a/b/c.txt")
        parent = ButlerURI("http://other.host/a/")
        self.assertIsNone(child.relative_to(parent), f"{child}.relative_to({parent})")

        # Schemeless absolute child.
        # Schemeless absolute URI is constructed using root= parameter.
        parent = ButlerURI("file:///a/b/c/")
        child = ButlerURI("d/e.txt", root=parent)
        self.assertEqual(child.relative_to(parent), "d/e.txt", f"{child}.relative_to({parent})")

        parent = ButlerURI("c/", root="/a/b/")
        self.assertEqual(child.relative_to(parent), "d/e.txt", f"{child}.relative_to({parent})")

        # Absolute schemeless child with relative parent will always fail.
        parent = ButlerURI("d/e.txt", forceAbsolute=False)
        self.assertIsNone(child.relative_to(parent), f"{child}.relative_to({parent})")

    def testParents(self):
        """Test of splitting and parent walking."""
        parent = ButlerURI(self.tmpdir, forceDirectory=True, forceAbsolute=True)
        child_file = parent.join("subdir/file.txt")
        self.assertFalse(child_file.isdir())
        child_subdir, file = child_file.split()
        self.assertEqual(file, "file.txt")
        self.assertTrue(child_subdir.isdir())
        self.assertEqual(child_file.dirname(), child_subdir)
        self.assertEqual(child_file.basename(), file)
        self.assertEqual(child_file.parent(), child_subdir)
        derived_parent = child_subdir.parent()
        self.assertEqual(derived_parent, parent)
        self.assertTrue(derived_parent.isdir())
        self.assertEqual(child_file.parent().parent(), parent)

    def testEnvVar(self):
        """Test that environment variables are expanded."""
        # ``import unittest`` alone does not guarantee that the ``mock``
        # submodule has been loaded, so import it explicitly here.
        from unittest import mock

        with mock.patch.dict(os.environ, {"MY_TEST_DIR": "/a/b/c"}):
            uri = ButlerURI("${MY_TEST_DIR}/d.txt")
        self.assertEqual(uri.path, "/a/b/c/d.txt")
        self.assertEqual(uri.scheme, "file")

        # This will not expand
        uri = ButlerURI("${MY_TEST_DIR}/d.txt", forceAbsolute=False)
        self.assertEqual(uri.path, "${MY_TEST_DIR}/d.txt")
        self.assertFalse(uri.scheme)

    def testMkdir(self):
        """Check that nested directories can be created and written into."""
        tmpdir = ButlerURI(self.tmpdir)
        newdir = tmpdir.join("newdir/seconddir")
        newdir.mkdir()
        self.assertTrue(newdir.exists())
        newfile = newdir.join("temp.txt")
        newfile.write(b"Data")
        self.assertTrue(newfile.exists())

    def testTransfer(self):
        """Check each local transfer mode, overwrite handling and reads."""
        src = ButlerURI(os.path.join(self.tmpdir, "test.txt"))
        content = "Content is some content\nwith something to say\n\n"
        src.write(content.encode())

        for mode in ("copy", "link", "hardlink", "symlink", "relsymlink"):
            dest = ButlerURI(os.path.join(self.tmpdir, f"dest_{mode}.txt"))
            dest.transfer_from(src, transfer=mode)
            self.assertTrue(dest.exists(), f"Check that {dest} exists (transfer={mode})")

            with open(dest.ospath, "r") as fh:
                new_content = fh.read()
            self.assertEqual(new_content, content)

            if mode in ("symlink", "relsymlink"):
                self.assertTrue(os.path.islink(dest.ospath), f"Check that {dest} is symlink")

            # If the source and destination are hardlinks of each other
            # the transfer should work even if overwrite=False.
            if mode in ("link", "hardlink"):
                dest.transfer_from(src, transfer=mode)
            else:
                with self.assertRaises(
                    FileExistsError, msg=f"Overwrite of {dest} should not be allowed ({mode})"
                ):
                    dest.transfer_from(src, transfer=mode)

            dest.transfer_from(src, transfer=mode, overwrite=True)

            os.remove(dest.ospath)

        # Compare against the text originally written rather than a
        # variable left over from the final loop iteration.
        b = src.read()
        self.assertEqual(b.decode(), content)

        nbytes = 10
        subset = src.read(size=nbytes)
        self.assertEqual(len(subset), nbytes)
        self.assertEqual(subset.decode(), content[:nbytes])

        with self.assertRaises(ValueError):
            src.transfer_from(src, transfer="unknown")

    def testTransferIdentical(self):
        """Test overwrite of identical files."""
        dir1 = ButlerURI(os.path.join(self.tmpdir, "dir1"), forceDirectory=True)
        dir1.mkdir()
        # dir2 is a symlink to dir1 so both names reach the same files.
        dir2 = os.path.join(self.tmpdir, "dir2")
        os.symlink(dir1.ospath, dir2)

        # Write a test file.
        src_file = dir1.join("test.txt")
        content = "0123456"
        src_file.write(content.encode())

        # Construct URI to destination that should be identical.
        dest_file = ButlerURI(os.path.join(dir2), forceDirectory=True).join("test.txt")
        self.assertTrue(dest_file.exists())
        self.assertNotEqual(src_file, dest_file)

        # Transfer it over itself.
        dest_file.transfer_from(src_file, transfer="symlink", overwrite=True)
        new_content = dest_file.read().decode()
        self.assertEqual(content, new_content)

    def testResource(self):
        """Check reading and existence tests for ``resource://`` URIs."""
        u = ButlerURI("resource://lsst.daf.butler/configs/datastore.yaml")
        self.assertTrue(u.exists(), f"Check {u} exists")

        content = u.read().decode()
        self.assertTrue(content.startswith("datastore:"))

        truncated = u.read(size=9).decode()
        self.assertEqual(truncated, "datastore")

        d = ButlerURI("resource://lsst.daf.butler/configs", forceDirectory=True)
        # Check the directory URI itself, matching the failure message.
        self.assertTrue(d.exists(), f"Check directory {d} exists")

        j = d.join("datastore.yaml")
        self.assertEqual(u, j)
        self.assertFalse(j.dirLike)
        self.assertFalse(j.isdir())
        not_there = d.join("not-there.yaml")
        self.assertFalse(not_there.exists())

        bad = ButlerURI("resource://bad.module/not.yaml")
        multi = ButlerURI.mexists([u, bad, not_there])
        self.assertTrue(multi[u])
        self.assertFalse(multi[bad])
        self.assertFalse(multi[not_there])

    def testEscapes(self):
        """Special characters in file paths"""
        src = ButlerURI("bbb/???/test.txt", root=self.tmpdir, forceAbsolute=True)
        self.assertFalse(src.scheme)
        src.write(b"Some content")
        self.assertTrue(src.exists())

        # abspath always returns a file scheme
        file = src.abspath()
        self.assertTrue(file.exists())
        self.assertIn("???", file.ospath)
        self.assertNotIn("???", file.path)

        file = file.updatedFile("tests??.txt")
        self.assertNotIn("??.txt", file.path)
        file.write(b"Other content")
        self.assertEqual(file.read(), b"Other content")

        src = src.updatedFile("tests??.txt")
        self.assertIn("??.txt", src.path)
        self.assertEqual(file.read(), src.read(), f"reading from {file.ospath} and {src.ospath}")

        # File URI and schemeless URI
        parent = ButlerURI("file:" + urllib.parse.quote("/a/b/c/de/??/"))
        child = ButlerURI("e/f/g.txt", forceAbsolute=False)
        self.assertEqual(child.relative_to(parent), "e/f/g.txt")

        child = ButlerURI("e/f??#/g.txt", forceAbsolute=False)
        self.assertEqual(child.relative_to(parent), "e/f??#/g.txt")

        child = ButlerURI("file:" + urllib.parse.quote("/a/b/c/de/??/e/f??#/g.txt"))
        self.assertEqual(child.relative_to(parent), "e/f??#/g.txt")

        self.assertEqual(child.relativeToPathRoot, "a/b/c/de/??/e/f??#/g.txt")

        # Schemeless so should not quote
        directory = ButlerURI("bbb/???/", root=self.tmpdir, forceAbsolute=True, forceDirectory=True)
        self.assertIn("???", directory.ospath)
        self.assertIn("???", directory.path)
        self.assertFalse(directory.scheme)

        # dir.join() morphs into a file scheme
        new = directory.join("test_j.txt")
        self.assertIn("???", new.ospath, f"Checking {new}")
        new.write(b"Content")

        new2name = "###/test??.txt"
        new2 = directory.join(new2name)
        self.assertIn("???", new2.ospath)
        new2.write(b"Content")
        self.assertTrue(new2.ospath.endswith(new2name))
        self.assertEqual(new.read(), new2.read())

        fdir = directory.abspath()
        self.assertNotIn("???", fdir.path)
        self.assertIn("???", fdir.ospath)
        self.assertEqual(fdir.scheme, "file")
        fnew = directory.join("test_jf.txt")
        fnew.write(b"Content")

        fnew2 = fdir.join(new2name)
        fnew2.write(b"Content")
        self.assertTrue(fnew2.ospath.endswith(new2name))
        self.assertNotIn("###", fnew2.path)

        self.assertEqual(fnew.read(), fnew2.read())

        # Test that children relative to schemeless and file schemes
        # still return the same unquoted name
        self.assertEqual(fnew2.relative_to(fdir), new2name, f"{fnew2}.relative_to({fdir})")
        self.assertEqual(fnew2.relative_to(directory), new2name, f"{fnew2}.relative_to({directory})")
        self.assertEqual(new2.relative_to(fdir), new2name, f"{new2}.relative_to({fdir})")
        self.assertEqual(new2.relative_to(directory), new2name, f"{new2}.relative_to({directory})")

        # Check for double quoting
        plus_path = "/a/b/c+d/"
        with self.assertLogs(level="WARNING"):
            uri = ButlerURI(urllib.parse.quote(plus_path), forceDirectory=True)
        self.assertEqual(uri.ospath, plus_path)

        # Check that # is not escaped for schemeless URIs
        hash_path = "/a/b#/c&d#xyz"
        hpos = hash_path.rfind("#")
        uri = ButlerURI(hash_path)
        self.assertEqual(uri.ospath, hash_path[:hpos])
        self.assertEqual(uri.fragment, hash_path[hpos + 1 :])

    def testHash(self):
        """Test that we can store URIs in sets and as keys."""
        uri1 = ButlerURI(TESTDIR)
        uri2 = uri1.join("test/")
        s = {uri1, uri2}
        self.assertIn(uri1, s)

        d = {uri1: "1", uri2: "2"}
        self.assertEqual(d[uri2], "2")

    def testWalk(self):
        """Test ButlerURI.walk()."""
        test_dir_uri = ButlerURI(TESTDIR)

        file = test_dir_uri.join("config/basic/butler.yaml")
        found = list(ButlerURI.findFileResources([file]))
        self.assertEqual(found[0], file)

        # Compare against the full local paths
        expected = {
            p for p in glob.glob(os.path.join(TESTDIR, "config", "**"), recursive=True) if os.path.isfile(p)
        }
        found = {u.ospath for u in ButlerURI.findFileResources([test_dir_uri.join("config")])}
        self.assertEqual(found, expected)

        # Now solely the YAML files
        expected_yaml = set(glob.glob(os.path.join(TESTDIR, "config", "**", "*.yaml"), recursive=True))
        found = {
            u.ospath
            for u in ButlerURI.findFileResources([test_dir_uri.join("config")], file_filter=r".*\.yaml$")
        }
        self.assertEqual(found, expected_yaml)

        # Now two explicit directories and a file
        expected = set(glob.glob(os.path.join(TESTDIR, "config", "**", "basic", "*.yaml"), recursive=True))
        expected.update(
            glob.glob(os.path.join(TESTDIR, "config", "**", "templates", "*.yaml"), recursive=True)
        )
        expected.add(file.ospath)

        found = {
            u.ospath
            for u in ButlerURI.findFileResources(
                [file, test_dir_uri.join("config/basic"), test_dir_uri.join("config/templates")],
                file_filter=r".*\.yaml$",
            )
        }
        self.assertEqual(found, expected)

        # Group by directory -- find everything and compare it with what
        # we expected to be there in total. We expect to find 9 directories
        # containing yaml files so make sure we only iterate 9 times.
        found_yaml = set()
        counter = 0
        for uris in ButlerURI.findFileResources(
            [file, test_dir_uri.join("config/")], file_filter=r".*\.yaml$", grouped=True
        ):
            found = {u.ospath for u in uris}
            if found:
                counter += 1

            found_yaml.update(found)

        self.assertEqual(found_yaml, expected_yaml)
        self.assertEqual(counter, 9)

        # Grouping but check that single files are returned in a single group
        # at the end
        file2 = test_dir_uri.join("config/templates/templates-bad.yaml")
        found = list(
            ButlerURI.findFileResources([file, file2, test_dir_uri.join("config/dbAuth")], grouped=True)
        )
        self.assertEqual(len(found), 2)
        self.assertEqual(list(found[1]), [file, file2])

        # Walking a URI that refers to a file is expected to raise.
        with self.assertRaises(ValueError):
            list(file.walk())

    def testRootURI(self):
        """Test ButlerURI.root_uri()."""
        uri = ButlerURI("https://www.notexist.com:8080/file/test")
        uri2 = ButlerURI("s3://www.notexist.com/file/test")
        self.assertEqual(uri.root_uri().geturl(), "https://www.notexist.com:8080/")
        self.assertEqual(uri2.root_uri().geturl(), "s3://www.notexist.com/")

    def testJoin(self):
        """Test .join method."""
        root_str = "s3://bucket/hsc/payload/"
        root = ButlerURI(root_str)

        self.assertEqual(root.join("b/test.txt").geturl(), f"{root_str}b/test.txt")
        add_dir = root.join("b/c/d/")
        self.assertTrue(add_dir.isdir())
        self.assertEqual(add_dir.geturl(), f"{root_str}b/c/d/")

        quote_example = "b&c.t@x#t"
        needs_quote = root.join(quote_example)
        self.assertEqual(needs_quote.unquoted_path, f"/hsc/payload/{quote_example}")

        # Joining a URI that carries its own scheme yields that URI.
        other = ButlerURI("file://localhost/test.txt")
        self.assertEqual(root.join(other), other)
        self.assertEqual(other.join("b/new.txt").geturl(), "file://localhost/b/new.txt")

        joined = ButlerURI("s3://bucket/hsc/payload/").join(ButlerURI("test.qgraph", forceAbsolute=False))
        self.assertEqual(joined, ButlerURI("s3://bucket/hsc/payload/test.qgraph"))

        with self.assertRaises(ValueError):
            ButlerURI("s3://bucket/hsc/payload/").join(ButlerURI("test.qgraph"))

    def testTemporary(self):
        """Check that temporary URIs are usable and removed on exit."""
        with ButlerURI.temporary_uri(suffix=".json") as tmp:
            self.assertEqual(tmp.getExtension(), ".json", f"uri: {tmp}")
            self.assertTrue(tmp.isabs(), f"uri: {tmp}")
            self.assertFalse(tmp.exists(), f"uri: {tmp}")
            tmp.write(b"abcd")
            self.assertTrue(tmp.exists(), f"uri: {tmp}")
            self.assertTrue(tmp.isTemporary)
        # The file must be gone once the context manager has exited.
        self.assertFalse(tmp.exists(), f"uri: {tmp}")

        tmpdir = ButlerURI(self.tmpdir, forceDirectory=True)
        with ButlerURI.temporary_uri(prefix=tmpdir, suffix=".yaml") as tmp:
            # Use a specified tmpdir and check it is okay for the file
            # to not be created.
            self.assertFalse(tmp.exists(), f"uri: {tmp}")
        self.assertTrue(tmpdir.exists(), f"uri: {tmpdir} still exists")
if __name__ == "__main__":
    # Run the test cases in this module when it is executed as a script.
    unittest.main()