Coverage for tests/test_uri.py: 9%
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import glob
import os
import pathlib
import unittest
import unittest.mock
import urllib.parse

from lsst.daf.butler import ButlerURI
from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir
# Absolute path of the directory holding this test module; the tests below
# use it as the parent for scratch directories and to locate "config" files.
TESTDIR = os.path.dirname(os.path.abspath(__file__))
class FileURITestCase(unittest.TestCase):
    """Concrete tests for local files.

    Basic tests to show that `lsst.daf.butler.ButlerURI` compatibility
    import still works. Can be removed when deprecation period ends.
    """

    def setUp(self):
        """Create a scratch directory for the test under ``TESTDIR``."""
        # Use a local tempdir because on macOS the temp dirs use symlinks
        # so relsymlink gets quite confused.
        self.tmpdir = makeTestTempDir(TESTDIR)

    def tearDown(self):
        """Remove the scratch directory created in `setUp`."""
        removeTestTempDir(self.tmpdir)

    def testFile(self):
        """Test construction, write/read and size of a local file URI."""
        file = os.path.join(self.tmpdir, "test.txt")
        uri = ButlerURI(file)
        self.assertIsInstance(uri, ButlerURI)
        self.assertFalse(uri.exists(), f"{uri} should not exist")
        self.assertEqual(uri.ospath, file)

        path = pathlib.Path(file)
        uri = ButlerURI(path)
        self.assertEqual(uri.ospath, file)

        content = "abcdefghijklmnopqrstuv\n"
        uri.write(content.encode())
        self.assertTrue(os.path.exists(file), "File should exist locally")
        self.assertTrue(uri.exists(), f"{uri} should now exist")
        self.assertEqual(uri.read().decode(), content)
        self.assertEqual(uri.size(), len(content.encode()))

        with self.assertRaises(FileNotFoundError):
            ButlerURI("file/not/there.txt").size()

        # Check that creating a URI from a URI returns the same thing
        uri2 = ButlerURI(uri)
        self.assertEqual(uri, uri2)
        self.assertIs(uri, uri2)

        with self.assertRaises(ValueError):
            # Scheme-less URIs are not allowed to support non-file roots
            # at the present time. This may change in the future to become
            # equivalent to ButlerURI.join()
            ButlerURI("a/b.txt", root=ButlerURI("s3://bucket/a/b/"))

    def testExtension(self):
        """Test replacing the file extension of a URI."""
        file = ButlerURI(os.path.join(self.tmpdir, "test.txt"))
        self.assertEqual(file.updatedExtension(None), file)
        self.assertEqual(file.updatedExtension(".txt"), file)
        # An unchanged extension is expected to hand back the very same object.
        self.assertIs(file.updatedExtension(".txt"), file)

        fits = file.updatedExtension(".fits.gz")
        self.assertEqual(fits.basename(), "test.fits.gz")
        self.assertEqual(fits.updatedExtension(".jpeg").basename(), "test.jpeg")

    def testRelative(self):
        """Check that we can get subpaths back from two URIs"""
        parent = ButlerURI(self.tmpdir, forceDirectory=True, forceAbsolute=True)
        self.assertTrue(parent.isdir())
        child = ButlerURI(os.path.join(self.tmpdir, "dir1", "file.txt"), forceAbsolute=True)

        self.assertEqual(child.relative_to(parent), "dir1/file.txt")

        not_child = ButlerURI("/a/b/dir1/file.txt")
        self.assertIsNone(not_child.relative_to(parent))
        self.assertFalse(not_child.isdir())

        not_directory = ButlerURI(os.path.join(self.tmpdir, "dir1", "file2.txt"))
        self.assertIsNone(child.relative_to(not_directory))

        # Relative URIs
        parent = ButlerURI("a/b/", forceAbsolute=False)
        child = ButlerURI("a/b/c/d.txt", forceAbsolute=False)
        self.assertFalse(child.scheme)
        self.assertEqual(child.relative_to(parent), "c/d.txt")

        # File URI and schemeless URI
        parent = ButlerURI("file:/a/b/c/")
        child = ButlerURI("e/f/g.txt", forceAbsolute=False)

        # If the child is relative and the parent is absolute we assume
        # that the child is a child of the parent unless it uses ".."
        self.assertEqual(child.relative_to(parent), "e/f/g.txt")

        child = ButlerURI("../e/f/g.txt", forceAbsolute=False)
        self.assertIsNone(child.relative_to(parent))

        child = ButlerURI("../c/e/f/g.txt", forceAbsolute=False)
        self.assertEqual(child.relative_to(parent), "e/f/g.txt")

        # Test non-file root with relative path.
        child = ButlerURI("e/f/g.txt", forceAbsolute=False)
        parent = ButlerURI("s3://hello/a/b/c/")
        self.assertEqual(child.relative_to(parent), "e/f/g.txt")

        # Test with different netloc
        child = ButlerURI("http://my.host/a/b/c.txt")
        parent = ButlerURI("http://other.host/a/")
        self.assertIsNone(child.relative_to(parent), f"{child}.relative_to({parent})")

        # Schemeless absolute child.
        # Schemeless absolute URI is constructed using root= parameter.
        parent = ButlerURI("file:///a/b/c/")
        child = ButlerURI("d/e.txt", root=parent)
        self.assertEqual(child.relative_to(parent), "d/e.txt", f"{child}.relative_to({parent})")

        parent = ButlerURI("c/", root="/a/b/")
        self.assertEqual(child.relative_to(parent), "d/e.txt", f"{child}.relative_to({parent})")

        # Absolute schemeless child with relative parent will always fail.
        parent = ButlerURI("d/e.txt", forceAbsolute=False)
        self.assertIsNone(child.relative_to(parent), f"{child}.relative_to({parent})")

    def testParents(self):
        """Test of splitting and parent walking."""
        parent = ButlerURI(self.tmpdir, forceDirectory=True, forceAbsolute=True)
        child_file = parent.join("subdir/file.txt")
        self.assertFalse(child_file.isdir())
        child_subdir, file = child_file.split()
        self.assertEqual(file, "file.txt")
        self.assertTrue(child_subdir.isdir())
        self.assertEqual(child_file.dirname(), child_subdir)
        self.assertEqual(child_file.basename(), file)
        self.assertEqual(child_file.parent(), child_subdir)
        derived_parent = child_subdir.parent()
        self.assertEqual(derived_parent, parent)
        self.assertTrue(derived_parent.isdir())
        self.assertEqual(child_file.parent().parent(), parent)

    def testEnvVar(self):
        """Test that environment variables are expanded."""
        # unittest.mock is a submodule and must be imported explicitly at
        # module level for this attribute access to be reliable.
        with unittest.mock.patch.dict(os.environ, {"MY_TEST_DIR": "/a/b/c"}):
            uri = ButlerURI("${MY_TEST_DIR}/d.txt")
        self.assertEqual(uri.path, "/a/b/c/d.txt")
        self.assertEqual(uri.scheme, "file")

        # This will not expand
        uri = ButlerURI("${MY_TEST_DIR}/d.txt", forceAbsolute=False)
        self.assertEqual(uri.path, "${MY_TEST_DIR}/d.txt")
        self.assertFalse(uri.scheme)

    def testMkdir(self):
        """Test creating a nested directory and writing a file inside it."""
        tmpdir = ButlerURI(self.tmpdir)
        newdir = tmpdir.join("newdir/seconddir")
        newdir.mkdir()
        self.assertTrue(newdir.exists())
        newfile = newdir.join("temp.txt")
        newfile.write(b"Data")
        self.assertTrue(newfile.exists())

    def testTransfer(self):
        """Test each local transfer mode, overwrite handling and reads."""
        src = ButlerURI(os.path.join(self.tmpdir, "test.txt"))
        content = "Content is some content\nwith something to say\n\n"
        src.write(content.encode())

        for mode in ("copy", "link", "hardlink", "symlink", "relsymlink"):
            dest = ButlerURI(os.path.join(self.tmpdir, f"dest_{mode}.txt"))
            dest.transfer_from(src, transfer=mode)
            self.assertTrue(dest.exists(), f"Check that {dest} exists (transfer={mode})")

            with open(dest.ospath, "r") as fh:
                new_content = fh.read()
            self.assertEqual(new_content, content)

            if mode in ("symlink", "relsymlink"):
                self.assertTrue(os.path.islink(dest.ospath), f"Check that {dest} is symlink")

            # If the source and destination are hardlinks of each other
            # the transfer should work even if overwrite=False.
            if mode in ("link", "hardlink"):
                dest.transfer_from(src, transfer=mode)
            else:
                with self.assertRaises(
                    FileExistsError, msg=f"Overwrite of {dest} should not be allowed ({mode})"
                ):
                    dest.transfer_from(src, transfer=mode)

            dest.transfer_from(src, transfer=mode, overwrite=True)

            os.remove(dest.ospath)

        # The source must be unchanged after all the transfers. Compare with
        # the text originally written rather than a variable leaked from the
        # loop above.
        b = src.read()
        self.assertEqual(b.decode(), content)

        nbytes = 10
        subset = src.read(size=nbytes)
        self.assertEqual(len(subset), nbytes)
        self.assertEqual(subset.decode(), content[:nbytes])

        with self.assertRaises(ValueError):
            src.transfer_from(src, transfer="unknown")

    def testTransferIdentical(self):
        """Test overwrite of identical files."""
        dir1 = ButlerURI(os.path.join(self.tmpdir, "dir1"), forceDirectory=True)
        dir1.mkdir()
        dir2 = os.path.join(self.tmpdir, "dir2")
        os.symlink(dir1.ospath, dir2)

        # Write a test file.
        src_file = dir1.join("test.txt")
        content = "0123456"
        src_file.write(content.encode())

        # Construct URI to destination that should be identical.
        dest_file = ButlerURI(os.path.join(dir2), forceDirectory=True).join("test.txt")
        self.assertTrue(dest_file.exists())
        self.assertNotEqual(src_file, dest_file)

        # Transfer it over itself.
        dest_file.transfer_from(src_file, transfer="symlink", overwrite=True)
        new_content = dest_file.read().decode()
        self.assertEqual(content, new_content)

    def testResource(self):
        """Test URIs that use the ``resource`` scheme."""
        u = ButlerURI("resource://lsst.daf.butler/configs/datastore.yaml")
        self.assertTrue(u.exists(), f"Check {u} exists")

        content = u.read().decode()
        self.assertTrue(content.startswith("datastore:"))

        truncated = u.read(size=9).decode()
        self.assertEqual(truncated, "datastore")

        d = ButlerURI("resource://lsst.daf.butler/configs", forceDirectory=True)
        # Check the directory URI itself, as the failure message states.
        self.assertTrue(d.exists(), f"Check directory {d} exists")

        j = d.join("datastore.yaml")
        self.assertEqual(u, j)
        self.assertFalse(j.dirLike)
        self.assertFalse(j.isdir())
        not_there = d.join("not-there.yaml")
        self.assertFalse(not_there.exists())

        bad = ButlerURI("resource://bad.module/not.yaml")
        multi = ButlerURI.mexists([u, bad, not_there])
        self.assertTrue(multi[u])
        self.assertFalse(multi[bad])
        self.assertFalse(multi[not_there])

    def testEscapes(self):
        """Special characters in file paths"""
        src = ButlerURI("bbb/???/test.txt", root=self.tmpdir, forceAbsolute=True)
        self.assertFalse(src.scheme)
        src.write(b"Some content")
        self.assertTrue(src.exists())

        # abspath always returns a file scheme
        file = src.abspath()
        self.assertTrue(file.exists())
        self.assertIn("???", file.ospath)
        self.assertNotIn("???", file.path)

        file = file.updatedFile("tests??.txt")
        self.assertNotIn("??.txt", file.path)
        file.write(b"Other content")
        self.assertEqual(file.read(), b"Other content")

        src = src.updatedFile("tests??.txt")
        self.assertIn("??.txt", src.path)
        self.assertEqual(file.read(), src.read(), f"reading from {file.ospath} and {src.ospath}")

        # File URI and schemeless URI
        parent = ButlerURI("file:" + urllib.parse.quote("/a/b/c/de/??/"))
        child = ButlerURI("e/f/g.txt", forceAbsolute=False)
        self.assertEqual(child.relative_to(parent), "e/f/g.txt")

        child = ButlerURI("e/f??#/g.txt", forceAbsolute=False)
        self.assertEqual(child.relative_to(parent), "e/f??#/g.txt")

        child = ButlerURI("file:" + urllib.parse.quote("/a/b/c/de/??/e/f??#/g.txt"))
        self.assertEqual(child.relative_to(parent), "e/f??#/g.txt")

        self.assertEqual(child.relativeToPathRoot, "a/b/c/de/??/e/f??#/g.txt")

        # Schemeless so should not quote
        dir_uri = ButlerURI("bbb/???/", root=self.tmpdir, forceAbsolute=True, forceDirectory=True)
        self.assertIn("???", dir_uri.ospath)
        self.assertIn("???", dir_uri.path)
        self.assertFalse(dir_uri.scheme)

        # join() on the schemeless directory morphs into a file scheme
        new = dir_uri.join("test_j.txt")
        self.assertIn("???", new.ospath, f"Checking {new}")
        new.write(b"Content")

        new2name = "###/test??.txt"
        new2 = dir_uri.join(new2name)
        self.assertIn("???", new2.ospath)
        new2.write(b"Content")
        self.assertTrue(new2.ospath.endswith(new2name))
        self.assertEqual(new.read(), new2.read())

        fdir = dir_uri.abspath()
        self.assertNotIn("???", fdir.path)
        self.assertIn("???", fdir.ospath)
        self.assertEqual(fdir.scheme, "file")
        fnew = dir_uri.join("test_jf.txt")
        fnew.write(b"Content")

        fnew2 = fdir.join(new2name)
        fnew2.write(b"Content")
        self.assertTrue(fnew2.ospath.endswith(new2name))
        self.assertNotIn("###", fnew2.path)

        self.assertEqual(fnew.read(), fnew2.read())

        # Test that children relative to schemeless and file schemes
        # still return the same unquoted name
        self.assertEqual(fnew2.relative_to(fdir), new2name, f"{fnew2}.relative_to({fdir})")
        self.assertEqual(fnew2.relative_to(dir_uri), new2name, f"{fnew2}.relative_to({dir_uri})")
        self.assertEqual(new2.relative_to(fdir), new2name, f"{new2}.relative_to({fdir})")
        self.assertEqual(new2.relative_to(dir_uri), new2name, f"{new2}.relative_to({dir_uri})")

        # Check for double quoting
        plus_path = "/a/b/c+d/"
        with self.assertLogs(level="WARNING"):
            uri = ButlerURI(urllib.parse.quote(plus_path), forceDirectory=True)
        self.assertEqual(uri.ospath, plus_path)

        # Check that # is not escaped for schemeless URIs
        hash_path = "/a/b#/c&d#xyz"
        hpos = hash_path.rfind("#")
        uri = ButlerURI(hash_path)
        self.assertEqual(uri.ospath, hash_path[:hpos])
        self.assertEqual(uri.fragment, hash_path[hpos + 1 :])

    def testHash(self):
        """Test that we can store URIs in sets and as keys."""
        uri1 = ButlerURI(TESTDIR)
        uri2 = uri1.join("test/")
        s = {uri1, uri2}
        self.assertIn(uri1, s)

        d = {uri1: "1", uri2: "2"}
        self.assertEqual(d[uri2], "2")

    def testWalk(self):
        """Test ButlerURI.walk()."""
        test_dir_uri = ButlerURI(TESTDIR)

        file = test_dir_uri.join("config/basic/butler.yaml")
        found = list(ButlerURI.findFileResources([file]))
        self.assertEqual(found[0], file)

        # Compare against the full local paths
        expected = {
            p for p in glob.glob(os.path.join(TESTDIR, "config", "**"), recursive=True) if os.path.isfile(p)
        }
        found = {u.ospath for u in ButlerURI.findFileResources([test_dir_uri.join("config")])}
        self.assertEqual(found, expected)

        # Now solely the YAML files
        expected_yaml = set(glob.glob(os.path.join(TESTDIR, "config", "**", "*.yaml"), recursive=True))
        found = {
            u.ospath
            for u in ButlerURI.findFileResources([test_dir_uri.join("config")], file_filter=r".*\.yaml$")
        }
        self.assertEqual(found, expected_yaml)

        # Now two explicit directories and a file
        expected = set(glob.glob(os.path.join(TESTDIR, "config", "**", "basic", "*.yaml"), recursive=True))
        expected.update(
            glob.glob(os.path.join(TESTDIR, "config", "**", "templates", "*.yaml"), recursive=True)
        )
        expected.add(file.ospath)

        found = {
            u.ospath
            for u in ButlerURI.findFileResources(
                [file, test_dir_uri.join("config/basic"), test_dir_uri.join("config/templates")],
                file_filter=r".*\.yaml$",
            )
        }
        self.assertEqual(found, expected)

        # Group by directory -- find everything and compare it with what
        # we expected to be there in total. We expect to find 9 directories
        # containing yaml files so make sure we only iterate 9 times.
        found_yaml = set()
        counter = 0
        for uris in ButlerURI.findFileResources(
            [file, test_dir_uri.join("config/")], file_filter=r".*\.yaml$", grouped=True
        ):
            found = {u.ospath for u in uris}
            if found:
                counter += 1

            found_yaml.update(found)

        self.assertEqual(found_yaml, expected_yaml)
        self.assertEqual(counter, 9)

        # Grouping but check that single files are returned in a single group
        # at the end
        file2 = test_dir_uri.join("config/templates/templates-bad.yaml")
        found = list(
            ButlerURI.findFileResources([file, file2, test_dir_uri.join("config/dbAuth")], grouped=True)
        )
        self.assertEqual(len(found), 2)
        self.assertEqual(list(found[1]), [file, file2])

        with self.assertRaises(ValueError):
            list(file.walk())

    def testRootURI(self):
        """Test ButlerURI.root_uri()."""
        uri = ButlerURI("https://www.notexist.com:8080/file/test")
        uri2 = ButlerURI("s3://www.notexist.com/file/test")
        self.assertEqual(uri.root_uri().geturl(), "https://www.notexist.com:8080/")
        self.assertEqual(uri2.root_uri().geturl(), "s3://www.notexist.com/")

    def testJoin(self):
        """Test .join method."""
        root_str = "s3://bucket/hsc/payload/"
        root = ButlerURI(root_str)

        self.assertEqual(root.join("b/test.txt").geturl(), f"{root_str}b/test.txt")
        add_dir = root.join("b/c/d/")
        self.assertTrue(add_dir.isdir())
        self.assertEqual(add_dir.geturl(), f"{root_str}b/c/d/")

        quote_example = "b&c.t@x#t"
        needs_quote = root.join(quote_example)
        self.assertEqual(needs_quote.unquoted_path, f"/hsc/payload/{quote_example}")

        other = ButlerURI("file://localhost/test.txt")
        self.assertEqual(root.join(other), other)
        self.assertEqual(other.join("b/new.txt").geturl(), "file://localhost/b/new.txt")

        joined = ButlerURI("s3://bucket/hsc/payload/").join(ButlerURI("test.qgraph", forceAbsolute=False))
        self.assertEqual(joined, ButlerURI("s3://bucket/hsc/payload/test.qgraph"))

        with self.assertRaises(ValueError):
            ButlerURI("s3://bucket/hsc/payload/").join(ButlerURI("test.qgraph"))

    def testTemporary(self):
        """Test the ``temporary_uri`` context manager."""
        with ButlerURI.temporary_uri(suffix=".json") as tmp:
            self.assertEqual(tmp.getExtension(), ".json", f"uri: {tmp}")
            self.assertTrue(tmp.isabs(), f"uri: {tmp}")
            self.assertFalse(tmp.exists(), f"uri: {tmp}")
            tmp.write(b"abcd")
            self.assertTrue(tmp.exists(), f"uri: {tmp}")
            self.assertTrue(tmp.isTemporary)
        # The file written inside the context must be gone on exit.
        self.assertFalse(tmp.exists(), f"uri: {tmp}")

        tmpdir = ButlerURI(self.tmpdir, forceDirectory=True)
        with ButlerURI.temporary_uri(prefix=tmpdir, suffix=".yaml") as tmp:
            # Use a specified tmpdir and check it is okay for the file
            # to not be created.
            self.assertFalse(tmp.exists(), f"uri: {tmp}")
        self.assertTrue(tmpdir.exists(), f"uri: {tmpdir} still exists")
# Run the test suite when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()