Coverage for tests/test_uri.py: 9%

Shortcuts on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

304 statements  

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22import glob 

23import os 

24import pathlib 

25import unittest 

26import urllib.parse 

27 

28from lsst.daf.butler import ButlerURI 

29from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir 

30 

# Absolute path of the directory containing this test module.
TESTDIR = os.path.abspath(os.path.dirname(__file__))

32 

33 

class FileURITestCase(unittest.TestCase):
    """Concrete tests for local files.

    Basic tests to show that `lsst.daf.butler.ButlerURI` compatibility
    import still works. Can be removed when deprecation period ends.
    """

    def setUp(self):
        """Create the per-test scratch directory."""
        # Use a local tempdir because on macOS the temp dirs use symlinks
        # so relsymlink gets quite confused.
        self.tmpdir = makeTestTempDir(TESTDIR)

    def tearDown(self):
        """Remove the scratch directory created in `setUp`."""
        removeTestTempDir(self.tmpdir)

    def testFile(self):
        """Test construction, existence checks, read, write and size."""
        file = os.path.join(self.tmpdir, "test.txt")
        uri = ButlerURI(file)
        self.assertIsInstance(uri, ButlerURI)
        self.assertFalse(uri.exists(), f"{uri} should not exist")
        self.assertEqual(uri.ospath, file)

        path = pathlib.Path(file)
        uri = ButlerURI(path)
        self.assertEqual(uri.ospath, file)

        content = "abcdefghijklmnopqrstuv\n"
        uri.write(content.encode())
        self.assertTrue(os.path.exists(file), "File should exist locally")
        self.assertTrue(uri.exists(), f"{uri} should now exist")
        self.assertEqual(uri.read().decode(), content)
        self.assertEqual(uri.size(), len(content.encode()))

        with self.assertRaises(FileNotFoundError):
            ButlerURI("file/not/there.txt").size()

        # Check that creating a URI from a URI returns the same thing
        uri2 = ButlerURI(uri)
        self.assertEqual(uri, uri2)
        self.assertEqual(id(uri), id(uri2))

        with self.assertRaises(ValueError):
            # Scheme-less URIs are not allowed to support non-file roots
            # at the present time. This may change in the future to become
            # equivalent to ButlerURI.join()
            ButlerURI("a/b.txt", root=ButlerURI("s3://bucket/a/b/"))

    def testExtension(self):
        """Test that file extensions can be replaced."""
        file = ButlerURI(os.path.join(self.tmpdir, "test.txt"))
        self.assertEqual(file.updatedExtension(None), file)
        self.assertEqual(file.updatedExtension(".txt"), file)
        self.assertEqual(id(file.updatedExtension(".txt")), id(file))

        fits = file.updatedExtension(".fits.gz")
        self.assertEqual(fits.basename(), "test.fits.gz")
        self.assertEqual(fits.updatedExtension(".jpeg").basename(), "test.jpeg")

    def testRelative(self):
        """Check that we can get subpaths back from two URIs"""
        parent = ButlerURI(self.tmpdir, forceDirectory=True, forceAbsolute=True)
        self.assertTrue(parent.isdir())
        child = ButlerURI(os.path.join(self.tmpdir, "dir1", "file.txt"), forceAbsolute=True)

        self.assertEqual(child.relative_to(parent), "dir1/file.txt")

        not_child = ButlerURI("/a/b/dir1/file.txt")
        self.assertIsNone(not_child.relative_to(parent))
        self.assertFalse(not_child.isdir())

        not_directory = ButlerURI(os.path.join(self.tmpdir, "dir1", "file2.txt"))
        self.assertIsNone(child.relative_to(not_directory))

        # Relative URIs
        parent = ButlerURI("a/b/", forceAbsolute=False)
        child = ButlerURI("a/b/c/d.txt", forceAbsolute=False)
        self.assertFalse(child.scheme)
        self.assertEqual(child.relative_to(parent), "c/d.txt")

        # File URI and schemeless URI
        parent = ButlerURI("file:/a/b/c/")
        child = ButlerURI("e/f/g.txt", forceAbsolute=False)

        # If the child is relative and the parent is absolute we assume
        # that the child is a child of the parent unless it uses ".."
        self.assertEqual(child.relative_to(parent), "e/f/g.txt")

        child = ButlerURI("../e/f/g.txt", forceAbsolute=False)
        self.assertIsNone(child.relative_to(parent))

        child = ButlerURI("../c/e/f/g.txt", forceAbsolute=False)
        self.assertEqual(child.relative_to(parent), "e/f/g.txt")

        # Test non-file root with relative path.
        child = ButlerURI("e/f/g.txt", forceAbsolute=False)
        parent = ButlerURI("s3://hello/a/b/c/")
        self.assertEqual(child.relative_to(parent), "e/f/g.txt")

        # Test with different netloc
        child = ButlerURI("http://my.host/a/b/c.txt")
        parent = ButlerURI("http://other.host/a/")
        self.assertIsNone(child.relative_to(parent), f"{child}.relative_to({parent})")

        # Schemeless absolute child.
        # Schemeless absolute URI is constructed using root= parameter.
        parent = ButlerURI("file:///a/b/c/")
        child = ButlerURI("d/e.txt", root=parent)
        self.assertEqual(child.relative_to(parent), "d/e.txt", f"{child}.relative_to({parent})")

        parent = ButlerURI("c/", root="/a/b/")
        self.assertEqual(child.relative_to(parent), "d/e.txt", f"{child}.relative_to({parent})")

        # Absolute schemeless child with relative parent will always fail.
        parent = ButlerURI("d/e.txt", forceAbsolute=False)
        self.assertIsNone(child.relative_to(parent), f"{child}.relative_to({parent})")

    def testParents(self):
        """Test of splitting and parent walking."""
        parent = ButlerURI(self.tmpdir, forceDirectory=True, forceAbsolute=True)
        child_file = parent.join("subdir/file.txt")
        self.assertFalse(child_file.isdir())
        child_subdir, file = child_file.split()
        self.assertEqual(file, "file.txt")
        self.assertTrue(child_subdir.isdir())
        self.assertEqual(child_file.dirname(), child_subdir)
        self.assertEqual(child_file.basename(), file)
        self.assertEqual(child_file.parent(), child_subdir)
        derived_parent = child_subdir.parent()
        self.assertEqual(derived_parent, parent)
        self.assertTrue(derived_parent.isdir())
        self.assertEqual(child_file.parent().parent(), parent)

    def testEnvVar(self):
        """Test that environment variables are expanded."""
        # ``import unittest`` alone does not guarantee that the ``mock``
        # submodule has been loaded, so import it explicitly here rather
        # than relying on some other module having done so.
        from unittest import mock

        with mock.patch.dict(os.environ, {"MY_TEST_DIR": "/a/b/c"}):
            uri = ButlerURI("${MY_TEST_DIR}/d.txt")
        self.assertEqual(uri.path, "/a/b/c/d.txt")
        self.assertEqual(uri.scheme, "file")

        # This will not expand
        uri = ButlerURI("${MY_TEST_DIR}/d.txt", forceAbsolute=False)
        self.assertEqual(uri.path, "${MY_TEST_DIR}/d.txt")
        self.assertFalse(uri.scheme)

    def testMkdir(self):
        """Test creation of nested directories and a file inside them."""
        tmpdir = ButlerURI(self.tmpdir)
        newdir = tmpdir.join("newdir/seconddir")
        newdir.mkdir()
        self.assertTrue(newdir.exists())
        newfile = newdir.join("temp.txt")
        newfile.write(b"Data")
        self.assertTrue(newfile.exists())

    def testTransfer(self):
        """Test each local transfer mode, overwrite handling and reads."""
        src = ButlerURI(os.path.join(self.tmpdir, "test.txt"))
        content = "Content is some content\nwith something to say\n\n"
        src.write(content.encode())

        for mode in ("copy", "link", "hardlink", "symlink", "relsymlink"):
            dest = ButlerURI(os.path.join(self.tmpdir, f"dest_{mode}.txt"))
            dest.transfer_from(src, transfer=mode)
            self.assertTrue(dest.exists(), f"Check that {dest} exists (transfer={mode})")

            with open(dest.ospath, "r") as fh:
                new_content = fh.read()
            self.assertEqual(new_content, content)

            if mode in ("symlink", "relsymlink"):
                self.assertTrue(os.path.islink(dest.ospath), f"Check that {dest} is symlink")

            # If the source and destination are hardlinks of each other
            # the transfer should work even if overwrite=False.
            if mode in ("link", "hardlink"):
                dest.transfer_from(src, transfer=mode)
            else:
                with self.assertRaises(
                    FileExistsError, msg=f"Overwrite of {dest} should not be allowed ({mode})"
                ):
                    dest.transfer_from(src, transfer=mode)

            dest.transfer_from(src, transfer=mode, overwrite=True)

            os.remove(dest.ospath)

        # Compare against the text originally written rather than a
        # variable that only exists because the loop above ran.
        b = src.read()
        self.assertEqual(b.decode(), content)

        nbytes = 10
        subset = src.read(size=nbytes)
        self.assertEqual(len(subset), nbytes)
        self.assertEqual(subset.decode(), content[:nbytes])

        with self.assertRaises(ValueError):
            src.transfer_from(src, transfer="unknown")

    def testTransferIdentical(self):
        """Test overwrite of identical files."""
        dir1 = ButlerURI(os.path.join(self.tmpdir, "dir1"), forceDirectory=True)
        dir1.mkdir()
        dir2 = os.path.join(self.tmpdir, "dir2")
        os.symlink(dir1.ospath, dir2)

        # Write a test file.
        src_file = dir1.join("test.txt")
        content = "0123456"
        src_file.write(content.encode())

        # Construct URI to destination that should be identical.
        dest_file = ButlerURI(os.path.join(dir2), forceDirectory=True).join("test.txt")
        self.assertTrue(dest_file.exists())
        self.assertNotEqual(src_file, dest_file)

        # Transfer it over itself.
        dest_file.transfer_from(src_file, transfer="symlink", overwrite=True)
        new_content = dest_file.read().decode()
        self.assertEqual(content, new_content)

    def testResource(self):
        """Test ``resource://`` URIs for a file, a directory and mexists."""
        u = ButlerURI("resource://lsst.daf.butler/configs/datastore.yaml")
        self.assertTrue(u.exists(), f"Check {u} exists")

        content = u.read().decode()
        self.assertTrue(content.startswith("datastore:"))

        truncated = u.read(size=9).decode()
        self.assertEqual(truncated, "datastore")

        d = ButlerURI("resource://lsst.daf.butler/configs", forceDirectory=True)
        # Check the directory URI itself; the failure message refers to it.
        self.assertTrue(d.exists(), f"Check directory {d} exists")

        j = d.join("datastore.yaml")
        self.assertEqual(u, j)
        self.assertFalse(j.dirLike)
        self.assertFalse(j.isdir())
        not_there = d.join("not-there.yaml")
        self.assertFalse(not_there.exists())

        bad = ButlerURI("resource://bad.module/not.yaml")
        multi = ButlerURI.mexists([u, bad, not_there])
        self.assertTrue(multi[u])
        self.assertFalse(multi[bad])
        self.assertFalse(multi[not_there])

    def testEscapes(self):
        """Special characters in file paths"""
        src = ButlerURI("bbb/???/test.txt", root=self.tmpdir, forceAbsolute=True)
        self.assertFalse(src.scheme)
        src.write(b"Some content")
        self.assertTrue(src.exists())

        # abspath always returns a file scheme
        file = src.abspath()
        self.assertTrue(file.exists())
        self.assertIn("???", file.ospath)
        self.assertNotIn("???", file.path)

        file = file.updatedFile("tests??.txt")
        self.assertNotIn("??.txt", file.path)
        file.write(b"Other content")
        self.assertEqual(file.read(), b"Other content")

        src = src.updatedFile("tests??.txt")
        self.assertIn("??.txt", src.path)
        self.assertEqual(file.read(), src.read(), f"reading from {file.ospath} and {src.ospath}")

        # File URI and schemeless URI
        parent = ButlerURI("file:" + urllib.parse.quote("/a/b/c/de/??/"))
        child = ButlerURI("e/f/g.txt", forceAbsolute=False)
        self.assertEqual(child.relative_to(parent), "e/f/g.txt")

        child = ButlerURI("e/f??#/g.txt", forceAbsolute=False)
        self.assertEqual(child.relative_to(parent), "e/f??#/g.txt")

        child = ButlerURI("file:" + urllib.parse.quote("/a/b/c/de/??/e/f??#/g.txt"))
        self.assertEqual(child.relative_to(parent), "e/f??#/g.txt")

        self.assertEqual(child.relativeToPathRoot, "a/b/c/de/??/e/f??#/g.txt")

        # Schemeless so should not quote
        dir_uri = ButlerURI("bbb/???/", root=self.tmpdir, forceAbsolute=True, forceDirectory=True)
        self.assertIn("???", dir_uri.ospath)
        self.assertIn("???", dir_uri.path)
        self.assertFalse(dir_uri.scheme)

        # dir.join() morphs into a file scheme
        new = dir_uri.join("test_j.txt")
        self.assertIn("???", new.ospath, f"Checking {new}")
        new.write(b"Content")

        new2name = "###/test??.txt"
        new2 = dir_uri.join(new2name)
        self.assertIn("???", new2.ospath)
        new2.write(b"Content")
        self.assertTrue(new2.ospath.endswith(new2name))
        self.assertEqual(new.read(), new2.read())

        fdir = dir_uri.abspath()
        self.assertNotIn("???", fdir.path)
        self.assertIn("???", fdir.ospath)
        self.assertEqual(fdir.scheme, "file")
        fnew = dir_uri.join("test_jf.txt")
        fnew.write(b"Content")

        fnew2 = fdir.join(new2name)
        fnew2.write(b"Content")
        self.assertTrue(fnew2.ospath.endswith(new2name))
        self.assertNotIn("###", fnew2.path)

        self.assertEqual(fnew.read(), fnew2.read())

        # Test that children relative to schemeless and file schemes
        # still return the same unquoted name
        self.assertEqual(fnew2.relative_to(fdir), new2name, f"{fnew2}.relative_to({fdir})")
        self.assertEqual(fnew2.relative_to(dir_uri), new2name, f"{fnew2}.relative_to({dir_uri})")
        self.assertEqual(new2.relative_to(fdir), new2name, f"{new2}.relative_to({fdir})")
        self.assertEqual(new2.relative_to(dir_uri), new2name, f"{new2}.relative_to({dir_uri})")

        # Check for double quoting
        plus_path = "/a/b/c+d/"
        with self.assertLogs(level="WARNING"):
            uri = ButlerURI(urllib.parse.quote(plus_path), forceDirectory=True)
        self.assertEqual(uri.ospath, plus_path)

        # Check that # is not escaped for schemeless URIs
        hash_path = "/a/b#/c&d#xyz"
        hpos = hash_path.rfind("#")
        uri = ButlerURI(hash_path)
        self.assertEqual(uri.ospath, hash_path[:hpos])
        self.assertEqual(uri.fragment, hash_path[hpos + 1 :])

    def testHash(self):
        """Test that we can store URIs in sets and as keys."""
        uri1 = ButlerURI(TESTDIR)
        uri2 = uri1.join("test/")
        s = {uri1, uri2}
        self.assertIn(uri1, s)

        d = {uri1: "1", uri2: "2"}
        self.assertEqual(d[uri2], "2")

    def testWalk(self):
        """Test ButlerURI.walk()."""
        test_dir_uri = ButlerURI(TESTDIR)

        file = test_dir_uri.join("config/basic/butler.yaml")
        found = list(ButlerURI.findFileResources([file]))
        self.assertEqual(found[0], file)

        # Compare against the full local paths
        expected = {
            p for p in glob.glob(os.path.join(TESTDIR, "config", "**"), recursive=True) if os.path.isfile(p)
        }
        found = {u.ospath for u in ButlerURI.findFileResources([test_dir_uri.join("config")])}
        self.assertEqual(found, expected)

        # Now solely the YAML files
        expected_yaml = set(glob.glob(os.path.join(TESTDIR, "config", "**", "*.yaml"), recursive=True))
        found = {
            u.ospath
            for u in ButlerURI.findFileResources([test_dir_uri.join("config")], file_filter=r".*\.yaml$")
        }
        self.assertEqual(found, expected_yaml)

        # Now two explicit directories and a file
        expected = set(glob.glob(os.path.join(TESTDIR, "config", "**", "basic", "*.yaml"), recursive=True))
        expected.update(
            glob.glob(os.path.join(TESTDIR, "config", "**", "templates", "*.yaml"), recursive=True)
        )
        expected.add(file.ospath)

        found = {
            u.ospath
            for u in ButlerURI.findFileResources(
                [file, test_dir_uri.join("config/basic"), test_dir_uri.join("config/templates")],
                file_filter=r".*\.yaml$",
            )
        }
        self.assertEqual(found, expected)

        # Group by directory -- find everything and compare it with what
        # we expected to be there in total. We expect to find 9 directories
        # containing yaml files so make sure we only iterate 9 times.
        found_yaml = set()
        counter = 0
        for uris in ButlerURI.findFileResources(
            [file, test_dir_uri.join("config/")], file_filter=r".*\.yaml$", grouped=True
        ):
            found = {u.ospath for u in uris}
            if found:
                counter += 1

            found_yaml.update(found)

        self.assertEqual(found_yaml, expected_yaml)
        self.assertEqual(counter, 9)

        # Grouping but check that single files are returned in a single group
        # at the end
        file2 = test_dir_uri.join("config/templates/templates-bad.yaml")
        found = list(
            ButlerURI.findFileResources([file, file2, test_dir_uri.join("config/dbAuth")], grouped=True)
        )
        self.assertEqual(len(found), 2)
        self.assertEqual(list(found[1]), [file, file2])

        with self.assertRaises(ValueError):
            list(file.walk())

    def testRootURI(self):
        """Test ButlerURI.root_uri()."""
        uri = ButlerURI("https://www.notexist.com:8080/file/test")
        uri2 = ButlerURI("s3://www.notexist.com/file/test")
        self.assertEqual(uri.root_uri().geturl(), "https://www.notexist.com:8080/")
        self.assertEqual(uri2.root_uri().geturl(), "s3://www.notexist.com/")

    def testJoin(self):
        """Test .join method."""
        root_str = "s3://bucket/hsc/payload/"
        root = ButlerURI(root_str)

        self.assertEqual(root.join("b/test.txt").geturl(), f"{root_str}b/test.txt")
        add_dir = root.join("b/c/d/")
        self.assertTrue(add_dir.isdir())
        self.assertEqual(add_dir.geturl(), f"{root_str}b/c/d/")

        quote_example = "b&c.t@x#t"
        needs_quote = root.join(quote_example)
        self.assertEqual(needs_quote.unquoted_path, f"/hsc/payload/{quote_example}")

        other = ButlerURI("file://localhost/test.txt")
        self.assertEqual(root.join(other), other)
        self.assertEqual(other.join("b/new.txt").geturl(), "file://localhost/b/new.txt")

        joined = ButlerURI("s3://bucket/hsc/payload/").join(ButlerURI("test.qgraph", forceAbsolute=False))
        self.assertEqual(joined, ButlerURI("s3://bucket/hsc/payload/test.qgraph"))

        with self.assertRaises(ValueError):
            ButlerURI("s3://bucket/hsc/payload/").join(ButlerURI("test.qgraph"))

    def testTemporary(self):
        """Test that temporary URIs are removed when the context exits."""
        with ButlerURI.temporary_uri(suffix=".json") as tmp:
            self.assertEqual(tmp.getExtension(), ".json", f"uri: {tmp}")
            self.assertTrue(tmp.isabs(), f"uri: {tmp}")
            self.assertFalse(tmp.exists(), f"uri: {tmp}")
            tmp.write(b"abcd")
            self.assertTrue(tmp.exists(), f"uri: {tmp}")
            self.assertTrue(tmp.isTemporary)
        self.assertFalse(tmp.exists(), f"uri: {tmp}")

        tmpdir = ButlerURI(self.tmpdir, forceDirectory=True)
        with ButlerURI.temporary_uri(prefix=tmpdir, suffix=".yaml") as tmp:
            # Use a specified tmpdir and check it is okay for the file
            # to not be created.
            self.assertFalse(tmp.exists(), f"uri: {tmp}")
        self.assertTrue(tmpdir.exists(), f"uri: {tmpdir} still exists")

490 

491 

# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()