Coverage for tests/test_uri.py: 9%

Shortcuts on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

303 statements  

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22import glob 

23import os 

24import pathlib 

25import unittest 

26import urllib.parse 

27 

28from lsst.daf.butler import ButlerURI 

29from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir 

30 

# Absolute path of the directory containing this test module. Used as the
# parent for per-test scratch directories and as the root of the "config"
# tree that the walk tests search.
TESTDIR = os.path.abspath(
    os.path.dirname(__file__),
)

32 

33 

class FileURITestCase(unittest.TestCase):
    """Concrete tests for local files.

    Basic tests to show that `lsst.daf.butler.ButlerURI` compatibility
    import still works. Can be removed when deprecation period ends.
    """

    def setUp(self):
        """Create the scratch directory used by the tests."""
        # Use a local tempdir because on macOS the temp dirs use symlinks
        # so relsymlink gets quite confused.
        self.tmpdir = makeTestTempDir(TESTDIR)

    def tearDown(self):
        """Remove the scratch directory created in `setUp`."""
        removeTestTempDir(self.tmpdir)

    def testFile(self):
        """Test construction, write/read/size and identity of file URIs."""
        file = os.path.join(self.tmpdir, "test.txt")
        uri = ButlerURI(file)
        self.assertFalse(uri.exists(), f"{uri} should not exist")
        self.assertEqual(uri.ospath, file)

        # A pathlib.Path must be accepted as well as a plain string.
        path = pathlib.Path(file)
        uri = ButlerURI(path)
        self.assertEqual(uri.ospath, file)

        content = "abcdefghijklmnopqrstuv\n"
        uri.write(content.encode())
        self.assertTrue(os.path.exists(file), "File should exist locally")
        self.assertTrue(uri.exists(), f"{uri} should now exist")
        self.assertEqual(uri.read().decode(), content)
        self.assertEqual(uri.size(), len(content.encode()))

        with self.assertRaises(FileNotFoundError):
            ButlerURI("file/not/there.txt").size()

        # Check that creating a URI from a URI returns the same thing
        uri2 = ButlerURI(uri)
        self.assertEqual(uri, uri2)
        self.assertIs(uri, uri2)

        with self.assertRaises(ValueError):
            # Scheme-less URIs are not allowed to support non-file roots
            # at the present time. This may change in the future to become
            # equivalent to ButlerURI.join()
            ButlerURI("a/b.txt", root=ButlerURI("s3://bucket/a/b/"))

    def testExtension(self):
        """Test replacement of file extensions."""
        file = ButlerURI(os.path.join(self.tmpdir, "test.txt"))
        self.assertEqual(file.updatedExtension(None), file)
        self.assertEqual(file.updatedExtension(".txt"), file)
        # An unchanged extension is expected to hand back the same object.
        self.assertIs(file.updatedExtension(".txt"), file)

        fits = file.updatedExtension(".fits.gz")
        self.assertEqual(fits.basename(), "test.fits.gz")
        self.assertEqual(fits.updatedExtension(".jpeg").basename(), "test.jpeg")

    def testRelative(self):
        """Check that we can get subpaths back from two URIs"""
        parent = ButlerURI(self.tmpdir, forceDirectory=True, forceAbsolute=True)
        self.assertTrue(parent.isdir())
        child = ButlerURI(os.path.join(self.tmpdir, "dir1", "file.txt"), forceAbsolute=True)

        self.assertEqual(child.relative_to(parent), "dir1/file.txt")

        not_child = ButlerURI("/a/b/dir1/file.txt")
        self.assertIsNone(not_child.relative_to(parent))
        self.assertFalse(not_child.isdir())

        not_directory = ButlerURI(os.path.join(self.tmpdir, "dir1", "file2.txt"))
        self.assertIsNone(child.relative_to(not_directory))

        # Relative URIs
        parent = ButlerURI("a/b/", forceAbsolute=False)
        child = ButlerURI("a/b/c/d.txt", forceAbsolute=False)
        self.assertFalse(child.scheme)
        self.assertEqual(child.relative_to(parent), "c/d.txt")

        # File URI and schemeless URI
        parent = ButlerURI("file:/a/b/c/")
        child = ButlerURI("e/f/g.txt", forceAbsolute=False)

        # If the child is relative and the parent is absolute we assume
        # that the child is a child of the parent unless it uses ".."
        self.assertEqual(child.relative_to(parent), "e/f/g.txt")

        child = ButlerURI("../e/f/g.txt", forceAbsolute=False)
        self.assertIsNone(child.relative_to(parent))

        child = ButlerURI("../c/e/f/g.txt", forceAbsolute=False)
        self.assertEqual(child.relative_to(parent), "e/f/g.txt")

        # Test non-file root with relative path.
        child = ButlerURI("e/f/g.txt", forceAbsolute=False)
        parent = ButlerURI("s3://hello/a/b/c/")
        self.assertEqual(child.relative_to(parent), "e/f/g.txt")

        # Test with different netloc
        child = ButlerURI("http://my.host/a/b/c.txt")
        parent = ButlerURI("http://other.host/a/")
        self.assertIsNone(child.relative_to(parent), f"{child}.relative_to({parent})")

        # Schemeless absolute child.
        # Schemeless absolute URI is constructed using root= parameter.
        parent = ButlerURI("file:///a/b/c/")
        child = ButlerURI("d/e.txt", root=parent)
        self.assertEqual(child.relative_to(parent), "d/e.txt", f"{child}.relative_to({parent})")

        # The same child is deliberately reused against the new parents below.
        parent = ButlerURI("c/", root="/a/b/")
        self.assertEqual(child.relative_to(parent), "d/e.txt", f"{child}.relative_to({parent})")

        # Absolute schemeless child with relative parent will always fail.
        parent = ButlerURI("d/e.txt", forceAbsolute=False)
        self.assertIsNone(child.relative_to(parent), f"{child}.relative_to({parent})")

    def testParents(self):
        """Test of splitting and parent walking."""
        parent = ButlerURI(self.tmpdir, forceDirectory=True, forceAbsolute=True)
        child_file = parent.join("subdir/file.txt")
        self.assertFalse(child_file.isdir())
        child_subdir, file = child_file.split()
        self.assertEqual(file, "file.txt")
        self.assertTrue(child_subdir.isdir())
        self.assertEqual(child_file.dirname(), child_subdir)
        self.assertEqual(child_file.basename(), file)
        self.assertEqual(child_file.parent(), child_subdir)
        derived_parent = child_subdir.parent()
        self.assertEqual(derived_parent, parent)
        self.assertTrue(derived_parent.isdir())
        self.assertEqual(child_file.parent().parent(), parent)

    def testEnvVar(self):
        """Test that environment variables are expanded."""
        # Import the submodule explicitly: "import unittest" alone does not
        # guarantee that the "unittest.mock" attribute is available.
        from unittest import mock

        with mock.patch.dict(os.environ, {"MY_TEST_DIR": "/a/b/c"}):
            uri = ButlerURI("${MY_TEST_DIR}/d.txt")
        self.assertEqual(uri.path, "/a/b/c/d.txt")
        self.assertEqual(uri.scheme, "file")

        # This will not expand
        uri = ButlerURI("${MY_TEST_DIR}/d.txt", forceAbsolute=False)
        self.assertEqual(uri.path, "${MY_TEST_DIR}/d.txt")
        self.assertFalse(uri.scheme)

    def testMkdir(self):
        """Test creation of nested directories and a file within them."""
        tmpdir = ButlerURI(self.tmpdir)
        newdir = tmpdir.join("newdir/seconddir")
        newdir.mkdir()
        self.assertTrue(newdir.exists())
        newfile = newdir.join("temp.txt")
        newfile.write("Data".encode())
        self.assertTrue(newfile.exists())

    def testTransfer(self):
        """Test every supported local transfer mode and partial reads."""
        src = ButlerURI(os.path.join(self.tmpdir, "test.txt"))
        content = "Content is some content\nwith something to say\n\n"
        src.write(content.encode())

        for mode in ("copy", "link", "hardlink", "symlink", "relsymlink"):
            dest = ButlerURI(os.path.join(self.tmpdir, f"dest_{mode}.txt"))
            dest.transfer_from(src, transfer=mode)
            self.assertTrue(dest.exists(), f"Check that {dest} exists (transfer={mode})")

            with open(dest.ospath, "r") as fh:
                new_content = fh.read()
            self.assertEqual(new_content, content)

            if mode in ("symlink", "relsymlink"):
                self.assertTrue(os.path.islink(dest.ospath), f"Check that {dest} is symlink")

            # If the source and destination are hardlinks of each other
            # the transfer should work even if overwrite=False.
            if mode in ("link", "hardlink"):
                dest.transfer_from(src, transfer=mode)
            else:
                with self.assertRaises(
                    FileExistsError, msg=f"Overwrite of {dest} should not be allowed ({mode})"
                ):
                    dest.transfer_from(src, transfer=mode)

            dest.transfer_from(src, transfer=mode, overwrite=True)

            # Clean up so that the next mode starts without a destination.
            os.remove(dest.ospath)

        b = src.read()
        self.assertEqual(b.decode(), new_content)

        nbytes = 10
        subset = src.read(size=nbytes)
        self.assertEqual(len(subset), nbytes)
        self.assertEqual(subset.decode(), content[:nbytes])

        with self.assertRaises(ValueError):
            src.transfer_from(src, transfer="unknown")

    def testTransferIdentical(self):
        """Test overwrite of identical files."""
        dir1 = ButlerURI(os.path.join(self.tmpdir, "dir1"), forceDirectory=True)
        dir1.mkdir()
        # dir2 is a symlink to dir1, so a file reached through either name
        # is the same file on disk while the two URIs differ.
        dir2 = os.path.join(self.tmpdir, "dir2")
        os.symlink(dir1.ospath, dir2)

        # Write a test file.
        src_file = dir1.join("test.txt")
        content = "0123456"
        src_file.write(content.encode())

        # Construct URI to destination that should be identical.
        dest_file = ButlerURI(dir2, forceDirectory=True).join("test.txt")
        self.assertTrue(dest_file.exists())
        self.assertNotEqual(src_file, dest_file)

        # Transfer it over itself.
        dest_file.transfer_from(src_file, transfer="symlink", overwrite=True)
        new_content = dest_file.read().decode()
        self.assertEqual(content, new_content)

    def testResource(self):
        """Test URIs that use the ``resource`` scheme."""
        u = ButlerURI("resource://lsst.daf.butler/configs/datastore.yaml")
        self.assertTrue(u.exists(), f"Check {u} exists")

        content = u.read().decode()
        self.assertTrue(content.startswith("datastore:"))

        truncated = u.read(size=9).decode()
        self.assertEqual(truncated, "datastore")

        d = ButlerURI("resource://lsst.daf.butler/configs", forceDirectory=True)
        # Check the directory itself; the file URI was already checked above.
        self.assertTrue(d.exists(), f"Check directory {d} exists")

        j = d.join("datastore.yaml")
        self.assertEqual(u, j)
        self.assertFalse(j.dirLike)
        self.assertFalse(j.isdir())
        not_there = d.join("not-there.yaml")
        self.assertFalse(not_there.exists())

        bad = ButlerURI("resource://bad.module/not.yaml")
        multi = ButlerURI.mexists([u, bad, not_there])
        self.assertTrue(multi[u])
        self.assertFalse(multi[bad])
        self.assertFalse(multi[not_there])

    def testEscapes(self):
        """Special characters in file paths"""
        src = ButlerURI("bbb/???/test.txt", root=self.tmpdir, forceAbsolute=True)
        self.assertFalse(src.scheme)
        src.write(b"Some content")
        self.assertTrue(src.exists())

        # abspath always returns a file scheme
        file = src.abspath()
        self.assertTrue(file.exists())
        self.assertIn("???", file.ospath)
        self.assertNotIn("???", file.path)

        file = file.updatedFile("tests??.txt")
        self.assertNotIn("??.txt", file.path)
        file.write(b"Other content")
        self.assertEqual(file.read(), b"Other content")

        src = src.updatedFile("tests??.txt")
        self.assertIn("??.txt", src.path)
        self.assertEqual(file.read(), src.read(), f"reading from {file.ospath} and {src.ospath}")

        # File URI and schemeless URI
        parent = ButlerURI("file:" + urllib.parse.quote("/a/b/c/de/??/"))
        child = ButlerURI("e/f/g.txt", forceAbsolute=False)
        self.assertEqual(child.relative_to(parent), "e/f/g.txt")

        child = ButlerURI("e/f??#/g.txt", forceAbsolute=False)
        self.assertEqual(child.relative_to(parent), "e/f??#/g.txt")

        child = ButlerURI("file:" + urllib.parse.quote("/a/b/c/de/??/e/f??#/g.txt"))
        self.assertEqual(child.relative_to(parent), "e/f??#/g.txt")

        self.assertEqual(child.relativeToPathRoot, "a/b/c/de/??/e/f??#/g.txt")

        # Schemeless so should not quote
        dir_uri = ButlerURI("bbb/???/", root=self.tmpdir, forceAbsolute=True, forceDirectory=True)
        self.assertIn("???", dir_uri.ospath)
        self.assertIn("???", dir_uri.path)
        self.assertFalse(dir_uri.scheme)

        # dir.join() morphs into a file scheme
        new = dir_uri.join("test_j.txt")
        self.assertIn("???", new.ospath, f"Checking {new}")
        new.write(b"Content")

        new2name = "###/test??.txt"
        new2 = dir_uri.join(new2name)
        self.assertIn("???", new2.ospath)
        new2.write(b"Content")
        self.assertTrue(new2.ospath.endswith(new2name))
        self.assertEqual(new.read(), new2.read())

        fdir = dir_uri.abspath()
        self.assertNotIn("???", fdir.path)
        self.assertIn("???", fdir.ospath)
        self.assertEqual(fdir.scheme, "file")
        fnew = dir_uri.join("test_jf.txt")
        fnew.write(b"Content")

        fnew2 = fdir.join(new2name)
        fnew2.write(b"Content")
        self.assertTrue(fnew2.ospath.endswith(new2name))
        self.assertNotIn("###", fnew2.path)

        self.assertEqual(fnew.read(), fnew2.read())

        # Test that children relative to schemeless and file schemes
        # still return the same unquoted name
        self.assertEqual(fnew2.relative_to(fdir), new2name, f"{fnew2}.relative_to({fdir})")
        self.assertEqual(fnew2.relative_to(dir_uri), new2name, f"{fnew2}.relative_to({dir_uri})")
        self.assertEqual(new2.relative_to(fdir), new2name, f"{new2}.relative_to({fdir})")
        self.assertEqual(new2.relative_to(dir_uri), new2name, f"{new2}.relative_to({dir_uri})")

        # Check for double quoting
        plus_path = "/a/b/c+d/"
        with self.assertLogs(level="WARNING"):
            uri = ButlerURI(urllib.parse.quote(plus_path), forceDirectory=True)
        self.assertEqual(uri.ospath, plus_path)

        # Check that # is not escaped for schemeless URIs
        hash_path = "/a/b#/c&d#xyz"
        hpos = hash_path.rfind("#")
        uri = ButlerURI(hash_path)
        self.assertEqual(uri.ospath, hash_path[:hpos])
        self.assertEqual(uri.fragment, hash_path[hpos + 1 :])

    def testHash(self):
        """Test that we can store URIs in sets and as keys."""
        uri1 = ButlerURI(TESTDIR)
        uri2 = uri1.join("test/")
        s = {uri1, uri2}
        self.assertIn(uri1, s)

        d = {uri1: "1", uri2: "2"}
        self.assertEqual(d[uri2], "2")

    def testWalk(self):
        """Test ButlerURI.walk()."""
        test_dir_uri = ButlerURI(TESTDIR)

        file = test_dir_uri.join("config/basic/butler.yaml")
        found = list(ButlerURI.findFileResources([file]))
        self.assertEqual(found[0], file)

        # Compare against the full local paths
        expected = {
            p for p in glob.glob(os.path.join(TESTDIR, "config", "**"), recursive=True) if os.path.isfile(p)
        }
        found = {u.ospath for u in ButlerURI.findFileResources([test_dir_uri.join("config")])}
        self.assertEqual(found, expected)

        # Now solely the YAML files
        expected_yaml = set(glob.glob(os.path.join(TESTDIR, "config", "**", "*.yaml"), recursive=True))
        found = {
            u.ospath
            for u in ButlerURI.findFileResources([test_dir_uri.join("config")], file_filter=r".*\.yaml$")
        }
        self.assertEqual(found, expected_yaml)

        # Now two explicit directories and a file
        expected = set(glob.glob(os.path.join(TESTDIR, "config", "**", "basic", "*.yaml"), recursive=True))
        expected.update(glob.glob(os.path.join(TESTDIR, "config", "**", "templates", "*.yaml"), recursive=True))
        expected.add(file.ospath)

        found = {
            u.ospath
            for u in ButlerURI.findFileResources(
                [file, test_dir_uri.join("config/basic"), test_dir_uri.join("config/templates")],
                file_filter=r".*\.yaml$",
            )
        }
        self.assertEqual(found, expected)

        # Group by directory -- find everything and compare it with what
        # we expected to be there in total. We expect to find 9 directories
        # containing yaml files so make sure we only iterate 9 times.
        found_yaml = set()
        counter = 0
        for uris in ButlerURI.findFileResources(
            [file, test_dir_uri.join("config/")], file_filter=r".*\.yaml$", grouped=True
        ):
            found = {u.ospath for u in uris}
            # Only groups that actually contain files are counted.
            if found:
                counter += 1

            found_yaml.update(found)

        self.assertEqual(found_yaml, expected_yaml)
        self.assertEqual(counter, 9)

        # Grouping but check that single files are returned in a single group
        # at the end
        file2 = test_dir_uri.join("config/templates/templates-bad.yaml")
        found = list(
            ButlerURI.findFileResources([file, file2, test_dir_uri.join("config/dbAuth")], grouped=True)
        )
        self.assertEqual(len(found), 2)
        self.assertEqual(list(found[1]), [file, file2])

        # walk() on a URI that refers to a file must be rejected.
        with self.assertRaises(ValueError):
            list(file.walk())

    def testRootURI(self):
        """Test ButlerURI.root_uri()."""
        uri = ButlerURI("https://www.notexist.com:8080/file/test")
        uri2 = ButlerURI("s3://www.notexist.com/file/test")
        self.assertEqual(uri.root_uri().geturl(), "https://www.notexist.com:8080/")
        self.assertEqual(uri2.root_uri().geturl(), "s3://www.notexist.com/")

    def testJoin(self):
        """Test .join method."""
        root_str = "s3://bucket/hsc/payload/"
        root = ButlerURI(root_str)

        self.assertEqual(root.join("b/test.txt").geturl(), f"{root_str}b/test.txt")
        add_dir = root.join("b/c/d/")
        self.assertTrue(add_dir.isdir())
        self.assertEqual(add_dir.geturl(), f"{root_str}b/c/d/")

        quote_example = "b&c.t@x#t"
        needs_quote = root.join(quote_example)
        self.assertEqual(needs_quote.unquoted_path, f"/hsc/payload/{quote_example}")

        other = ButlerURI("file://localhost/test.txt")
        self.assertEqual(root.join(other), other)
        self.assertEqual(other.join("b/new.txt").geturl(), "file://localhost/b/new.txt")

        joined = ButlerURI("s3://bucket/hsc/payload/").join(ButlerURI("test.qgraph", forceAbsolute=False))
        self.assertEqual(joined, ButlerURI("s3://bucket/hsc/payload/test.qgraph"))

        # Same join as above but without forceAbsolute=False on the argument;
        # this form is expected to be rejected.
        with self.assertRaises(ValueError):
            ButlerURI("s3://bucket/hsc/payload/").join(ButlerURI("test.qgraph"))

    def testTemporary(self):
        """Test the temporary URI context manager."""
        with ButlerURI.temporary_uri(suffix=".json") as tmp:
            self.assertEqual(tmp.getExtension(), ".json", f"uri: {tmp}")
            self.assertTrue(tmp.isabs(), f"uri: {tmp}")
            self.assertFalse(tmp.exists(), f"uri: {tmp}")
            tmp.write(b"abcd")
            self.assertTrue(tmp.exists(), f"uri: {tmp}")
            self.assertTrue(tmp.isTemporary)
        # The file must be gone once the context has exited.
        self.assertFalse(tmp.exists(), f"uri: {tmp}")

        tmpdir = ButlerURI(self.tmpdir, forceDirectory=True)
        with ButlerURI.temporary_uri(prefix=tmpdir, suffix=".yaml") as tmp:
            # Use a specified tmpdir and check it is okay for the file
            # to not be created.
            self.assertFalse(tmp.exists(), f"uri: {tmp}")
        self.assertTrue(tmpdir.exists(), f"uri: {tmpdir} still exists")

489 

490 

# Run the test suite when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()