Coverage for tests/test_uri.py: 12%

Shortcuts on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

580 statements  

1# This file is part of lsst-resources. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# Use of this source code is governed by a 3-clause BSD-style 

10# license that can be found in the LICENSE file. 

11 

12import glob 

13import os 

14import pathlib 

15import shutil 

16import unittest 

17import urllib.parse 

18import uuid 

19 

20import responses 

21 

# boto3/botocore/moto are optional: when any of them is missing the S3 tests
# are skipped (they are guarded by ``unittest.skipIf(not boto3, ...)``).
try:
    import boto3
    import botocore
    from moto import mock_s3
except ImportError:
    # Reset both names so the module namespace is consistent regardless of
    # which of the three imports failed.
    boto3 = None
    botocore = None

    def mock_s3(cls):
        """Return ``cls`` unchanged.

        No-op stand-in for the ``moto.mock_s3`` decorator, used when moto
        (or boto3/botocore) can not be imported.
        """
        return cls

32 

33 

34from lsst.resources import ResourcePath 

35from lsst.resources.s3utils import setAwsEnvCredentials, unsetAwsEnvCredentials 

36from lsst.resources.utils import makeTestTempDir, removeTestTempDir 

37 

38TESTDIR = os.path.abspath(os.path.dirname(__file__)) 

39 

40 

def _check_open(test_case, uri, *, mode_suffixes=("", "t", "b"), **kwargs) -> None:
    """Test an implementation of `ResourcePath.open`.

    For every requested mode suffix the helper exercises the ``x``, ``r``,
    ``w``, ``r+``, ``a`` and ``w+`` modes in turn and finally removes the
    file so the same URI can be reused.

    Parameters
    ----------
    test_case : `unittest.TestCase`
        Test case to use for assertions.
    uri : `ResourcePath`
        URI to use for tests.  Must point to a writeable location that is not
        yet occupied by a file.  On return, the location may point to a file
        only if the test fails.
    mode_suffixes : `Iterable` of `str`
        Suffixes to pass as part of the ``mode`` argument to
        `ResourcePath.open`, indicating whether to open as binary or as text;
        the only permitted elements are ``""``, ``"t"``, and ``"b"``.
    **kwargs
        Additional keyword arguments to forward to all calls to `open`.
    """
    text_content = "wxyz🙂"
    bytes_content = uuid.uuid4().bytes
    content_by_mode_suffix = {
        "": text_content,
        "t": text_content,
        "b": bytes_content,
    }
    empty_content_by_mode_suffix = {
        "": "",
        "t": "",
        "b": b"",
    }
    # Payload for the "must not overwrite" probe.  It has to match the type
    # expected by the buffer so that a backend which wrongly allows the open
    # is reported as a missing FileExistsError rather than as a TypeError.
    bad_content_by_mode_suffix = {
        "": "bad",
        "t": "bad",
        "b": b"bad",
    }
    for mode_suffix in mode_suffixes:
        content = content_by_mode_suffix[mode_suffix]
        # Create file with mode='x', which prohibits overwriting.
        with uri.open("x" + mode_suffix, **kwargs) as write_buffer:
            write_buffer.write(content)
        test_case.assertTrue(uri.exists())
        # Check that opening with 'x' now raises, and does not modify content.
        with test_case.assertRaises(FileExistsError):
            with uri.open("x" + mode_suffix, **kwargs) as write_buffer:
                write_buffer.write(bad_content_by_mode_suffix[mode_suffix])
        # Read the file we created and check the contents.
        with uri.open("r" + mode_suffix, **kwargs) as read_buffer:
            test_case.assertEqual(read_buffer.read(), content)
        # Write two copies of the content, overwriting the single copy there.
        with uri.open("w" + mode_suffix, **kwargs) as write_buffer:
            write_buffer.write(content + content)
        # Read again, this time use mode='r+', which reads what is there and
        # then lets us write more; we'll use that to reset the file to one
        # copy of the content.
        with uri.open("r+" + mode_suffix, **kwargs) as rw_buffer:
            test_case.assertEqual(rw_buffer.read(), content + content)
            rw_buffer.seek(0)
            rw_buffer.truncate()
            rw_buffer.write(content)
            rw_buffer.seek(0)
            test_case.assertEqual(rw_buffer.read(), content)
        with uri.open("r" + mode_suffix, **kwargs) as read_buffer:
            test_case.assertEqual(read_buffer.read(), content)
        # Append some more content to the file; should now have two copies.
        with uri.open("a" + mode_suffix, **kwargs) as append_buffer:
            append_buffer.write(content)
        with uri.open("r" + mode_suffix, **kwargs) as read_buffer:
            test_case.assertEqual(read_buffer.read(), content + content)
        # Final mode to check is w+, which does read/write but truncates first.
        with uri.open("w+" + mode_suffix, **kwargs) as rw_buffer:
            test_case.assertEqual(rw_buffer.read(), empty_content_by_mode_suffix[mode_suffix])
            rw_buffer.write(content)
            rw_buffer.seek(0)
            test_case.assertEqual(rw_buffer.read(), content)
        with uri.open("r" + mode_suffix, **kwargs) as read_buffer:
            test_case.assertEqual(read_buffer.read(), content)
        # Remove file to make room for the next loop of tests with this URI.
        uri.remove()

114 

115 

class FileURITestCase(unittest.TestCase):
    """Concrete tests for local files."""

    def setUp(self):
        # Use a local tempdir because on macOS the temp dirs use symlinks
        # so relsymlink gets quite confused.
        self.tmpdir = makeTestTempDir(TESTDIR)

    def tearDown(self):
        removeTestTempDir(self.tmpdir)

    def testFile(self):
        """Test construction from str/Path/URI and basic write/read/size."""
        file = os.path.join(self.tmpdir, "test.txt")
        uri = ResourcePath(file)
        self.assertFalse(uri.exists(), f"{uri} should not exist")
        self.assertEqual(uri.ospath, file)

        path = pathlib.Path(file)
        uri = ResourcePath(path)
        self.assertEqual(uri.ospath, file)

        content = "abcdefghijklmnopqrstuv\n"
        uri.write(content.encode())
        self.assertTrue(os.path.exists(file), "File should exist locally")
        self.assertTrue(uri.exists(), f"{uri} should now exist")
        self.assertEqual(uri.read().decode(), content)
        self.assertEqual(uri.size(), len(content.encode()))

        with self.assertRaises(FileNotFoundError):
            ResourcePath("file/not/there.txt").size()

        # Check that creating a URI from a URI returns the same thing
        uri2 = ResourcePath(uri)
        self.assertEqual(uri, uri2)
        self.assertEqual(id(uri), id(uri2))

        with self.assertRaises(ValueError):
            # Scheme-less URIs are not allowed to support non-file roots
            # at the present time. This may change in the future to become
            # equivalent to ResourcePath.join()
            ResourcePath("a/b.txt", root=ResourcePath("s3://bucket/a/b/"))

    def testExtension(self):
        """Test that updatedExtension() replaces the file extension."""
        file = ResourcePath(os.path.join(self.tmpdir, "test.txt"))
        self.assertEqual(file.updatedExtension(None), file)
        self.assertEqual(file.updatedExtension(".txt"), file)
        self.assertEqual(id(file.updatedExtension(".txt")), id(file))

        fits = file.updatedExtension(".fits.gz")
        self.assertEqual(fits.basename(), "test.fits.gz")
        self.assertEqual(fits.updatedExtension(".jpeg").basename(), "test.jpeg")

    def testRelative(self):
        """Check that we can get subpaths back from two URIs"""
        parent = ResourcePath(self.tmpdir, forceDirectory=True, forceAbsolute=True)
        self.assertTrue(parent.isdir())
        child = ResourcePath(os.path.join(self.tmpdir, "dir1", "file.txt"), forceAbsolute=True)

        self.assertEqual(child.relative_to(parent), "dir1/file.txt")

        not_child = ResourcePath("/a/b/dir1/file.txt")
        self.assertIsNone(not_child.relative_to(parent))
        self.assertFalse(not_child.isdir())

        not_directory = ResourcePath(os.path.join(self.tmpdir, "dir1", "file2.txt"))
        self.assertIsNone(child.relative_to(not_directory))

        # Relative URIs
        parent = ResourcePath("a/b/", forceAbsolute=False)
        child = ResourcePath("a/b/c/d.txt", forceAbsolute=False)
        self.assertFalse(child.scheme)
        self.assertEqual(child.relative_to(parent), "c/d.txt")

        # forceAbsolute=True should work even on an existing ResourcePath
        self.assertTrue(pathlib.Path(ResourcePath(child, forceAbsolute=True).ospath).is_absolute())

        # File URI and schemeless URI
        parent = ResourcePath("file:/a/b/c/")
        child = ResourcePath("e/f/g.txt", forceAbsolute=False)

        # If the child is relative and the parent is absolute we assume
        # that the child is a child of the parent unless it uses ".."
        self.assertEqual(child.relative_to(parent), "e/f/g.txt")

        child = ResourcePath("../e/f/g.txt", forceAbsolute=False)
        self.assertIsNone(child.relative_to(parent))

        child = ResourcePath("../c/e/f/g.txt", forceAbsolute=False)
        self.assertEqual(child.relative_to(parent), "e/f/g.txt")

        # Test non-file root with relative path.
        child = ResourcePath("e/f/g.txt", forceAbsolute=False)
        parent = ResourcePath("s3://hello/a/b/c/")
        self.assertEqual(child.relative_to(parent), "e/f/g.txt")

        # Test with different netloc
        child = ResourcePath("http://my.host/a/b/c.txt")
        parent = ResourcePath("http://other.host/a/")
        self.assertIsNone(child.relative_to(parent), f"{child}.relative_to({parent})")

        # Schemeless absolute child.
        # Schemeless absolute URI is constructed using root= parameter.
        parent = ResourcePath("file:///a/b/c/")
        child = ResourcePath("d/e.txt", root=parent)
        self.assertEqual(child.relative_to(parent), "d/e.txt", f"{child}.relative_to({parent})")

        parent = ResourcePath("c/", root="/a/b/")
        self.assertEqual(child.relative_to(parent), "d/e.txt", f"{child}.relative_to({parent})")

        # Absolute schemeless child with relative parent will always fail.
        parent = ResourcePath("d/e.txt", forceAbsolute=False)
        self.assertIsNone(child.relative_to(parent), f"{child}.relative_to({parent})")

    def testParents(self):
        """Test of splitting and parent walking."""
        parent = ResourcePath(self.tmpdir, forceDirectory=True, forceAbsolute=True)
        child_file = parent.join("subdir/file.txt")
        self.assertFalse(child_file.isdir())
        child_subdir, file = child_file.split()
        self.assertEqual(file, "file.txt")
        self.assertTrue(child_subdir.isdir())
        self.assertEqual(child_file.dirname(), child_subdir)
        self.assertEqual(child_file.basename(), file)
        self.assertEqual(child_file.parent(), child_subdir)
        derived_parent = child_subdir.parent()
        self.assertEqual(derived_parent, parent)
        self.assertTrue(derived_parent.isdir())
        self.assertEqual(child_file.parent().parent(), parent)

    def testEnvVar(self):
        """Test that environment variables are expanded."""

        with unittest.mock.patch.dict(os.environ, {"MY_TEST_DIR": "/a/b/c"}):
            uri = ResourcePath("${MY_TEST_DIR}/d.txt")
        self.assertEqual(uri.path, "/a/b/c/d.txt")
        self.assertEqual(uri.scheme, "file")

        # This will not expand
        uri = ResourcePath("${MY_TEST_DIR}/d.txt", forceAbsolute=False)
        self.assertEqual(uri.path, "${MY_TEST_DIR}/d.txt")
        self.assertFalse(uri.scheme)

    def testMkdir(self):
        """Test that mkdir() creates nested directories usable for writing."""
        tmpdir = ResourcePath(self.tmpdir)
        newdir = tmpdir.join("newdir/seconddir")
        newdir.mkdir()
        self.assertTrue(newdir.exists())
        newfile = newdir.join("temp.txt")
        newfile.write("Data".encode())
        self.assertTrue(newfile.exists())

    def testTransfer(self):
        """Test transfer_from() for every local transfer mode."""
        src = ResourcePath(os.path.join(self.tmpdir, "test.txt"))
        content = "Content is some content\nwith something to say\n\n"
        src.write(content.encode())

        for mode in ("copy", "link", "hardlink", "symlink", "relsymlink"):
            dest = ResourcePath(os.path.join(self.tmpdir, f"dest_{mode}.txt"))
            dest.transfer_from(src, transfer=mode)
            self.assertTrue(dest.exists(), f"Check that {dest} exists (transfer={mode})")

            with open(dest.ospath, "r") as fh:
                new_content = fh.read()
            self.assertEqual(new_content, content)

            if mode in ("symlink", "relsymlink"):
                self.assertTrue(os.path.islink(dest.ospath), f"Check that {dest} is symlink")

            # If the source and destination are hardlinks of each other
            # the transfer should work even if overwrite=False.
            if mode in ("link", "hardlink"):
                dest.transfer_from(src, transfer=mode)
            else:
                with self.assertRaises(
                    FileExistsError, msg=f"Overwrite of {dest} should not be allowed ({mode})"
                ):
                    dest.transfer_from(src, transfer=mode)

            dest.transfer_from(src, transfer=mode, overwrite=True)

            os.remove(dest.ospath)

        b = src.read()
        self.assertEqual(b.decode(), new_content)

        nbytes = 10
        subset = src.read(size=nbytes)
        self.assertEqual(len(subset), nbytes)
        self.assertEqual(subset.decode(), content[:nbytes])

        with self.assertRaises(ValueError):
            src.transfer_from(src, transfer="unknown")

    def testTransferIdentical(self):
        """Test overwrite of identical files."""
        dir1 = ResourcePath(os.path.join(self.tmpdir, "dir1"), forceDirectory=True)
        dir1.mkdir()
        dir2 = os.path.join(self.tmpdir, "dir2")
        os.symlink(dir1.ospath, dir2)

        # Write a test file.
        src_file = dir1.join("test.txt")
        content = "0123456"
        src_file.write(content.encode())

        # Construct URI to destination that should be identical.
        dest_file = ResourcePath(os.path.join(dir2), forceDirectory=True).join("test.txt")
        self.assertTrue(dest_file.exists())
        self.assertNotEqual(src_file, dest_file)

        # Transfer it over itself.
        dest_file.transfer_from(src_file, transfer="symlink", overwrite=True)
        new_content = dest_file.read().decode()
        self.assertEqual(content, new_content)

    def testResource(self):
        """Test ``resource://`` URIs that refer to Python package data."""
        # No resources in this package so need a resource in the main
        # python distribution.
        u = ResourcePath("resource://idlelib/Icons/README.txt")
        self.assertTrue(u.exists(), f"Check {u} exists")

        content = u.read().decode()
        self.assertIn("IDLE", content)

        truncated = u.read(size=9).decode()
        self.assertEqual(truncated, content[:9])

        d = ResourcePath("resource://idlelib/Icons", forceDirectory=True)
        # Check the directory itself (this previously re-checked the file).
        self.assertTrue(d.exists(), f"Check directory {d} exists")

        j = d.join("README.txt")
        self.assertEqual(u, j)
        self.assertFalse(j.dirLike)
        self.assertFalse(j.isdir())
        not_there = d.join("not-there.yaml")
        self.assertFalse(not_there.exists())

        bad = ResourcePath("resource://bad.module/not.yaml")
        multi = ResourcePath.mexists([u, bad, not_there])
        self.assertTrue(multi[u])
        self.assertFalse(multi[bad])
        self.assertFalse(multi[not_there])

    def testEscapes(self):
        """Special characters in file paths"""
        src = ResourcePath("bbb/???/test.txt", root=self.tmpdir, forceAbsolute=True)
        self.assertFalse(src.scheme)
        src.write(b"Some content")
        self.assertTrue(src.exists())

        # abspath always returns a file scheme
        file = src.abspath()
        self.assertTrue(file.exists())
        self.assertIn("???", file.ospath)
        self.assertNotIn("???", file.path)

        file = file.updatedFile("tests??.txt")
        self.assertNotIn("??.txt", file.path)
        file.write(b"Other content")
        self.assertEqual(file.read(), b"Other content")

        src = src.updatedFile("tests??.txt")
        self.assertIn("??.txt", src.path)
        self.assertEqual(file.read(), src.read(), f"reading from {file.ospath} and {src.ospath}")

        # File URI and schemeless URI
        parent = ResourcePath("file:" + urllib.parse.quote("/a/b/c/de/??/"))
        child = ResourcePath("e/f/g.txt", forceAbsolute=False)
        self.assertEqual(child.relative_to(parent), "e/f/g.txt")

        child = ResourcePath("e/f??#/g.txt", forceAbsolute=False)
        self.assertEqual(child.relative_to(parent), "e/f??#/g.txt")

        child = ResourcePath("file:" + urllib.parse.quote("/a/b/c/de/??/e/f??#/g.txt"))
        self.assertEqual(child.relative_to(parent), "e/f??#/g.txt")

        self.assertEqual(child.relativeToPathRoot, "a/b/c/de/??/e/f??#/g.txt")

        # Schemeless so should not quote
        dir = ResourcePath("bbb/???/", root=self.tmpdir, forceAbsolute=True, forceDirectory=True)
        self.assertIn("???", dir.ospath)
        self.assertIn("???", dir.path)
        self.assertFalse(dir.scheme)

        # dir.join() morphs into a file scheme
        new = dir.join("test_j.txt")
        self.assertIn("???", new.ospath, f"Checking {new}")
        new.write(b"Content")

        new2name = "###/test??.txt"
        new2 = dir.join(new2name)
        self.assertIn("???", new2.ospath)
        new2.write(b"Content")
        self.assertTrue(new2.ospath.endswith(new2name))
        self.assertEqual(new.read(), new2.read())

        fdir = dir.abspath()
        self.assertNotIn("???", fdir.path)
        self.assertIn("???", fdir.ospath)
        self.assertEqual(fdir.scheme, "file")
        fnew = dir.join("test_jf.txt")
        fnew.write(b"Content")

        fnew2 = fdir.join(new2name)
        fnew2.write(b"Content")
        self.assertTrue(fnew2.ospath.endswith(new2name))
        self.assertNotIn("###", fnew2.path)

        self.assertEqual(fnew.read(), fnew2.read())

        # Test that children relative to schemeless and file schemes
        # still return the same unquoted name
        self.assertEqual(fnew2.relative_to(fdir), new2name, f"{fnew2}.relative_to({fdir})")
        self.assertEqual(fnew2.relative_to(dir), new2name, f"{fnew2}.relative_to({dir})")
        self.assertEqual(new2.relative_to(fdir), new2name, f"{new2}.relative_to({fdir})")
        self.assertEqual(new2.relative_to(dir), new2name, f"{new2}.relative_to({dir})")

        # Check for double quoting
        plus_path = "/a/b/c+d/"
        with self.assertLogs(level="WARNING"):
            uri = ResourcePath(urllib.parse.quote(plus_path), forceDirectory=True)
        self.assertEqual(uri.ospath, plus_path)

        # Check that # is not escaped for schemeless URIs
        hash_path = "/a/b#/c&d#xyz"
        hpos = hash_path.rfind("#")
        uri = ResourcePath(hash_path)
        self.assertEqual(uri.ospath, hash_path[:hpos])
        self.assertEqual(uri.fragment, hash_path[hpos + 1 :])

    def testHash(self):
        """Test that we can store URIs in sets and as keys."""
        uri1 = ResourcePath(TESTDIR)
        uri2 = uri1.join("test/")
        s = {uri1, uri2}
        self.assertIn(uri1, s)

        d = {uri1: "1", uri2: "2"}
        self.assertEqual(d[uri2], "2")

    def testWalk(self):
        """Test ResourcePath.walk()."""
        test_dir_uri = ResourcePath(TESTDIR)

        # Look for a file that is not there
        file = test_dir_uri.join("config/basic/butler.yaml")
        found = list(ResourcePath.findFileResources([file]))
        self.assertEqual(found[0], file)

        # Compare against the full local paths
        expected = {
            p for p in glob.glob(os.path.join(TESTDIR, "data", "**"), recursive=True) if os.path.isfile(p)
        }
        found = {u.ospath for u in ResourcePath.findFileResources([test_dir_uri.join("data")])}
        self.assertEqual(found, expected)

        # Now solely the YAML files
        expected_yaml = set(glob.glob(os.path.join(TESTDIR, "data", "**", "*.yaml"), recursive=True))
        found = {
            u.ospath
            for u in ResourcePath.findFileResources([test_dir_uri.join("data")], file_filter=r".*\.yaml$")
        }
        self.assertEqual(found, expected_yaml)

        # Now two explicit directories and a file
        expected = set(glob.glob(os.path.join(TESTDIR, "data", "dir1", "*.yaml"), recursive=True))
        expected.update(set(glob.glob(os.path.join(TESTDIR, "data", "dir2", "*.yaml"), recursive=True)))
        expected.add(file.ospath)

        found = {
            u.ospath
            for u in ResourcePath.findFileResources(
                [file, test_dir_uri.join("data/dir1"), test_dir_uri.join("data/dir2")],
                file_filter=r".*\.yaml$",
            )
        }
        self.assertEqual(found, expected)

        # Group by directory -- find everything and compare it with what
        # we expected to be there in total.
        found_yaml = set()
        counter = 0
        for uris in ResourcePath.findFileResources(
            [file, test_dir_uri.join("data/")], file_filter=r".*\.yaml$", grouped=True
        ):
            found = {u.ospath for u in uris}
            if found:
                counter += 1

            found_yaml.update(found)

        # Build a separate set so that expected_yaml itself is not mutated.
        expected_yaml_2 = expected_yaml | {file.ospath}
        self.assertEqual(found_yaml, expected_yaml_2)
        self.assertEqual(counter, 3)

        # Grouping but check that single files are returned in a single group
        # at the end
        file2 = test_dir_uri.join("config/templates/templates-bad.yaml")
        found = list(
            ResourcePath.findFileResources([file, file2, test_dir_uri.join("data/dir2")], grouped=True)
        )
        self.assertEqual(len(found), 2)
        self.assertEqual(list(found[1]), [file, file2])

        with self.assertRaises(ValueError):
            list(file.walk())

    def testRootURI(self):
        """Test ResourcePath.root_uri()."""
        uri = ResourcePath("https://www.notexist.com:8080/file/test")
        uri2 = ResourcePath("s3://www.notexist.com/file/test")
        self.assertEqual(uri.root_uri().geturl(), "https://www.notexist.com:8080/")
        self.assertEqual(uri2.root_uri().geturl(), "s3://www.notexist.com/")

    def testJoin(self):
        """Test .join method."""

        root_str = "s3://bucket/hsc/payload/"
        root = ResourcePath(root_str)

        self.assertEqual(root.join("b/test.txt").geturl(), f"{root_str}b/test.txt")
        add_dir = root.join("b/c/d/")
        self.assertTrue(add_dir.isdir())
        self.assertEqual(add_dir.geturl(), f"{root_str}b/c/d/")

        up_relative = root.join("../b/c.txt")
        self.assertFalse(up_relative.isdir())
        self.assertEqual(up_relative.geturl(), "s3://bucket/hsc/b/c.txt")

        quote_example = "b&c.t@x#t"
        needs_quote = root.join(quote_example)
        self.assertEqual(needs_quote.unquoted_path, f"/hsc/payload/{quote_example}")

        other = ResourcePath("file://localhost/test.txt")
        self.assertEqual(root.join(other), other)
        self.assertEqual(other.join("b/new.txt").geturl(), "file://localhost/b/new.txt")

        joined = ResourcePath("s3://bucket/hsc/payload/").join(
            ResourcePath("test.qgraph", forceAbsolute=False)
        )
        self.assertEqual(joined, ResourcePath("s3://bucket/hsc/payload/test.qgraph"))

        with self.assertRaises(ValueError):
            ResourcePath("s3://bucket/hsc/payload/").join(ResourcePath("test.qgraph"))

    def testTemporary(self):
        """Test that temporary_uri() yields a URI that is gone on exit."""
        with ResourcePath.temporary_uri(suffix=".json") as tmp:
            self.assertEqual(tmp.getExtension(), ".json", f"uri: {tmp}")
            self.assertTrue(tmp.isabs(), f"uri: {tmp}")
            self.assertFalse(tmp.exists(), f"uri: {tmp}")
            tmp.write(b"abcd")
            self.assertTrue(tmp.exists(), f"uri: {tmp}")
            self.assertTrue(tmp.isTemporary)
        self.assertFalse(tmp.exists(), f"uri: {tmp}")

        tmpdir = ResourcePath(self.tmpdir, forceDirectory=True)
        with ResourcePath.temporary_uri(prefix=tmpdir, suffix=".yaml") as tmp:
            # Use a specified tmpdir and check it is okay for the file
            # to not be created.
            self.assertFalse(tmp.exists(), f"uri: {tmp}")
        self.assertTrue(tmpdir.exists(), f"uri: {tmpdir} still exists")

    def test_open(self):
        """Run the shared open() checks against local temporary files."""
        tmpdir = ResourcePath(self.tmpdir, forceDirectory=True)
        with ResourcePath.temporary_uri(prefix=tmpdir, suffix=".txt") as tmp:
            _check_open(self, tmp, mode_suffixes=("", "t"))
            _check_open(self, tmp, mode_suffixes=("t",), encoding="utf-16")
            _check_open(self, tmp, mode_suffixes=("t",), prefer_file_temporary=True)
            _check_open(self, tmp, mode_suffixes=("t",), encoding="utf-16", prefer_file_temporary=True)
        with ResourcePath.temporary_uri(prefix=tmpdir, suffix=".dat") as tmp:
            _check_open(self, tmp, mode_suffixes=("b",))
            # One-element tuple: ("b") without the comma is just the str "b".
            _check_open(self, tmp, mode_suffixes=("b",), prefer_file_temporary=True)

589 

590 

@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!")
@mock_s3
class S3URITestCase(unittest.TestCase):
    """Tests involving S3"""

    bucketName = "any_bucket"
    """Bucket name to use in tests"""

    def setUp(self):
        # Local test directory
        self.tmpdir = makeTestTempDir(TESTDIR)

        # set up some fake credentials if they do not exist
        self.usingDummyCredentials = setAwsEnvCredentials()

        # MOTO needs to know that we expect Bucket bucketname to exist
        s3 = boto3.resource("s3")
        s3.create_bucket(Bucket=self.bucketName)

    def tearDown(self):
        # Credentials and the local temp dir are cleaned up in ``finally`` so
        # that a failure while emptying or deleting the bucket does not leak
        # them into subsequent tests.
        try:
            s3 = boto3.resource("s3")
            bucket = s3.Bucket(self.bucketName)
            try:
                bucket.objects.all().delete()
            except botocore.exceptions.ClientError as e:
                # A 404 means the key was not reachable, which is fine here;
                # anything else is a real error.
                if e.response["Error"]["Code"] != "404":
                    raise

            bucket.delete()
        finally:
            # unset any potentially set dummy credentials
            if self.usingDummyCredentials:
                unsetAwsEnvCredentials()

            shutil.rmtree(self.tmpdir, ignore_errors=True)

    def makeS3Uri(self, path):
        """Return an ``s3://`` URI string for ``path`` in the test bucket."""
        return f"s3://{self.bucketName}/{path}"

    def testTransfer(self):
        """Test copies between local files and S3, and between S3 objects."""
        src = ResourcePath(os.path.join(self.tmpdir, "test.txt"))
        content = "Content is some content\nwith something to say\n\n"
        src.write(content.encode())
        self.assertTrue(src.exists())
        self.assertEqual(src.size(), len(content.encode()))

        dest = ResourcePath(self.makeS3Uri("test.txt"))
        self.assertFalse(dest.exists())

        with self.assertRaises(FileNotFoundError):
            dest.size()

        dest.transfer_from(src, transfer="copy")
        self.assertTrue(dest.exists())

        dest2 = ResourcePath(self.makeS3Uri("copied.txt"))
        dest2.transfer_from(dest, transfer="copy")
        self.assertTrue(dest2.exists())

        local = ResourcePath(os.path.join(self.tmpdir, "copied.txt"))
        local.transfer_from(dest2, transfer="copy")
        with open(local.ospath, "r") as fd:
            new_content = fd.read()
        self.assertEqual(new_content, content)

        with self.assertRaises(ValueError):
            dest2.transfer_from(local, transfer="symlink")

        b = dest.read()
        self.assertEqual(b.decode(), new_content)

        nbytes = 10
        subset = dest.read(size=nbytes)
        # Exactly the requested number of bytes must come back.
        self.assertEqual(len(subset), nbytes)
        self.assertEqual(subset.decode(), content[:nbytes])

        with self.assertRaises(FileExistsError):
            dest.transfer_from(src, transfer="copy")

        dest.transfer_from(src, transfer="copy", overwrite=True)

    def testWalk(self):
        """Test that we can list an S3 bucket"""
        # Files we want to create
        expected = ("a/x.txt", "a/y.txt", "a/z.json", "a/b/w.txt", "a/b/c/d/v.json")
        expected_uris = [ResourcePath(self.makeS3Uri(path)) for path in expected]
        for uri in expected_uris:
            # Doesn't matter what we write
            uri.write("123".encode())

        # Find all the files in the a/ tree
        found = set(uri.path for uri in ResourcePath.findFileResources([ResourcePath(self.makeS3Uri("a/"))]))
        self.assertEqual(found, {uri.path for uri in expected_uris})

        # Find all the files in the a/ tree but group by folder
        found = ResourcePath.findFileResources([ResourcePath(self.makeS3Uri("a/"))], grouped=True)
        expected = (("/a/x.txt", "/a/y.txt", "/a/z.json"), ("/a/b/w.txt",), ("/a/b/c/d/v.json",))

        for got, expect in zip(found, expected):
            self.assertEqual(tuple(u.path for u in got), expect)

        # Find only JSON files
        found = set(
            uri.path
            for uri in ResourcePath.findFileResources(
                [ResourcePath(self.makeS3Uri("a/"))], file_filter=r"\.json$"
            )
        )
        self.assertEqual(found, {uri.path for uri in expected_uris if uri.path.endswith(".json")})

        # JSON files grouped by directory
        found = ResourcePath.findFileResources(
            [ResourcePath(self.makeS3Uri("a/"))], file_filter=r"\.json$", grouped=True
        )
        expected = (("/a/z.json",), ("/a/b/c/d/v.json",))

        for got, expect in zip(found, expected):
            self.assertEqual(tuple(u.path for u in got), expect)

        # Check pagination works with large numbers of files. S3 API limits
        # us to 1000 response per list_objects call so create lots of files
        created = set()
        counter = 1
        n_dir1 = 1100
        while counter <= n_dir1:
            new = ResourcePath(self.makeS3Uri(f"test/file{counter:04d}.txt"))
            new.write(f"{counter}".encode())
            created.add(str(new))
            counter += 1
        counter = 1
        # Put some in a subdirectory to make sure we are looking in a
        # hierarchy.
        n_dir2 = 100
        while counter <= n_dir2:
            new = ResourcePath(self.makeS3Uri(f"test/subdir/file{counter:04d}.txt"))
            new.write(f"{counter}".encode())
            created.add(str(new))
            counter += 1

        found = ResourcePath.findFileResources([ResourcePath(self.makeS3Uri("test/"))])
        self.assertEqual({str(u) for u in found}, created)

        # Again with grouping.
        found = list(ResourcePath.findFileResources([ResourcePath(self.makeS3Uri("test/"))], grouped=True))
        self.assertEqual(len(found), 2)
        dir_1 = list(found[0])
        dir_2 = list(found[1])
        self.assertEqual(len(dir_1), n_dir1)
        self.assertEqual(len(dir_2), n_dir2)

    def testWrite(self):
        """Test that bytes written to an S3 URI can be read back."""
        s3write = ResourcePath(self.makeS3Uri("created.txt"))
        content = "abcdefghijklmnopqrstuv\n"
        s3write.write(content.encode())
        self.assertEqual(s3write.read().decode(), content)

    def testTemporary(self):
        """Test temporary_uri() with an S3 prefix."""
        s3root = ResourcePath(self.makeS3Uri("rootdir"), forceDirectory=True)
        with ResourcePath.temporary_uri(prefix=s3root, suffix=".json") as tmp:
            self.assertEqual(tmp.getExtension(), ".json", f"uri: {tmp}")
            self.assertEqual(tmp.scheme, "s3", f"uri: {tmp}")
            self.assertEqual(tmp.parent(), s3root)
            basename = tmp.basename()
            content = "abcd"
            tmp.write(content.encode())
            self.assertTrue(tmp.exists(), f"uri: {tmp}")
        self.assertFalse(tmp.exists())

        # Again without writing anything, to check that there is no complaint
        # on exit of context manager.
        with ResourcePath.temporary_uri(prefix=s3root, suffix=".json") as tmp:
            self.assertFalse(tmp.exists())
            # Check that the file has a different name than before.
            self.assertNotEqual(tmp.basename(), basename, f"uri: {tmp}")
        self.assertFalse(tmp.exists())

    def testRelative(self):
        """Check that we can get subpaths back from two URIs"""
        parent = ResourcePath(self.makeS3Uri("rootdir"), forceDirectory=True)
        child = ResourcePath(self.makeS3Uri("rootdir/dir1/file.txt"))

        self.assertEqual(child.relative_to(parent), "dir1/file.txt")

        not_child = ResourcePath(self.makeS3Uri("/a/b/dir1/file.txt"))
        self.assertFalse(not_child.relative_to(parent))

        not_s3 = ResourcePath(os.path.join(self.tmpdir, "dir1", "file2.txt"))
        self.assertFalse(child.relative_to(not_s3))

    def testQuoting(self):
        """Check that quoting works."""
        parent = ResourcePath(self.makeS3Uri("rootdir"), forceDirectory=True)
        subpath = "rootdir/dir1+/file?.txt"
        child = ResourcePath(self.makeS3Uri(urllib.parse.quote(subpath)))

        self.assertEqual(child.relative_to(parent), "dir1+/file?.txt")
        self.assertEqual(child.basename(), "file?.txt")
        self.assertEqual(child.relativeToPathRoot, subpath)
        self.assertIn("%", child.path)
        self.assertEqual(child.unquoted_path, "/" + subpath)

    def test_open(self):
        """Run the shared open() checks against S3 objects."""
        text_uri = ResourcePath(self.makeS3Uri("file.txt"))
        _check_open(self, text_uri, mode_suffixes=("", "t"))
        _check_open(self, text_uri, mode_suffixes=("t",), encoding="utf-16")
        _check_open(self, text_uri, mode_suffixes=("t",), prefer_file_temporary=True)
        _check_open(self, text_uri, mode_suffixes=("t",), prefer_file_temporary=True, encoding="utf-16")
        binary_uri = ResourcePath(self.makeS3Uri("file.dat"))
        _check_open(self, binary_uri, mode_suffixes=("b",))
        _check_open(self, binary_uri, mode_suffixes=("b",), prefer_file_temporary=True)

805 

806 

# Provide the environment variables required by the tests below.
@unittest.mock.patch.dict(
    os.environ,
    {
        "LSST_BUTLER_WEBDAV_AUTH": "TOKEN",
        "LSST_BUTLER_WEBDAV_TOKEN_FILE": os.path.join(TESTDIR, "data/webdav/token"),
        "LSST_BUTLER_WEBDAV_CA_BUNDLE": "/path/to/ca/certs",
    },
)
class WebdavURITestCase(unittest.TestCase):
    """Exercise ``https`` ResourcePath operations against HTTP endpoints
    mocked with the ``responses`` package.
    """

    def setUp(self):
        """Create the URIs under test and register every mocked response.

        The registration order is kept stable on purpose because the mock
        registry is consulted in insertion order.
        """
        host = "www.not-exists.orgx"
        folder = "existingFolder"
        present = "existingFile"
        absent = "notExistingFile"

        self.baseURL = ResourcePath(f"https://{host}", forceDirectory=True)
        self.existingFileResourcePath = ResourcePath(f"https://{host}/{folder}/{present}")
        self.notExistingFileResourcePath = ResourcePath(f"https://{host}/{folder}/{absent}")
        self.existingFolderResourcePath = ResourcePath(f"https://{host}/{folder}", forceDirectory=True)
        self.notExistingFolderResourcePath = ResourcePath(f"https://{host}/{absent}", forceDirectory=True)

        # The server must advertise its options.
        responses.add(responses.OPTIONS, self.baseURL.geturl(), status=200, headers={"DAV": "1,2,3"})

        # Needed for HttpResourcePath.exists()
        responses.add(
            responses.HEAD,
            self.existingFileResourcePath.geturl(),
            status=200,
            headers={"Content-Length": "1024"},
        )
        responses.add(responses.HEAD, self.notExistingFileResourcePath.geturl(), status=404)

        # Needed for HttpResourcePath.read()
        responses.add(
            responses.GET,
            self.existingFileResourcePath.geturl(),
            status=200,
            body="It works!".encode(),
        )
        responses.add(responses.GET, self.notExistingFileResourcePath.geturl(), status=404)

        # Needed for HttpResourcePath.write()
        responses.add(responses.PUT, self.existingFileResourcePath.geturl(), status=201)

        # Needed for HttpResourcePath.transfer_from(): two COPY responses
        # followed by one MOVE response, all for the existing file.
        transfer_specs = (
            ("COPY", self.existingFileResourcePath),
            ("COPY", self.notExistingFileResourcePath),
            ("MOVE", self.notExistingFileResourcePath),
        )
        for verb, destination in transfer_specs:
            responses.add(
                responses.Response(
                    url=self.existingFileResourcePath.geturl(),
                    method=verb,
                    headers={"Destination": destination.geturl()},
                    status=201,
                )
            )

        # Needed for HttpResourcePath.remove()
        responses.add(responses.DELETE, self.existingFileResourcePath.geturl(), status=200)
        responses.add(responses.DELETE, self.notExistingFileResourcePath.geturl(), status=404)

        # Needed for HttpResourcePath.mkdir()
        responses.add(
            responses.HEAD,
            self.existingFolderResourcePath.geturl(),
            status=200,
            headers={"Content-Length": "1024"},
        )
        responses.add(responses.HEAD, self.baseURL.geturl(), status=200, headers={"Content-Length": "1024"})
        responses.add(responses.HEAD, self.notExistingFolderResourcePath.geturl(), status=404)
        responses.add(
            responses.Response(url=self.notExistingFolderResourcePath.geturl(), method="MKCOL", status=201)
        )
        responses.add(
            responses.Response(url=self.existingFolderResourcePath.geturl(), method="MKCOL", status=403)
        )

    @responses.activate
    def testExists(self):
        """Check ``exists`` and ``size`` for a present and a missing file."""
        present = self.existingFileResourcePath
        missing = self.notExistingFileResourcePath

        self.assertTrue(present.exists())
        self.assertFalse(missing.exists())

        self.assertEqual(present.size(), 1024)
        with self.assertRaises(FileNotFoundError):
            missing.size()

    @responses.activate
    def testRemove(self):
        """Check ``remove`` for a present and a missing file."""
        outcome = self.existingFileResourcePath.remove()
        self.assertIsNone(outcome)
        with self.assertRaises(FileNotFoundError):
            self.notExistingFileResourcePath.remove()

    @responses.activate
    def testMkdir(self):
        """Check ``mkdir`` on directory URIs and on a non-directory URI."""
        # Because of the mock, existence cannot be verified afterwards.
        self.notExistingFolderResourcePath.mkdir()

        # Expected to be a no-op.
        self.existingFolderResourcePath.mkdir()

        # A URI that was not created with forceDirectory must be rejected.
        with self.assertRaises(ValueError):
            self.notExistingFileResourcePath.mkdir()

    @responses.activate
    def testRead(self):
        """Check ``read`` for a present and a missing file."""
        present = self.existingFileResourcePath

        # Two separate reads are issued deliberately, one per assertion.
        self.assertEqual(present.read().decode(), "It works!")
        self.assertNotEqual(present.read().decode(), "Nope.")
        with self.assertRaises(FileNotFoundError):
            self.notExistingFileResourcePath.read()

    @responses.activate
    def testWrite(self):
        """Check ``write`` with the default options and with
        ``overwrite=False`` on a file that already exists.
        """
        payload = "Some content.".encode()
        target = self.existingFileResourcePath

        self.assertIsNone(target.write(data=payload))
        with self.assertRaises(FileExistsError):
            target.write(data=payload, overwrite=False)

    @responses.activate
    def testTransfer(self):
        """Check ``transfer_from`` for the default mode, ``move``, an
        already existing destination and an unsupported mode.
        """
        source = self.existingFileResourcePath
        target = self.notExistingFileResourcePath

        self.assertIsNone(target.transfer_from(src=source))
        self.assertIsNone(target.transfer_from(src=source, transfer="move"))
        with self.assertRaises(FileExistsError):
            source.transfer_from(src=source)
        with self.assertRaises(ValueError):
            target.transfer_from(src=source, transfer="unsupported")

    def testParent(self):
        """Check ``parent`` and ``dirname`` for file and directory URIs."""
        folder_url = self.existingFolderResourcePath.geturl()
        self.assertEqual(folder_url, self.notExistingFileResourcePath.parent().geturl())

        # The parent of the base URL is expected to be the base URL itself.
        root_url = self.baseURL.geturl()
        self.assertEqual(root_url, self.baseURL.parent().geturl())

        present = self.existingFileResourcePath
        self.assertEqual(present.parent().geturl(), present.dirname().geturl())

971 

972 

# Run the whole test suite when this module is executed as a script.
if __name__ == "__main__":
    unittest.main()