Coverage for tests/test_uri.py: 12%

Shortcuts on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

518 statements  

1# This file is part of lsst-resources. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# Use of this source code is governed by a 3-clause BSD-style 

10# license that can be found in the LICENSE file. 

11 

import glob
import os
import pathlib
import shutil
import unittest
import unittest.mock
import urllib.parse

import responses

20 

# boto3, botocore and moto are optional dependencies.  When any of them is
# missing, ``boto3`` is set to None (used below to skip the S3 tests) and
# ``mock_s3`` is replaced with a pass-through decorator so that the module
# can still be imported.
try:
    import boto3
    import botocore
    from moto import mock_s3
except ImportError:
    boto3 = None

    def mock_s3(cls):
        """Return ``cls`` unchanged; stand-in used when moto cannot be imported."""
        return cls

31 

32 

33from lsst.resources import ResourcePath 

34from lsst.resources.s3utils import setAwsEnvCredentials, unsetAwsEnvCredentials 

35from lsst.resources.utils import makeTestTempDir, removeTestTempDir 

36 

# Absolute path of the directory containing this test module; temporary
# directories and test data are located relative to it.
TESTDIR = os.path.abspath(os.path.dirname(__file__))

38 

39 

class FileURITestCase(unittest.TestCase):
    """Concrete tests for local files."""

    def setUp(self):
        # Use a local tempdir because on macOS the temp dirs use symlinks
        # so relsymlink gets quite confused.
        self.tmpdir = makeTestTempDir(TESTDIR)

    def tearDown(self):
        removeTestTempDir(self.tmpdir)

    def testFile(self):
        """Test existence, write, read and size of a local file URI."""
        filename = os.path.join(self.tmpdir, "test.txt")
        uri = ResourcePath(filename)
        self.assertFalse(uri.exists(), f"{uri} should not exist")
        self.assertEqual(uri.ospath, filename)

        # Construction from a pathlib.Path should give the same OS path.
        path = pathlib.Path(filename)
        uri = ResourcePath(path)
        self.assertEqual(uri.ospath, filename)

        content = "abcdefghijklmnopqrstuv\n"
        uri.write(content.encode())
        self.assertTrue(os.path.exists(filename), "File should exist locally")
        self.assertTrue(uri.exists(), f"{uri} should now exist")
        self.assertEqual(uri.read().decode(), content)
        self.assertEqual(uri.size(), len(content.encode()))

        with self.assertRaises(FileNotFoundError):
            ResourcePath("file/not/there.txt").size()

        # Check that creating a URI from a URI returns the same thing
        uri2 = ResourcePath(uri)
        self.assertEqual(uri, uri2)
        self.assertIs(uri, uri2)

        with self.assertRaises(ValueError):
            # Scheme-less URIs are not allowed to support non-file roots
            # at the present time. This may change in the future to become
            # equivalent to ResourcePath.join()
            ResourcePath("a/b.txt", root=ResourcePath("s3://bucket/a/b/"))

    def testExtension(self):
        """Test replacement of file extensions."""
        uri = ResourcePath(os.path.join(self.tmpdir, "test.txt"))
        self.assertEqual(uri.updatedExtension(None), uri)
        self.assertEqual(uri.updatedExtension(".txt"), uri)
        # An unchanged extension is expected to hand back the very same object.
        self.assertIs(uri.updatedExtension(".txt"), uri)

        fits = uri.updatedExtension(".fits.gz")
        self.assertEqual(fits.basename(), "test.fits.gz")
        self.assertEqual(fits.updatedExtension(".jpeg").basename(), "test.jpeg")

    def testRelative(self):
        """Check that we can get subpaths back from two URIs"""
        parent = ResourcePath(self.tmpdir, forceDirectory=True, forceAbsolute=True)
        self.assertTrue(parent.isdir())
        child = ResourcePath(os.path.join(self.tmpdir, "dir1", "file.txt"), forceAbsolute=True)

        self.assertEqual(child.relative_to(parent), "dir1/file.txt")

        not_child = ResourcePath("/a/b/dir1/file.txt")
        self.assertIsNone(not_child.relative_to(parent))
        self.assertFalse(not_child.isdir())

        not_directory = ResourcePath(os.path.join(self.tmpdir, "dir1", "file2.txt"))
        self.assertIsNone(child.relative_to(not_directory))

        # Relative URIs
        parent = ResourcePath("a/b/", forceAbsolute=False)
        child = ResourcePath("a/b/c/d.txt", forceAbsolute=False)
        self.assertFalse(child.scheme)
        self.assertEqual(child.relative_to(parent), "c/d.txt")

        # File URI and schemeless URI
        parent = ResourcePath("file:/a/b/c/")
        child = ResourcePath("e/f/g.txt", forceAbsolute=False)

        # If the child is relative and the parent is absolute we assume
        # that the child is a child of the parent unless it uses ".."
        self.assertEqual(child.relative_to(parent), "e/f/g.txt")

        child = ResourcePath("../e/f/g.txt", forceAbsolute=False)
        self.assertIsNone(child.relative_to(parent))

        child = ResourcePath("../c/e/f/g.txt", forceAbsolute=False)
        self.assertEqual(child.relative_to(parent), "e/f/g.txt")

        # Test non-file root with relative path.
        child = ResourcePath("e/f/g.txt", forceAbsolute=False)
        parent = ResourcePath("s3://hello/a/b/c/")
        self.assertEqual(child.relative_to(parent), "e/f/g.txt")

        # Test with different netloc
        child = ResourcePath("http://my.host/a/b/c.txt")
        parent = ResourcePath("http://other.host/a/")
        self.assertIsNone(child.relative_to(parent), f"{child}.relative_to({parent})")

        # Schemeless absolute child.
        # Schemeless absolute URI is constructed using root= parameter.
        parent = ResourcePath("file:///a/b/c/")
        child = ResourcePath("d/e.txt", root=parent)
        self.assertEqual(child.relative_to(parent), "d/e.txt", f"{child}.relative_to({parent})")

        parent = ResourcePath("c/", root="/a/b/")
        self.assertEqual(child.relative_to(parent), "d/e.txt", f"{child}.relative_to({parent})")

        # Absolute schemeless child with relative parent will always fail.
        parent = ResourcePath("d/e.txt", forceAbsolute=False)
        self.assertIsNone(child.relative_to(parent), f"{child}.relative_to({parent})")

    def testParents(self):
        """Test of splitting and parent walking."""
        parent = ResourcePath(self.tmpdir, forceDirectory=True, forceAbsolute=True)
        child_file = parent.join("subdir/file.txt")
        self.assertFalse(child_file.isdir())
        child_subdir, basename = child_file.split()
        self.assertEqual(basename, "file.txt")
        self.assertTrue(child_subdir.isdir())
        self.assertEqual(child_file.dirname(), child_subdir)
        self.assertEqual(child_file.basename(), basename)
        self.assertEqual(child_file.parent(), child_subdir)
        derived_parent = child_subdir.parent()
        self.assertEqual(derived_parent, parent)
        self.assertTrue(derived_parent.isdir())
        self.assertEqual(child_file.parent().parent(), parent)

    def testEnvVar(self):
        """Test that environment variables are expanded."""

        with unittest.mock.patch.dict(os.environ, {"MY_TEST_DIR": "/a/b/c"}):
            uri = ResourcePath("${MY_TEST_DIR}/d.txt")
        self.assertEqual(uri.path, "/a/b/c/d.txt")
        self.assertEqual(uri.scheme, "file")

        # This will not expand
        uri = ResourcePath("${MY_TEST_DIR}/d.txt", forceAbsolute=False)
        self.assertEqual(uri.path, "${MY_TEST_DIR}/d.txt")
        self.assertFalse(uri.scheme)

    def testMkdir(self):
        """Test creation of nested directories and writing a file inside."""
        tmpdir = ResourcePath(self.tmpdir)
        newdir = tmpdir.join("newdir/seconddir")
        newdir.mkdir()
        self.assertTrue(newdir.exists())
        newfile = newdir.join("temp.txt")
        newfile.write(b"Data")
        self.assertTrue(newfile.exists())

    def testTransfer(self):
        """Test local-to-local transfers for every supported transfer mode."""
        src = ResourcePath(os.path.join(self.tmpdir, "test.txt"))
        content = "Content is some content\nwith something to say\n\n"
        src.write(content.encode())

        for mode in ("copy", "link", "hardlink", "symlink", "relsymlink"):
            dest = ResourcePath(os.path.join(self.tmpdir, f"dest_{mode}.txt"))
            dest.transfer_from(src, transfer=mode)
            self.assertTrue(dest.exists(), f"Check that {dest} exists (transfer={mode})")

            with open(dest.ospath, "r") as fh:
                new_content = fh.read()
            self.assertEqual(new_content, content)

            if mode in ("symlink", "relsymlink"):
                self.assertTrue(os.path.islink(dest.ospath), f"Check that {dest} is symlink")

            # If the source and destination are hardlinks of each other
            # the transfer should work even if overwrite=False.
            if mode in ("link", "hardlink"):
                dest.transfer_from(src, transfer=mode)
            else:
                with self.assertRaises(
                    FileExistsError, msg=f"Overwrite of {dest} should not be allowed ({mode})"
                ):
                    dest.transfer_from(src, transfer=mode)

            dest.transfer_from(src, transfer=mode, overwrite=True)

            os.remove(dest.ospath)

        # new_content holds what was read back in the final loop iteration.
        b = src.read()
        self.assertEqual(b.decode(), new_content)

        nbytes = 10
        subset = src.read(size=nbytes)
        self.assertEqual(len(subset), nbytes)
        self.assertEqual(subset.decode(), content[:nbytes])

        with self.assertRaises(ValueError):
            src.transfer_from(src, transfer="unknown")

    def testTransferIdentical(self):
        """Test overwrite of identical files."""
        dir1 = ResourcePath(os.path.join(self.tmpdir, "dir1"), forceDirectory=True)
        dir1.mkdir()
        # dir2 is a symlink to dir1, so files below it are the same files
        # reached through a different path.
        dir2 = os.path.join(self.tmpdir, "dir2")
        os.symlink(dir1.ospath, dir2)

        # Write a test file.
        src_file = dir1.join("test.txt")
        content = "0123456"
        src_file.write(content.encode())

        # Construct URI to destination that should be identical.
        dest_file = ResourcePath(dir2, forceDirectory=True).join("test.txt")
        self.assertTrue(dest_file.exists())
        self.assertNotEqual(src_file, dest_file)

        # Transfer it over itself.
        dest_file.transfer_from(src_file, transfer="symlink", overwrite=True)
        new_content = dest_file.read().decode()
        self.assertEqual(content, new_content)

    def testResource(self):
        """Test access to Python package resources via the resource scheme."""
        # No resources in this package so need a resource in the main
        # python distribution.
        u = ResourcePath("resource://idlelib/Icons/README.txt")
        self.assertTrue(u.exists(), f"Check {u} exists")

        content = u.read().decode()
        self.assertIn("IDLE", content)

        truncated = u.read(size=9).decode()
        self.assertEqual(truncated, content[:9])

        # Check the directory itself (not the file checked above).
        d = ResourcePath("resource://idlelib/Icons", forceDirectory=True)
        self.assertTrue(d.exists(), f"Check directory {d} exists")

        j = d.join("README.txt")
        self.assertEqual(u, j)
        self.assertFalse(j.dirLike)
        self.assertFalse(j.isdir())
        not_there = d.join("not-there.yaml")
        self.assertFalse(not_there.exists())

        bad = ResourcePath("resource://bad.module/not.yaml")
        multi = ResourcePath.mexists([u, bad, not_there])
        self.assertTrue(multi[u])
        self.assertFalse(multi[bad])
        self.assertFalse(multi[not_there])

    def testEscapes(self):
        """Special characters in file paths"""
        src = ResourcePath("bbb/???/test.txt", root=self.tmpdir, forceAbsolute=True)
        self.assertFalse(src.scheme)
        src.write(b"Some content")
        self.assertTrue(src.exists())

        # abspath always returns a file scheme
        file_uri = src.abspath()
        self.assertTrue(file_uri.exists())
        self.assertIn("???", file_uri.ospath)
        self.assertNotIn("???", file_uri.path)

        file_uri = file_uri.updatedFile("tests??.txt")
        self.assertNotIn("??.txt", file_uri.path)
        file_uri.write(b"Other content")
        self.assertEqual(file_uri.read(), b"Other content")

        src = src.updatedFile("tests??.txt")
        self.assertIn("??.txt", src.path)
        self.assertEqual(
            file_uri.read(), src.read(), f"reading from {file_uri.ospath} and {src.ospath}"
        )

        # File URI and schemeless URI
        parent = ResourcePath("file:" + urllib.parse.quote("/a/b/c/de/??/"))
        child = ResourcePath("e/f/g.txt", forceAbsolute=False)
        self.assertEqual(child.relative_to(parent), "e/f/g.txt")

        child = ResourcePath("e/f??#/g.txt", forceAbsolute=False)
        self.assertEqual(child.relative_to(parent), "e/f??#/g.txt")

        child = ResourcePath("file:" + urllib.parse.quote("/a/b/c/de/??/e/f??#/g.txt"))
        self.assertEqual(child.relative_to(parent), "e/f??#/g.txt")

        self.assertEqual(child.relativeToPathRoot, "a/b/c/de/??/e/f??#/g.txt")

        # Schemeless so should not quote
        dir_uri = ResourcePath("bbb/???/", root=self.tmpdir, forceAbsolute=True, forceDirectory=True)
        self.assertIn("???", dir_uri.ospath)
        self.assertIn("???", dir_uri.path)
        self.assertFalse(dir_uri.scheme)

        # dir.join() morphs into a file scheme
        new = dir_uri.join("test_j.txt")
        self.assertIn("???", new.ospath, f"Checking {new}")
        new.write(b"Content")

        new2name = "###/test??.txt"
        new2 = dir_uri.join(new2name)
        self.assertIn("???", new2.ospath)
        new2.write(b"Content")
        self.assertTrue(new2.ospath.endswith(new2name))
        self.assertEqual(new.read(), new2.read())

        fdir = dir_uri.abspath()
        self.assertNotIn("???", fdir.path)
        self.assertIn("???", fdir.ospath)
        self.assertEqual(fdir.scheme, "file")
        # NOTE(review): this joins on the schemeless directory rather than
        # fdir, exactly as the original test did -- confirm that is intended.
        fnew = dir_uri.join("test_jf.txt")
        fnew.write(b"Content")

        fnew2 = fdir.join(new2name)
        fnew2.write(b"Content")
        self.assertTrue(fnew2.ospath.endswith(new2name))
        self.assertNotIn("###", fnew2.path)

        self.assertEqual(fnew.read(), fnew2.read())

        # Test that children relative to schemeless and file schemes
        # still return the same unquoted name
        self.assertEqual(fnew2.relative_to(fdir), new2name, f"{fnew2}.relative_to({fdir})")
        self.assertEqual(fnew2.relative_to(dir_uri), new2name, f"{fnew2}.relative_to({dir_uri})")
        self.assertEqual(new2.relative_to(fdir), new2name, f"{new2}.relative_to({fdir})")
        self.assertEqual(new2.relative_to(dir_uri), new2name, f"{new2}.relative_to({dir_uri})")

        # Check for double quoting
        plus_path = "/a/b/c+d/"
        with self.assertLogs(level="WARNING"):
            uri = ResourcePath(urllib.parse.quote(plus_path), forceDirectory=True)
        self.assertEqual(uri.ospath, plus_path)

        # Check that # is not escaped for schemeless URIs
        hash_path = "/a/b#/c&d#xyz"
        hpos = hash_path.rfind("#")
        uri = ResourcePath(hash_path)
        self.assertEqual(uri.ospath, hash_path[:hpos])
        self.assertEqual(uri.fragment, hash_path[hpos + 1 :])

    def testHash(self):
        """Test that we can store URIs in sets and as keys."""
        uri1 = ResourcePath(TESTDIR)
        uri2 = uri1.join("test/")
        s = {uri1, uri2}
        self.assertIn(uri1, s)

        d = {uri1: "1", uri2: "2"}
        self.assertEqual(d[uri2], "2")

    def testWalk(self):
        """Test ResourcePath.walk()."""
        test_dir_uri = ResourcePath(TESTDIR)

        # Look for a file that is not there
        explicit_file = test_dir_uri.join("config/basic/butler.yaml")
        found = list(ResourcePath.findFileResources([explicit_file]))
        self.assertEqual(found[0], explicit_file)

        # Compare against the full local paths
        expected = {
            p for p in glob.glob(os.path.join(TESTDIR, "data", "**"), recursive=True) if os.path.isfile(p)
        }
        found = {u.ospath for u in ResourcePath.findFileResources([test_dir_uri.join("data")])}
        self.assertEqual(found, expected)

        # Now solely the YAML files
        expected_yaml = set(glob.glob(os.path.join(TESTDIR, "data", "**", "*.yaml"), recursive=True))
        found = {
            u.ospath
            for u in ResourcePath.findFileResources([test_dir_uri.join("data")], file_filter=r".*\.yaml$")
        }
        self.assertEqual(found, expected_yaml)

        # Now two explicit directories and a file
        expected = set(glob.glob(os.path.join(TESTDIR, "data", "dir1", "*.yaml"), recursive=True))
        expected.update(set(glob.glob(os.path.join(TESTDIR, "data", "dir2", "*.yaml"), recursive=True)))
        expected.add(explicit_file.ospath)

        found = {
            u.ospath
            for u in ResourcePath.findFileResources(
                [explicit_file, test_dir_uri.join("data/dir1"), test_dir_uri.join("data/dir2")],
                file_filter=r".*\.yaml$",
            )
        }
        self.assertEqual(found, expected)

        # Group by directory -- find everything and compare it with what
        # we expected to be there in total.
        found_yaml = set()
        counter = 0
        for uris in ResourcePath.findFileResources(
            [explicit_file, test_dir_uri.join("data/")], file_filter=r".*\.yaml$", grouped=True
        ):
            found = {u.ospath for u in uris}
            if found:
                counter += 1

            found_yaml.update(found)

        # Build a separate set so the comparison does not depend on
        # mutating (and aliasing) expected_yaml.
        expected_yaml_2 = expected_yaml | {explicit_file.ospath}
        self.assertEqual(found_yaml, expected_yaml_2)
        self.assertEqual(counter, 3)

        # Grouping but check that single files are returned in a single group
        # at the end
        file2 = test_dir_uri.join("config/templates/templates-bad.yaml")
        found = list(
            ResourcePath.findFileResources(
                [explicit_file, file2, test_dir_uri.join("data/dir2")], grouped=True
            )
        )
        self.assertEqual(len(found), 2)
        self.assertEqual(list(found[1]), [explicit_file, file2])

        # Walking something that is not a directory is an error.
        with self.assertRaises(ValueError):
            list(explicit_file.walk())

    def testRootURI(self):
        """Test ResourcePath.root_uri()."""
        uri = ResourcePath("https://www.notexist.com:8080/file/test")
        uri2 = ResourcePath("s3://www.notexist.com/file/test")
        self.assertEqual(uri.root_uri().geturl(), "https://www.notexist.com:8080/")
        self.assertEqual(uri2.root_uri().geturl(), "s3://www.notexist.com/")

    def testJoin(self):
        """Test .join method."""

        root_str = "s3://bucket/hsc/payload/"
        root = ResourcePath(root_str)

        self.assertEqual(root.join("b/test.txt").geturl(), f"{root_str}b/test.txt")
        add_dir = root.join("b/c/d/")
        self.assertTrue(add_dir.isdir())
        self.assertEqual(add_dir.geturl(), f"{root_str}b/c/d/")

        quote_example = "b&c.t@x#t"
        needs_quote = root.join(quote_example)
        self.assertEqual(needs_quote.unquoted_path, f"/hsc/payload/{quote_example}")

        other = ResourcePath("file://localhost/test.txt")
        self.assertEqual(root.join(other), other)
        self.assertEqual(other.join("b/new.txt").geturl(), "file://localhost/b/new.txt")

        joined = ResourcePath("s3://bucket/hsc/payload/").join(
            ResourcePath("test.qgraph", forceAbsolute=False)
        )
        self.assertEqual(joined, ResourcePath("s3://bucket/hsc/payload/test.qgraph"))

        with self.assertRaises(ValueError):
            ResourcePath("s3://bucket/hsc/payload/").join(ResourcePath("test.qgraph"))

    def testTemporary(self):
        """Test that temporary URIs are removed when the context exits."""
        with ResourcePath.temporary_uri(suffix=".json") as tmp:
            self.assertEqual(tmp.getExtension(), ".json", f"uri: {tmp}")
            self.assertTrue(tmp.isabs(), f"uri: {tmp}")
            self.assertFalse(tmp.exists(), f"uri: {tmp}")
            tmp.write(b"abcd")
            self.assertTrue(tmp.exists(), f"uri: {tmp}")
            self.assertTrue(tmp.isTemporary)
        self.assertFalse(tmp.exists(), f"uri: {tmp}")

        tmpdir = ResourcePath(self.tmpdir, forceDirectory=True)
        with ResourcePath.temporary_uri(prefix=tmpdir, suffix=".yaml") as tmp:
            # Use a specified tmpdir and check it is okay for the file
            # to not be created.
            self.assertFalse(tmp.exists(), f"uri: {tmp}")
        self.assertTrue(tmpdir.exists(), f"uri: {tmpdir} still exists")

495 

496 

@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!")
@mock_s3
class S3URITestCase(unittest.TestCase):
    """Tests involving S3"""

    # Bucket name to use in tests
    bucketName = "any_bucket"

    def setUp(self):
        # Local test directory
        self.tmpdir = makeTestTempDir(TESTDIR)

        # set up some fake credentials if they do not exist
        self.usingDummyCredentials = setAwsEnvCredentials()

        # MOTO needs to know that we expect Bucket bucketname to exist
        s3 = boto3.resource("s3")
        s3.create_bucket(Bucket=self.bucketName)

    def tearDown(self):
        # The credential reset and temp dir removal live in ``finally`` so
        # that a failure while emptying or deleting the bucket cannot leak
        # dummy credentials or files into later tests.
        try:
            s3 = boto3.resource("s3")
            bucket = s3.Bucket(self.bucketName)
            try:
                bucket.objects.all().delete()
            except botocore.exceptions.ClientError as e:
                # A 404 means the key was not reachable, which is fine
                # during cleanup; anything else is a real error.
                if e.response["Error"]["Code"] != "404":
                    raise

            bucket.delete()
        finally:
            # unset any potentially set dummy credentials
            if self.usingDummyCredentials:
                unsetAwsEnvCredentials()

            shutil.rmtree(self.tmpdir, ignore_errors=True)

    def makeS3Uri(self, path):
        """Return the ``s3://`` URI string for ``path`` in the test bucket."""
        return f"s3://{self.bucketName}/{path}"

    def testTransfer(self):
        """Test copies between local files and S3, and between S3 keys."""
        src = ResourcePath(os.path.join(self.tmpdir, "test.txt"))
        content = "Content is some content\nwith something to say\n\n"
        src.write(content.encode())
        self.assertTrue(src.exists())
        self.assertEqual(src.size(), len(content.encode()))

        dest = ResourcePath(self.makeS3Uri("test.txt"))
        self.assertFalse(dest.exists())

        with self.assertRaises(FileNotFoundError):
            dest.size()

        dest.transfer_from(src, transfer="copy")
        self.assertTrue(dest.exists())

        dest2 = ResourcePath(self.makeS3Uri("copied.txt"))
        dest2.transfer_from(dest, transfer="copy")
        self.assertTrue(dest2.exists())

        local = ResourcePath(os.path.join(self.tmpdir, "copied.txt"))
        local.transfer_from(dest2, transfer="copy")
        with open(local.ospath, "r") as fd:
            new_content = fd.read()
        self.assertEqual(new_content, content)

        with self.assertRaises(ValueError):
            dest2.transfer_from(local, transfer="symlink")

        b = dest.read()
        self.assertEqual(b.decode(), new_content)

        # A sized read must return exactly the requested number of bytes.
        nbytes = 10
        subset = dest.read(size=nbytes)
        self.assertEqual(len(subset), nbytes)
        self.assertEqual(subset.decode(), content[:nbytes])

        with self.assertRaises(FileExistsError):
            dest.transfer_from(src, transfer="copy")

        dest.transfer_from(src, transfer="copy", overwrite=True)

    def testWalk(self):
        """Test that we can list an S3 bucket"""
        # Files we want to create
        expected = ("a/x.txt", "a/y.txt", "a/z.json", "a/b/w.txt", "a/b/c/d/v.json")
        expected_uris = [ResourcePath(self.makeS3Uri(path)) for path in expected]
        for uri in expected_uris:
            # Doesn't matter what we write
            uri.write(b"123")

        # Find all the files in the a/ tree
        found = {uri.path for uri in ResourcePath.findFileResources([ResourcePath(self.makeS3Uri("a/"))])}
        self.assertEqual(found, {uri.path for uri in expected_uris})

        # Find all the files in the a/ tree but group by folder
        found = ResourcePath.findFileResources([ResourcePath(self.makeS3Uri("a/"))], grouped=True)
        expected = (("/a/x.txt", "/a/y.txt", "/a/z.json"), ("/a/b/w.txt",), ("/a/b/c/d/v.json",))

        # NOTE(review): zip() stops at the shorter input, so a missing group
        # would go unnoticed here -- behavior kept as in the original test.
        for got, expect in zip(found, expected):
            self.assertEqual(tuple(u.path for u in got), expect)

        # Find only JSON files
        found = {
            uri.path
            for uri in ResourcePath.findFileResources(
                [ResourcePath(self.makeS3Uri("a/"))], file_filter=r"\.json$"
            )
        }
        self.assertEqual(found, {uri.path for uri in expected_uris if uri.path.endswith(".json")})

        # JSON files grouped by directory
        found = ResourcePath.findFileResources(
            [ResourcePath(self.makeS3Uri("a/"))], file_filter=r"\.json$", grouped=True
        )
        expected = (("/a/z.json",), ("/a/b/c/d/v.json",))

        for got, expect in zip(found, expected):
            self.assertEqual(tuple(u.path for u in got), expect)

        # Check pagination works with large numbers of files. S3 API limits
        # us to 1000 response per list_objects call so create lots of files
        created = set()
        n_dir1 = 1100
        for counter in range(1, n_dir1 + 1):
            new = ResourcePath(self.makeS3Uri(f"test/file{counter:04d}.txt"))
            new.write(f"{counter}".encode())
            created.add(str(new))

        # Put some in a subdirectory to make sure we are looking in a
        # hierarchy.
        n_dir2 = 100
        for counter in range(1, n_dir2 + 1):
            new = ResourcePath(self.makeS3Uri(f"test/subdir/file{counter:04d}.txt"))
            new.write(f"{counter}".encode())
            created.add(str(new))

        found = ResourcePath.findFileResources([ResourcePath(self.makeS3Uri("test/"))])
        self.assertEqual({str(u) for u in found}, created)

        # Again with grouping.
        found = list(ResourcePath.findFileResources([ResourcePath(self.makeS3Uri("test/"))], grouped=True))
        self.assertEqual(len(found), 2)
        dir_1 = list(found[0])
        dir_2 = list(found[1])
        self.assertEqual(len(dir_1), n_dir1)
        self.assertEqual(len(dir_2), n_dir2)

    def testWrite(self):
        """Test that content written to S3 can be read back."""
        s3write = ResourcePath(self.makeS3Uri("created.txt"))
        content = "abcdefghijklmnopqrstuv\n"
        s3write.write(content.encode())
        self.assertEqual(s3write.read().decode(), content)

    def testTemporary(self):
        """Test temporary URIs created below an S3 prefix."""
        s3root = ResourcePath(self.makeS3Uri("rootdir"), forceDirectory=True)
        with ResourcePath.temporary_uri(prefix=s3root, suffix=".json") as tmp:
            self.assertEqual(tmp.getExtension(), ".json", f"uri: {tmp}")
            self.assertEqual(tmp.scheme, "s3", f"uri: {tmp}")
            self.assertEqual(tmp.parent(), s3root)
            basename = tmp.basename()
            content = "abcd"
            tmp.write(content.encode())
            self.assertTrue(tmp.exists(), f"uri: {tmp}")
        self.assertFalse(tmp.exists())

        # Again without writing anything, to check that there is no complaint
        # on exit of context manager.
        with ResourcePath.temporary_uri(prefix=s3root, suffix=".json") as tmp:
            self.assertFalse(tmp.exists())
            # Check that the file has a different name than before.
            self.assertNotEqual(tmp.basename(), basename, f"uri: {tmp}")
        self.assertFalse(tmp.exists())

    def testRelative(self):
        """Check that we can get subpaths back from two URIs"""
        parent = ResourcePath(self.makeS3Uri("rootdir"), forceDirectory=True)
        child = ResourcePath(self.makeS3Uri("rootdir/dir1/file.txt"))

        self.assertEqual(child.relative_to(parent), "dir1/file.txt")

        not_child = ResourcePath(self.makeS3Uri("/a/b/dir1/file.txt"))
        self.assertFalse(not_child.relative_to(parent))

        not_s3 = ResourcePath(os.path.join(self.tmpdir, "dir1", "file2.txt"))
        self.assertFalse(child.relative_to(not_s3))

    def testQuoting(self):
        """Check that quoting works."""
        parent = ResourcePath(self.makeS3Uri("rootdir"), forceDirectory=True)
        subpath = "rootdir/dir1+/file?.txt"
        child = ResourcePath(self.makeS3Uri(urllib.parse.quote(subpath)))

        self.assertEqual(child.relative_to(parent), "dir1+/file?.txt")
        self.assertEqual(child.basename(), "file?.txt")
        self.assertEqual(child.relativeToPathRoot, subpath)
        self.assertIn("%", child.path)
        self.assertEqual(child.unquoted_path, "/" + subpath)

702 

# Mock required environment variables during tests
@unittest.mock.patch.dict(
    os.environ,
    {
        "LSST_BUTLER_WEBDAV_AUTH": "TOKEN",
        "LSST_BUTLER_WEBDAV_TOKEN_FILE": os.path.join(TESTDIR, "data/webdav/token"),
        "LSST_BUTLER_WEBDAV_CA_BUNDLE": "/path/to/ca/certs",
    },
)
class WebdavURITestCase(unittest.TestCase):
    """Tests of HTTP/WebDAV URIs against canned ``responses`` replies."""

    def setUp(self):
        server_root = "www.not-exists.orgx"
        existing_folder = "existingFolder"
        existing_file = "existingFile"
        missing_name = "notExistingFile"
        folder_url = f"https://{server_root}/{existing_folder}"

        self.baseURL = ResourcePath(f"https://{server_root}", forceDirectory=True)
        self.existingFileResourcePath = ResourcePath(f"{folder_url}/{existing_file}")
        self.notExistingFileResourcePath = ResourcePath(f"{folder_url}/{missing_name}")
        self.existingFolderResourcePath = ResourcePath(folder_url, forceDirectory=True)
        self.notExistingFolderResourcePath = ResourcePath(
            f"https://{server_root}/{missing_name}", forceDirectory=True
        )

        # URL strings shared by the registrations below.
        base = self.baseURL.geturl()
        present_file = self.existingFileResourcePath.geturl()
        absent_file = self.notExistingFileResourcePath.geturl()
        present_folder = self.existingFolderResourcePath.geturl()
        absent_folder = self.notExistingFolderResourcePath.geturl()
        length_header = {"Content-Length": "1024"}

        # Need to declare the options
        responses.add(responses.OPTIONS, base, status=200, headers={"DAV": "1,2,3"})

        # Used by HttpResourcePath.exists()
        responses.add(responses.HEAD, present_file, status=200, headers=dict(length_header))
        responses.add(responses.HEAD, absent_file, status=404)

        # Used by HttpResourcePath.read()
        responses.add(responses.GET, present_file, status=200, body=b"It works!")
        responses.add(responses.GET, absent_file, status=404)

        # Used by HttpResourcePath.write()
        responses.add(responses.PUT, present_file, status=201)

        # Used by HttpResourcePath.transfer_from()
        for verb, destination in (
            ("COPY", present_file),
            ("COPY", absent_file),
            ("MOVE", absent_file),
        ):
            responses.add(
                responses.Response(
                    url=present_file,
                    method=verb,
                    headers={"Destination": destination},
                    status=201,
                )
            )

        # Used by HttpResourcePath.remove()
        responses.add(responses.DELETE, present_file, status=200)
        responses.add(responses.DELETE, absent_file, status=404)

        # Used by HttpResourcePath.mkdir()
        responses.add(responses.HEAD, present_folder, status=200, headers=dict(length_header))
        responses.add(responses.HEAD, base, status=200, headers=dict(length_header))
        responses.add(responses.HEAD, absent_folder, status=404)
        responses.add(responses.Response(url=absent_folder, method="MKCOL", status=201))
        responses.add(responses.Response(url=present_folder, method="MKCOL", status=403))

    @responses.activate
    def testExists(self):
        """Test exists() and size() for present and absent files."""
        present = self.existingFileResourcePath
        absent = self.notExistingFileResourcePath

        self.assertTrue(present.exists())
        self.assertFalse(absent.exists())

        self.assertEqual(present.size(), 1024)
        with self.assertRaises(FileNotFoundError):
            absent.size()

    @responses.activate
    def testRemove(self):
        """Test remove() for present and absent files."""
        self.assertIsNone(self.existingFileResourcePath.remove())
        with self.assertRaises(FileNotFoundError):
            self.notExistingFileResourcePath.remove()

    @responses.activate
    def testMkdir(self):
        """Test mkdir() on new and existing folders and on a file URI."""
        # The mock means that we can't check this now exists
        self.notExistingFolderResourcePath.mkdir()

        # This should do nothing
        self.existingFolderResourcePath.mkdir()

        with self.assertRaises(ValueError):
            self.notExistingFileResourcePath.mkdir()

    @responses.activate
    def testRead(self):
        """Test read() for present and absent files."""
        present = self.existingFileResourcePath

        self.assertEqual(present.read().decode(), "It works!")
        self.assertNotEqual(present.read().decode(), "Nope.")
        with self.assertRaises(FileNotFoundError):
            self.notExistingFileResourcePath.read()

    @responses.activate
    def testWrite(self):
        """Test write() with and without permission to overwrite."""
        target = self.existingFileResourcePath
        payload = b"Some content."

        self.assertIsNone(target.write(data=payload))
        with self.assertRaises(FileExistsError):
            target.write(data=payload, overwrite=False)

    @responses.activate
    def testTransfer(self):
        """Test transfer_from() for default, move and unsupported modes."""
        source = self.existingFileResourcePath
        target = self.notExistingFileResourcePath

        self.assertIsNone(target.transfer_from(src=source))
        self.assertIsNone(target.transfer_from(src=source, transfer="move"))
        with self.assertRaises(FileExistsError):
            source.transfer_from(src=source)
        with self.assertRaises(ValueError):
            target.transfer_from(src=source, transfer="unsupported")

    def testParent(self):
        """Test parent() and dirname() of HTTP URIs."""
        folder_url = self.existingFolderResourcePath.geturl()
        self.assertEqual(folder_url, self.notExistingFileResourcePath.parent().geturl())

        # The parent of the base URL is expected to be the base URL itself.
        self.assertEqual(self.baseURL.geturl(), self.baseURL.parent().geturl())

        present = self.existingFileResourcePath
        self.assertEqual(present.parent().geturl(), present.dirname().geturl())

868 

# Allow the test module to be executed directly as a script.
if __name__ == "__main__":
    unittest.main()