Coverage for tests/test_uri.py: 12%

Shortcuts on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

516 statements  

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

import glob
import os
import pathlib
import shutil
import unittest
import unittest.mock
import urllib.parse

import responses

29 

# boto3, botocore and moto are optional; when any of them cannot be imported
# the S3 tests are skipped (they test ``boto3`` for truthiness).
try:
    import boto3
    import botocore
    from moto import mock_s3
except ImportError:
    boto3 = None

    def mock_s3(cls):
        """Return the decorated class unchanged.

        Stand-in for ``moto.mock_s3`` so that the ``@mock_s3`` decorator
        can still be applied when the import above fails.
        """
        return cls

41 

42from lsst.daf.butler import ButlerURI 

43from lsst.resources.s3utils import setAwsEnvCredentials, unsetAwsEnvCredentials 

44from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir 

45 

# Absolute path of the directory containing this test file. Temporary test
# directories are created from it and test configuration files are looked up
# relative to it.
TESTDIR = os.path.abspath(os.path.dirname(__file__))

47 

48 

class FileURITestCase(unittest.TestCase):
    """Concrete tests for local files."""

    def setUp(self):
        # Use a local tempdir because on macOS the temp dirs use symlinks
        # so relsymlink gets quite confused.
        self.tmpdir = makeTestTempDir(TESTDIR)

    def tearDown(self):
        removeTestTempDir(self.tmpdir)

    def testFile(self):
        """Test construction, writing, reading and sizing of a local file."""
        file = os.path.join(self.tmpdir, "test.txt")
        uri = ButlerURI(file)
        self.assertFalse(uri.exists(), f"{uri} should not exist")
        self.assertEqual(uri.ospath, file)

        path = pathlib.Path(file)
        uri = ButlerURI(path)
        self.assertEqual(uri.ospath, file)

        content = "abcdefghijklmnopqrstuv\n"
        uri.write(content.encode())
        self.assertTrue(os.path.exists(file), "File should exist locally")
        self.assertTrue(uri.exists(), f"{uri} should now exist")
        self.assertEqual(uri.read().decode(), content)
        self.assertEqual(uri.size(), len(content.encode()))

        with self.assertRaises(FileNotFoundError):
            ButlerURI("file/not/there.txt").size()

        # Check that creating a URI from a URI returns the same thing
        uri2 = ButlerURI(uri)
        self.assertEqual(uri, uri2)
        self.assertEqual(id(uri), id(uri2))

        with self.assertRaises(ValueError):
            # Scheme-less URIs are not allowed to support non-file roots
            # at the present time. This may change in the future to become
            # equivalent to ButlerURI.join()
            ButlerURI("a/b.txt", root=ButlerURI("s3://bucket/a/b/"))

    def testExtension(self):
        """Test replacing the file extension of a URI."""
        file = ButlerURI(os.path.join(self.tmpdir, "test.txt"))
        self.assertEqual(file.updatedExtension(None), file)
        self.assertEqual(file.updatedExtension(".txt"), file)
        self.assertEqual(id(file.updatedExtension(".txt")), id(file))

        fits = file.updatedExtension(".fits.gz")
        self.assertEqual(fits.basename(), "test.fits.gz")
        self.assertEqual(fits.updatedExtension(".jpeg").basename(), "test.jpeg")

    def testRelative(self):
        """Check that we can get subpaths back from two URIs"""
        parent = ButlerURI(self.tmpdir, forceDirectory=True, forceAbsolute=True)
        self.assertTrue(parent.isdir())
        child = ButlerURI(os.path.join(self.tmpdir, "dir1", "file.txt"), forceAbsolute=True)

        self.assertEqual(child.relative_to(parent), "dir1/file.txt")

        not_child = ButlerURI("/a/b/dir1/file.txt")
        self.assertIsNone(not_child.relative_to(parent))
        self.assertFalse(not_child.isdir())

        not_directory = ButlerURI(os.path.join(self.tmpdir, "dir1", "file2.txt"))
        self.assertIsNone(child.relative_to(not_directory))

        # Relative URIs
        parent = ButlerURI("a/b/", forceAbsolute=False)
        child = ButlerURI("a/b/c/d.txt", forceAbsolute=False)
        self.assertFalse(child.scheme)
        self.assertEqual(child.relative_to(parent), "c/d.txt")

        # File URI and schemeless URI
        parent = ButlerURI("file:/a/b/c/")
        child = ButlerURI("e/f/g.txt", forceAbsolute=False)

        # If the child is relative and the parent is absolute we assume
        # that the child is a child of the parent unless it uses ".."
        self.assertEqual(child.relative_to(parent), "e/f/g.txt")

        child = ButlerURI("../e/f/g.txt", forceAbsolute=False)
        self.assertIsNone(child.relative_to(parent))

        child = ButlerURI("../c/e/f/g.txt", forceAbsolute=False)
        self.assertEqual(child.relative_to(parent), "e/f/g.txt")

        # Test non-file root with relative path.
        child = ButlerURI("e/f/g.txt", forceAbsolute=False)
        parent = ButlerURI("s3://hello/a/b/c/")
        self.assertEqual(child.relative_to(parent), "e/f/g.txt")

        # Test with different netloc
        child = ButlerURI("http://my.host/a/b/c.txt")
        parent = ButlerURI("http://other.host/a/")
        self.assertIsNone(child.relative_to(parent), f"{child}.relative_to({parent})")

        # Schemeless absolute child.
        # Schemeless absolute URI is constructed using root= parameter.
        parent = ButlerURI("file:///a/b/c/")
        child = ButlerURI("d/e.txt", root=parent)
        self.assertEqual(child.relative_to(parent), "d/e.txt", f"{child}.relative_to({parent})")

        parent = ButlerURI("c/", root="/a/b/")
        self.assertEqual(child.relative_to(parent), "d/e.txt", f"{child}.relative_to({parent})")

        # Absolute schemeless child with relative parent will always fail.
        parent = ButlerURI("d/e.txt", forceAbsolute=False)
        self.assertIsNone(child.relative_to(parent), f"{child}.relative_to({parent})")

    def testParents(self):
        """Test of splitting and parent walking."""
        parent = ButlerURI(self.tmpdir, forceDirectory=True, forceAbsolute=True)
        child_file = parent.join("subdir/file.txt")
        self.assertFalse(child_file.isdir())
        child_subdir, file = child_file.split()
        self.assertEqual(file, "file.txt")
        self.assertTrue(child_subdir.isdir())
        self.assertEqual(child_file.dirname(), child_subdir)
        self.assertEqual(child_file.basename(), file)
        self.assertEqual(child_file.parent(), child_subdir)
        derived_parent = child_subdir.parent()
        self.assertEqual(derived_parent, parent)
        self.assertTrue(derived_parent.isdir())
        self.assertEqual(child_file.parent().parent(), parent)

    def testEnvVar(self):
        """Test that environment variables are expanded."""

        with unittest.mock.patch.dict(os.environ, {"MY_TEST_DIR": "/a/b/c"}):
            uri = ButlerURI("${MY_TEST_DIR}/d.txt")
        self.assertEqual(uri.path, "/a/b/c/d.txt")
        self.assertEqual(uri.scheme, "file")

        # This will not expand
        uri = ButlerURI("${MY_TEST_DIR}/d.txt", forceAbsolute=False)
        self.assertEqual(uri.path, "${MY_TEST_DIR}/d.txt")
        self.assertFalse(uri.scheme)

    def testMkdir(self):
        """Test creating nested directories and writing a file into them."""
        tmpdir = ButlerURI(self.tmpdir)
        newdir = tmpdir.join("newdir/seconddir")
        newdir.mkdir()
        self.assertTrue(newdir.exists())
        newfile = newdir.join("temp.txt")
        newfile.write(b"Data")
        self.assertTrue(newfile.exists())

    def testTransfer(self):
        """Test each local transfer mode, with and without overwrite."""
        src = ButlerURI(os.path.join(self.tmpdir, "test.txt"))
        content = "Content is some content\nwith something to say\n\n"
        src.write(content.encode())

        for mode in ("copy", "link", "hardlink", "symlink", "relsymlink"):
            dest = ButlerURI(os.path.join(self.tmpdir, f"dest_{mode}.txt"))
            dest.transfer_from(src, transfer=mode)
            self.assertTrue(dest.exists(), f"Check that {dest} exists (transfer={mode})")

            with open(dest.ospath, "r") as fh:
                new_content = fh.read()
            self.assertEqual(new_content, content)

            if mode in ("symlink", "relsymlink"):
                self.assertTrue(os.path.islink(dest.ospath), f"Check that {dest} is symlink")

            # If the source and destination are hardlinks of each other
            # the transfer should work even if overwrite=False.
            if mode in ("link", "hardlink"):
                dest.transfer_from(src, transfer=mode)
            else:
                with self.assertRaises(FileExistsError,
                                       msg=f"Overwrite of {dest} should not be allowed ({mode})"):
                    dest.transfer_from(src, transfer=mode)

            dest.transfer_from(src, transfer=mode, overwrite=True)

            os.remove(dest.ospath)

        b = src.read()
        self.assertEqual(b.decode(), new_content)

        nbytes = 10
        subset = src.read(size=nbytes)
        self.assertEqual(len(subset), nbytes)
        self.assertEqual(subset.decode(), content[:nbytes])

        with self.assertRaises(ValueError):
            src.transfer_from(src, transfer="unknown")

    def testTransferIdentical(self):
        """Test overwrite of identical files."""
        dir1 = ButlerURI(os.path.join(self.tmpdir, "dir1"), forceDirectory=True)
        dir1.mkdir()
        dir2 = os.path.join(self.tmpdir, "dir2")
        os.symlink(dir1.ospath, dir2)

        # Write a test file.
        src_file = dir1.join("test.txt")
        content = "0123456"
        src_file.write(content.encode())

        # Construct URI to destination that should be identical.
        dest_file = ButlerURI(os.path.join(dir2), forceDirectory=True).join("test.txt")
        self.assertTrue(dest_file.exists())
        self.assertNotEqual(src_file, dest_file)

        # Transfer it over itself.
        dest_file.transfer_from(src_file, transfer="symlink", overwrite=True)
        new_content = dest_file.read().decode()
        self.assertEqual(content, new_content)

    def testResource(self):
        """Test URIs using the ``resource`` scheme."""
        u = ButlerURI("resource://lsst.daf.butler/configs/datastore.yaml")
        self.assertTrue(u.exists(), f"Check {u} exists")

        content = u.read().decode()
        self.assertTrue(content.startswith("datastore:"))

        truncated = u.read(size=9).decode()
        self.assertEqual(truncated, "datastore")

        d = ButlerURI("resource://lsst.daf.butler/configs", forceDirectory=True)
        # Check the directory itself rather than re-checking the file above.
        self.assertTrue(d.exists(), f"Check directory {d} exists")

        j = d.join("datastore.yaml")
        self.assertEqual(u, j)
        self.assertFalse(j.dirLike)
        self.assertFalse(j.isdir())
        not_there = d.join("not-there.yaml")
        self.assertFalse(not_there.exists())

        bad = ButlerURI("resource://bad.module/not.yaml")
        multi = ButlerURI.mexists([u, bad, not_there])
        self.assertTrue(multi[u])
        self.assertFalse(multi[bad])
        self.assertFalse(multi[not_there])

    def testEscapes(self):
        """Special characters in file paths"""
        src = ButlerURI("bbb/???/test.txt", root=self.tmpdir, forceAbsolute=True)
        self.assertFalse(src.scheme)
        src.write(b"Some content")
        self.assertTrue(src.exists())

        # abspath always returns a file scheme
        file = src.abspath()
        self.assertTrue(file.exists())
        self.assertIn("???", file.ospath)
        self.assertNotIn("???", file.path)

        file = file.updatedFile("tests??.txt")
        self.assertNotIn("??.txt", file.path)
        file.write(b"Other content")
        self.assertEqual(file.read(), b"Other content")

        src = src.updatedFile("tests??.txt")
        self.assertIn("??.txt", src.path)
        self.assertEqual(file.read(), src.read(), f"reading from {file.ospath} and {src.ospath}")

        # File URI and schemeless URI
        parent = ButlerURI("file:" + urllib.parse.quote("/a/b/c/de/??/"))
        child = ButlerURI("e/f/g.txt", forceAbsolute=False)
        self.assertEqual(child.relative_to(parent), "e/f/g.txt")

        child = ButlerURI("e/f??#/g.txt", forceAbsolute=False)
        self.assertEqual(child.relative_to(parent), "e/f??#/g.txt")

        child = ButlerURI("file:" + urllib.parse.quote("/a/b/c/de/??/e/f??#/g.txt"))
        self.assertEqual(child.relative_to(parent), "e/f??#/g.txt")

        self.assertEqual(child.relativeToPathRoot, "a/b/c/de/??/e/f??#/g.txt")

        # Schemeless so should not quote
        dir_uri = ButlerURI("bbb/???/", root=self.tmpdir, forceAbsolute=True, forceDirectory=True)
        self.assertIn("???", dir_uri.ospath)
        self.assertIn("???", dir_uri.path)
        self.assertFalse(dir_uri.scheme)

        # dir_uri.join() morphs into a file scheme
        new = dir_uri.join("test_j.txt")
        self.assertIn("???", new.ospath, f"Checking {new}")
        new.write(b"Content")

        new2name = "###/test??.txt"
        new2 = dir_uri.join(new2name)
        self.assertIn("???", new2.ospath)
        new2.write(b"Content")
        self.assertTrue(new2.ospath.endswith(new2name))
        self.assertEqual(new.read(), new2.read())

        fdir = dir_uri.abspath()
        self.assertNotIn("???", fdir.path)
        self.assertIn("???", fdir.ospath)
        self.assertEqual(fdir.scheme, "file")
        fnew = dir_uri.join("test_jf.txt")
        fnew.write(b"Content")

        fnew2 = fdir.join(new2name)
        fnew2.write(b"Content")
        self.assertTrue(fnew2.ospath.endswith(new2name))
        self.assertNotIn("###", fnew2.path)

        self.assertEqual(fnew.read(), fnew2.read())

        # Test that children relative to schemeless and file schemes
        # still return the same unquoted name
        self.assertEqual(fnew2.relative_to(fdir), new2name, f"{fnew2}.relative_to({fdir})")
        self.assertEqual(fnew2.relative_to(dir_uri), new2name, f"{fnew2}.relative_to({dir_uri})")
        self.assertEqual(new2.relative_to(fdir), new2name, f"{new2}.relative_to({fdir})")
        self.assertEqual(new2.relative_to(dir_uri), new2name, f"{new2}.relative_to({dir_uri})")

        # Check for double quoting
        plus_path = "/a/b/c+d/"
        with self.assertLogs(level="WARNING"):
            uri = ButlerURI(urllib.parse.quote(plus_path), forceDirectory=True)
        self.assertEqual(uri.ospath, plus_path)

        # Check that # is not escaped for schemeless URIs
        hash_path = "/a/b#/c&d#xyz"
        hpos = hash_path.rfind("#")
        uri = ButlerURI(hash_path)
        self.assertEqual(uri.ospath, hash_path[:hpos])
        self.assertEqual(uri.fragment, hash_path[hpos + 1:])

    def testHash(self):
        """Test that we can store URIs in sets and as keys."""
        uri1 = ButlerURI(TESTDIR)
        uri2 = uri1.join("test/")
        s = {uri1, uri2}
        self.assertIn(uri1, s)

        d = {uri1: "1", uri2: "2"}
        self.assertEqual(d[uri2], "2")

    def testWalk(self):
        """Test ButlerURI.walk()."""
        test_dir_uri = ButlerURI(TESTDIR)

        file = test_dir_uri.join("config/basic/butler.yaml")
        found = list(ButlerURI.findFileResources([file]))
        self.assertEqual(found[0], file)

        # Compare against the full local paths
        expected = {p for p in glob.glob(os.path.join(TESTDIR, "config", "**"), recursive=True)
                    if os.path.isfile(p)}
        found = {u.ospath for u in ButlerURI.findFileResources([test_dir_uri.join("config")])}
        self.assertEqual(found, expected)

        # Now solely the YAML files
        expected_yaml = set(glob.glob(os.path.join(TESTDIR, "config", "**", "*.yaml"), recursive=True))
        found = {u.ospath for u in ButlerURI.findFileResources([test_dir_uri.join("config")],
                                                               file_filter=r".*\.yaml$")}
        self.assertEqual(found, expected_yaml)

        # Now two explicit directories and a file
        expected = set(glob.glob(os.path.join(TESTDIR, "config", "**", "basic", "*.yaml"), recursive=True))
        expected.update(glob.glob(os.path.join(TESTDIR, "config", "**", "templates", "*.yaml"),
                                  recursive=True))
        expected.add(file.ospath)

        found = {u.ospath for u in ButlerURI.findFileResources([file, test_dir_uri.join("config/basic"),
                                                                test_dir_uri.join("config/templates")],
                                                               file_filter=r".*\.yaml$")}
        self.assertEqual(found, expected)

        # Group by directory -- find everything and compare it with what
        # we expected to be there in total. We expect to find 9 directories
        # containing yaml files so make sure we only iterate 9 times.
        found_yaml = set()
        counter = 0
        for uris in ButlerURI.findFileResources([file, test_dir_uri.join("config/")],
                                                file_filter=r".*\.yaml$", grouped=True):
            found = {u.ospath for u in uris}
            if found:
                counter += 1

            found_yaml.update(found)

        self.assertEqual(found_yaml, expected_yaml)
        self.assertEqual(counter, 9)

        # Grouping but check that single files are returned in a single group
        # at the end
        file2 = test_dir_uri.join("config/templates/templates-bad.yaml")
        found = list(ButlerURI.findFileResources([file, file2, test_dir_uri.join("config/dbAuth")],
                                                 grouped=True))
        self.assertEqual(len(found), 2)
        self.assertEqual(list(found[1]), [file, file2])

        with self.assertRaises(ValueError):
            list(file.walk())

    def testRootURI(self):
        """Test ButlerURI.root_uri()."""
        uri = ButlerURI("https://www.notexist.com:8080/file/test")
        uri2 = ButlerURI("s3://www.notexist.com/file/test")
        self.assertEqual(uri.root_uri().geturl(), "https://www.notexist.com:8080/")
        self.assertEqual(uri2.root_uri().geturl(), "s3://www.notexist.com/")

    def testJoin(self):
        """Test .join method."""

        root_str = "s3://bucket/hsc/payload/"
        root = ButlerURI(root_str)

        self.assertEqual(root.join("b/test.txt").geturl(), f"{root_str}b/test.txt")
        add_dir = root.join("b/c/d/")
        self.assertTrue(add_dir.isdir())
        self.assertEqual(add_dir.geturl(), f"{root_str}b/c/d/")

        quote_example = "b&c.t@x#t"
        needs_quote = root.join(quote_example)
        self.assertEqual(needs_quote.unquoted_path, f"/hsc/payload/{quote_example}")

        other = ButlerURI("file://localhost/test.txt")
        self.assertEqual(root.join(other), other)
        self.assertEqual(other.join("b/new.txt").geturl(), "file://localhost/b/new.txt")

        joined = ButlerURI("s3://bucket/hsc/payload/").join(ButlerURI("test.qgraph", forceAbsolute=False))
        self.assertEqual(joined, ButlerURI("s3://bucket/hsc/payload/test.qgraph"))

        with self.assertRaises(ValueError):
            ButlerURI("s3://bucket/hsc/payload/").join(ButlerURI("test.qgraph"))

    def testTemporary(self):
        """Test that temporary URIs are removed when the context exits."""
        with ButlerURI.temporary_uri(suffix=".json") as tmp:
            self.assertEqual(tmp.getExtension(), ".json", f"uri: {tmp}")
            self.assertTrue(tmp.isabs(), f"uri: {tmp}")
            self.assertFalse(tmp.exists(), f"uri: {tmp}")
            tmp.write(b"abcd")
            self.assertTrue(tmp.exists(), f"uri: {tmp}")
            self.assertTrue(tmp.isTemporary)
        self.assertFalse(tmp.exists(), f"uri: {tmp}")

        tmpdir = ButlerURI(self.tmpdir, forceDirectory=True)
        with ButlerURI.temporary_uri(prefix=tmpdir, suffix=".yaml") as tmp:
            # Use a specified tmpdir and check it is okay for the file
            # to not be created.
            self.assertFalse(tmp.exists(), f"uri: {tmp}")
        self.assertTrue(tmpdir.exists(), f"uri: {tmpdir} still exists")

489 

490 

@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!")
@mock_s3
class S3URITestCase(unittest.TestCase):
    """Tests involving S3"""

    # Bucket name to use in tests
    bucketName = "any_bucket"

    def setUp(self):
        # Local test directory
        self.tmpdir = makeTestTempDir(TESTDIR)

        # set up some fake credentials if they do not exist
        self.usingDummyCredentials = setAwsEnvCredentials()

        # MOTO needs to know that we expect Bucket bucketname to exist
        s3 = boto3.resource("s3")
        s3.create_bucket(Bucket=self.bucketName)

    def tearDown(self):
        try:
            s3 = boto3.resource("s3")
            bucket = s3.Bucket(self.bucketName)
            try:
                bucket.objects.all().delete()
            except botocore.exceptions.ClientError as e:
                # A 404 means the key was not reachable, which is fine
                # during cleanup; anything else is a real failure.
                if e.response["Error"]["Code"] != "404":
                    raise

            bucket.delete()
        finally:
            # Always restore the environment and remove the local directory,
            # even if the bucket cleanup above failed.

            # unset any potentially set dummy credentials
            if self.usingDummyCredentials:
                unsetAwsEnvCredentials()

            removeTestTempDir(self.tmpdir)

    def makeS3Uri(self, path):
        """Return the S3 URI string for ``path`` inside the test bucket."""
        return f"s3://{self.bucketName}/{path}"

    def testTransfer(self):
        """Test copies between local files and S3, and reads from S3."""
        src = ButlerURI(os.path.join(self.tmpdir, "test.txt"))
        content = "Content is some content\nwith something to say\n\n"
        src.write(content.encode())
        self.assertTrue(src.exists())
        self.assertEqual(src.size(), len(content.encode()))

        dest = ButlerURI(self.makeS3Uri("test.txt"))
        self.assertFalse(dest.exists())

        with self.assertRaises(FileNotFoundError):
            dest.size()

        dest.transfer_from(src, transfer="copy")
        self.assertTrue(dest.exists())

        dest2 = ButlerURI(self.makeS3Uri("copied.txt"))
        dest2.transfer_from(dest, transfer="copy")
        self.assertTrue(dest2.exists())

        local = ButlerURI(os.path.join(self.tmpdir, "copied.txt"))
        local.transfer_from(dest2, transfer="copy")
        with open(local.ospath, "r") as fd:
            new_content = fd.read()
        self.assertEqual(new_content, content)

        with self.assertRaises(ValueError):
            dest2.transfer_from(local, transfer="symlink")

        b = dest.read()
        self.assertEqual(b.decode(), new_content)

        nbytes = 10
        subset = dest.read(size=nbytes)
        # Exactly the requested number of bytes must come back.
        self.assertEqual(len(subset), nbytes)
        self.assertEqual(subset.decode(), content[:nbytes])

        with self.assertRaises(FileExistsError):
            dest.transfer_from(src, transfer="copy")

        dest.transfer_from(src, transfer="copy", overwrite=True)

    def testWalk(self):
        """Test that we can list an S3 bucket"""
        # Files we want to create
        expected = ("a/x.txt", "a/y.txt", "a/z.json", "a/b/w.txt", "a/b/c/d/v.json")
        expected_uris = [ButlerURI(self.makeS3Uri(path)) for path in expected]
        for uri in expected_uris:
            # Doesn't matter what we write
            uri.write(b"123")

        # Find all the files in the a/ tree
        found = {uri.path for uri in ButlerURI.findFileResources([ButlerURI(self.makeS3Uri("a/"))])}
        self.assertEqual(found, {uri.path for uri in expected_uris})

        # Find all the files in the a/ tree but group by folder
        found = ButlerURI.findFileResources([ButlerURI(self.makeS3Uri("a/"))],
                                            grouped=True)
        expected = (("/a/x.txt", "/a/y.txt", "/a/z.json"), ("/a/b/w.txt",), ("/a/b/c/d/v.json",))

        for got, expect in zip(found, expected):
            self.assertEqual(tuple(u.path for u in got), expect)

        # Find only JSON files
        found = {uri.path for uri in ButlerURI.findFileResources([ButlerURI(self.makeS3Uri("a/"))],
                                                                 file_filter=r"\.json$")}
        self.assertEqual(found, {uri.path for uri in expected_uris if uri.path.endswith(".json")})

        # JSON files grouped by directory
        found = ButlerURI.findFileResources([ButlerURI(self.makeS3Uri("a/"))],
                                            file_filter=r"\.json$", grouped=True)
        expected = (("/a/z.json",), ("/a/b/c/d/v.json",))

        for got, expect in zip(found, expected):
            self.assertEqual(tuple(u.path for u in got), expect)

        # Check pagination works with large numbers of files. S3 API limits
        # us to 1000 response per list_objects call so create lots of files
        created = set()
        n_dir1 = 1100
        for counter in range(1, n_dir1 + 1):
            new = ButlerURI(self.makeS3Uri(f"test/file{counter:04d}.txt"))
            new.write(f"{counter}".encode())
            created.add(str(new))

        # Put some in a subdirectory to make sure we are looking in a
        # hierarchy.
        n_dir2 = 100
        for counter in range(1, n_dir2 + 1):
            new = ButlerURI(self.makeS3Uri(f"test/subdir/file{counter:04d}.txt"))
            new.write(f"{counter}".encode())
            created.add(str(new))

        found = ButlerURI.findFileResources([ButlerURI(self.makeS3Uri("test/"))])
        self.assertEqual({str(u) for u in found}, created)

        # Again with grouping.
        found = list(ButlerURI.findFileResources([ButlerURI(self.makeS3Uri("test/"))], grouped=True))
        self.assertEqual(len(found), 2)
        dir_1 = list(found[0])
        dir_2 = list(found[1])
        self.assertEqual(len(dir_1), n_dir1)
        self.assertEqual(len(dir_2), n_dir2)

    def testWrite(self):
        """Test that content written to S3 can be read back."""
        s3write = ButlerURI(self.makeS3Uri("created.txt"))
        content = "abcdefghijklmnopqrstuv\n"
        s3write.write(content.encode())
        self.assertEqual(s3write.read().decode(), content)

    def testTemporary(self):
        """Test temporary URIs located in an S3 bucket."""
        s3root = ButlerURI(self.makeS3Uri("rootdir"), forceDirectory=True)
        with ButlerURI.temporary_uri(prefix=s3root, suffix=".json") as tmp:
            self.assertEqual(tmp.getExtension(), ".json", f"uri: {tmp}")
            self.assertEqual(tmp.scheme, "s3", f"uri: {tmp}")
            self.assertEqual(tmp.parent(), s3root)
            basename = tmp.basename()
            content = "abcd"
            tmp.write(content.encode())
            self.assertTrue(tmp.exists(), f"uri: {tmp}")
        self.assertFalse(tmp.exists())

        # Again without writing anything, to check that there is no complaint
        # on exit of context manager.
        with ButlerURI.temporary_uri(prefix=s3root, suffix=".json") as tmp:
            self.assertFalse(tmp.exists())
            # Check that the file has a different name than before.
            self.assertNotEqual(tmp.basename(), basename, f"uri: {tmp}")
        self.assertFalse(tmp.exists())

    def testRelative(self):
        """Check that we can get subpaths back from two URIs"""
        parent = ButlerURI(self.makeS3Uri("rootdir"), forceDirectory=True)
        child = ButlerURI(self.makeS3Uri("rootdir/dir1/file.txt"))

        self.assertEqual(child.relative_to(parent), "dir1/file.txt")

        not_child = ButlerURI(self.makeS3Uri("/a/b/dir1/file.txt"))
        self.assertFalse(not_child.relative_to(parent))

        not_s3 = ButlerURI(os.path.join(self.tmpdir, "dir1", "file2.txt"))
        self.assertFalse(child.relative_to(not_s3))

    def testQuoting(self):
        """Check that quoting works."""
        parent = ButlerURI(self.makeS3Uri("rootdir"), forceDirectory=True)
        subpath = "rootdir/dir1+/file?.txt"
        child = ButlerURI(self.makeS3Uri(urllib.parse.quote(subpath)))

        self.assertEqual(child.relative_to(parent), "dir1+/file?.txt")
        self.assertEqual(child.basename(), "file?.txt")
        self.assertEqual(child.relativeToPathRoot, subpath)
        self.assertIn("%", child.path)
        self.assertEqual(child.unquoted_path, "/" + subpath)

691 

692 

# Mock required environment variables during tests
@unittest.mock.patch.dict(os.environ, {"LSST_BUTLER_WEBDAV_AUTH": "TOKEN",
                                       "LSST_BUTLER_WEBDAV_TOKEN_FILE": os.path.join(
                                           TESTDIR, "config/testConfigs/webdav/token"),
                                       "LSST_BUTLER_WEBDAV_CA_BUNDLE": "/path/to/ca/certs"})
class WebdavURITestCase(unittest.TestCase):
    """Tests of HTTPS URIs run against canned ``responses`` replies."""

    def setUp(self):
        """Create the URIs under test and register the canned replies."""
        server_url = "https://www.not-exists.orgx"
        folder_url = f"{server_url}/existingFolder"
        missing_name = "notExistingFile"

        self.baseURL = ButlerURI(server_url, forceDirectory=True)
        self.existingFileButlerURI = ButlerURI(f"{folder_url}/existingFile")
        self.notExistingFileButlerURI = ButlerURI(f"{folder_url}/{missing_name}")
        self.existingFolderButlerURI = ButlerURI(folder_url, forceDirectory=True)
        self.notExistingFolderButlerURI = ButlerURI(f"{server_url}/{missing_name}",
                                                    forceDirectory=True)

        base_url = self.baseURL.geturl()
        file_url = self.existingFileButlerURI.geturl()
        missing_file_url = self.notExistingFileButlerURI.geturl()
        dir_url = self.existingFolderButlerURI.geturl()
        missing_dir_url = self.notExistingFolderButlerURI.geturl()
        sized = {"Content-Length": "1024"}

        # Need to declare the options
        responses.add(responses.OPTIONS, base_url, status=200, headers={"DAV": "1,2,3"})

        # Used by ButlerHttpURI.exists()
        responses.add(responses.HEAD, file_url, status=200, headers=dict(sized))
        responses.add(responses.HEAD, missing_file_url, status=404)

        # Used by ButlerHttpURI.read()
        responses.add(responses.GET, file_url, status=200, body=b"It works!")
        responses.add(responses.GET, missing_file_url, status=404)

        # Used by ButlerHttpURI.write()
        responses.add(responses.PUT, file_url, status=201)

        # Used by ButlerHttpURI.transfer_from()
        for method, destination in (("COPY", file_url),
                                    ("COPY", missing_file_url),
                                    ("MOVE", missing_file_url)):
            responses.add(responses.Response(url=file_url,
                                             method=method,
                                             headers={"Destination": destination},
                                             status=201))

        # Used by ButlerHttpURI.remove()
        responses.add(responses.DELETE, file_url, status=200)
        responses.add(responses.DELETE, missing_file_url, status=404)

        # Used by ButlerHttpURI.mkdir()
        responses.add(responses.HEAD, dir_url, status=200, headers=dict(sized))
        responses.add(responses.HEAD, base_url, status=200, headers=dict(sized))
        responses.add(responses.HEAD, missing_dir_url, status=404)
        responses.add(responses.Response(url=missing_dir_url, method="MKCOL", status=201))
        responses.add(responses.Response(url=dir_url, method="MKCOL", status=403))

    @responses.activate
    def testExists(self):
        """Check existence and size for present and absent files."""
        present = self.existingFileButlerURI
        absent = self.notExistingFileButlerURI

        self.assertTrue(present.exists())
        self.assertFalse(absent.exists())

        self.assertEqual(present.size(), 1024)
        with self.assertRaises(FileNotFoundError):
            absent.size()

    @responses.activate
    def testRemove(self):
        """Check removal of present and absent files."""
        result = self.existingFileButlerURI.remove()
        self.assertIsNone(result)
        with self.assertRaises(FileNotFoundError):
            self.notExistingFileButlerURI.remove()

    @responses.activate
    def testMkdir(self):
        """Check directory creation for new, existing and file-like URIs."""
        # The mock means that we can't check this now exists
        self.notExistingFolderButlerURI.mkdir()

        # This should do nothing
        self.existingFolderButlerURI.mkdir()

        with self.assertRaises(ValueError):
            self.notExistingFileButlerURI.mkdir()

    @responses.activate
    def testRead(self):
        """Check reading of present and absent files."""
        present = self.existingFileButlerURI

        self.assertEqual(present.read().decode(), "It works!")
        self.assertNotEqual(present.read().decode(), "Nope.")
        with self.assertRaises(FileNotFoundError):
            self.notExistingFileButlerURI.read()

    @responses.activate
    def testWrite(self):
        """Check writing with and without permission to overwrite."""
        target = self.existingFileButlerURI
        payload = b"Some content."

        self.assertIsNone(target.write(data=payload))
        with self.assertRaises(FileExistsError):
            target.write(data=payload, overwrite=False)

    @responses.activate
    def testTransfer(self):
        """Check copy and move transfers plus the failure modes."""
        source = self.existingFileButlerURI
        target = self.notExistingFileButlerURI

        self.assertIsNone(target.transfer_from(src=source))
        self.assertIsNone(target.transfer_from(src=source, transfer="move"))
        with self.assertRaises(FileExistsError):
            source.transfer_from(src=source)
        with self.assertRaises(ValueError):
            target.transfer_from(src=source, transfer="unsupported")

    def testParent(self):
        """Check parent and dirname of file and directory URIs."""
        folder_url = self.existingFolderButlerURI.geturl()
        self.assertEqual(folder_url, self.notExistingFileButlerURI.parent().geturl())

        base_url = self.baseURL.geturl()
        self.assertEqual(base_url, self.baseURL.parent().geturl())

        present = self.existingFileButlerURI
        self.assertEqual(present.parent().geturl(), present.dirname().geturl())

850 

851 

# Run the tests in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()