Coverage for tests/test_uri.py: 12%

Shortcuts on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

516 statements  

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

import glob
import os
import pathlib
import shutil
import unittest
import unittest.mock
import urllib.parse

import responses

30 

# boto3/botocore/moto are optional: when any of them is missing the S3 tests
# are skipped (they key off ``boto3`` being falsy) and ``mock_s3`` degrades to
# a pass-through so the class decorator below still applies cleanly.
try:
    import boto3
    import botocore
    from moto import mock_s3
except ImportError:
    boto3 = None

    def mock_s3(cls):
        """Return ``cls`` unchanged; stand-in used when moto's ``mock_s3``
        can not be imported.
        """
        return cls

41 

42 

43from lsst.daf.butler import ButlerURI 

44from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir 

45from lsst.resources.s3utils import setAwsEnvCredentials, unsetAwsEnvCredentials 

46 

# Absolute path of the directory holding this test module. Temporary test
# directories and the ``config`` fixture tree are located relative to it.
TESTDIR = os.path.abspath(os.path.dirname(__file__))

48 

49 

class FileURITestCase(unittest.TestCase):
    """Concrete tests for local files"""

    def setUp(self):
        # Use a local tempdir because on macOS the temp dirs use symlinks
        # so relsymlink gets quite confused.
        self.tmpdir = makeTestTempDir(TESTDIR)

    def tearDown(self):
        removeTestTempDir(self.tmpdir)

    def testFile(self):
        """Write/read a local file and check URI construction variants."""
        file = os.path.join(self.tmpdir, "test.txt")
        uri = ButlerURI(file)
        self.assertFalse(uri.exists(), f"{uri} should not exist")
        self.assertEqual(uri.ospath, file)

        # Construction from a pathlib.Path must give the same OS path.
        path = pathlib.Path(file)
        uri = ButlerURI(path)
        self.assertEqual(uri.ospath, file)

        content = "abcdefghijklmnopqrstuv\n"
        uri.write(content.encode())
        self.assertTrue(os.path.exists(file), "File should exist locally")
        self.assertTrue(uri.exists(), f"{uri} should now exist")
        self.assertEqual(uri.read().decode(), content)
        self.assertEqual(uri.size(), len(content.encode()))

        with self.assertRaises(FileNotFoundError):
            ButlerURI("file/not/there.txt").size()

        # Check that creating a URI from a URI returns the same thing
        uri2 = ButlerURI(uri)
        self.assertEqual(uri, uri2)
        self.assertIs(uri, uri2)

        with self.assertRaises(ValueError):
            # Scheme-less URIs are not allowed to support non-file roots
            # at the present time. This may change in the future to become
            # equivalent to ButlerURI.join()
            ButlerURI("a/b.txt", root=ButlerURI("s3://bucket/a/b/"))

    def testExtension(self):
        """Check replacement of file extensions."""
        file = ButlerURI(os.path.join(self.tmpdir, "test.txt"))
        self.assertEqual(file.updatedExtension(None), file)
        self.assertEqual(file.updatedExtension(".txt"), file)
        # An unchanged extension must hand back the very same object.
        self.assertIs(file.updatedExtension(".txt"), file)

        fits = file.updatedExtension(".fits.gz")
        self.assertEqual(fits.basename(), "test.fits.gz")
        self.assertEqual(fits.updatedExtension(".jpeg").basename(), "test.jpeg")

    def testRelative(self):
        """Check that we can get subpaths back from two URIs"""
        parent = ButlerURI(self.tmpdir, forceDirectory=True, forceAbsolute=True)
        self.assertTrue(parent.isdir())
        child = ButlerURI(os.path.join(self.tmpdir, "dir1", "file.txt"), forceAbsolute=True)

        self.assertEqual(child.relative_to(parent), "dir1/file.txt")

        not_child = ButlerURI("/a/b/dir1/file.txt")
        self.assertIsNone(not_child.relative_to(parent))
        self.assertFalse(not_child.isdir())

        not_directory = ButlerURI(os.path.join(self.tmpdir, "dir1", "file2.txt"))
        self.assertIsNone(child.relative_to(not_directory))

        # Relative URIs
        parent = ButlerURI("a/b/", forceAbsolute=False)
        child = ButlerURI("a/b/c/d.txt", forceAbsolute=False)
        self.assertFalse(child.scheme)
        self.assertEqual(child.relative_to(parent), "c/d.txt")

        # File URI and schemeless URI
        parent = ButlerURI("file:/a/b/c/")
        child = ButlerURI("e/f/g.txt", forceAbsolute=False)

        # If the child is relative and the parent is absolute we assume
        # that the child is a child of the parent unless it uses ".."
        self.assertEqual(child.relative_to(parent), "e/f/g.txt")

        child = ButlerURI("../e/f/g.txt", forceAbsolute=False)
        self.assertIsNone(child.relative_to(parent))

        child = ButlerURI("../c/e/f/g.txt", forceAbsolute=False)
        self.assertEqual(child.relative_to(parent), "e/f/g.txt")

        # Test non-file root with relative path.
        child = ButlerURI("e/f/g.txt", forceAbsolute=False)
        parent = ButlerURI("s3://hello/a/b/c/")
        self.assertEqual(child.relative_to(parent), "e/f/g.txt")

        # Test with different netloc
        child = ButlerURI("http://my.host/a/b/c.txt")
        parent = ButlerURI("http://other.host/a/")
        self.assertIsNone(child.relative_to(parent), f"{child}.relative_to({parent})")

        # Schemeless absolute child.
        # Schemeless absolute URI is constructed using root= parameter.
        parent = ButlerURI("file:///a/b/c/")
        child = ButlerURI("d/e.txt", root=parent)
        self.assertEqual(child.relative_to(parent), "d/e.txt", f"{child}.relative_to({parent})")

        parent = ButlerURI("c/", root="/a/b/")
        self.assertEqual(child.relative_to(parent), "d/e.txt", f"{child}.relative_to({parent})")

        # Absolute schemeless child with relative parent will always fail.
        parent = ButlerURI("d/e.txt", forceAbsolute=False)
        self.assertIsNone(child.relative_to(parent), f"{child}.relative_to({parent})")

    def testParents(self):
        """Test of splitting and parent walking."""
        parent = ButlerURI(self.tmpdir, forceDirectory=True, forceAbsolute=True)
        child_file = parent.join("subdir/file.txt")
        self.assertFalse(child_file.isdir())
        child_subdir, file = child_file.split()
        self.assertEqual(file, "file.txt")
        self.assertTrue(child_subdir.isdir())
        self.assertEqual(child_file.dirname(), child_subdir)
        self.assertEqual(child_file.basename(), file)
        self.assertEqual(child_file.parent(), child_subdir)
        derived_parent = child_subdir.parent()
        self.assertEqual(derived_parent, parent)
        self.assertTrue(derived_parent.isdir())
        self.assertEqual(child_file.parent().parent(), parent)

    def testEnvVar(self):
        """Test that environment variables are expanded."""

        with unittest.mock.patch.dict(os.environ, {"MY_TEST_DIR": "/a/b/c"}):
            uri = ButlerURI("${MY_TEST_DIR}/d.txt")
            self.assertEqual(uri.path, "/a/b/c/d.txt")
            self.assertEqual(uri.scheme, "file")

            # This will not expand
            uri = ButlerURI("${MY_TEST_DIR}/d.txt", forceAbsolute=False)
            self.assertEqual(uri.path, "${MY_TEST_DIR}/d.txt")
            self.assertFalse(uri.scheme)

    def testMkdir(self):
        """Create a nested directory and write a file into it."""
        tmpdir = ButlerURI(self.tmpdir)
        newdir = tmpdir.join("newdir/seconddir")
        newdir.mkdir()
        self.assertTrue(newdir.exists())
        newfile = newdir.join("temp.txt")
        newfile.write(b"Data")
        self.assertTrue(newfile.exists())

    def testTransfer(self):
        """Exercise every local transfer mode, with and without overwrite."""
        src = ButlerURI(os.path.join(self.tmpdir, "test.txt"))
        content = "Content is some content\nwith something to say\n\n"
        src.write(content.encode())

        for mode in ("copy", "link", "hardlink", "symlink", "relsymlink"):
            dest = ButlerURI(os.path.join(self.tmpdir, f"dest_{mode}.txt"))
            dest.transfer_from(src, transfer=mode)
            self.assertTrue(dest.exists(), f"Check that {dest} exists (transfer={mode})")

            with open(dest.ospath, "r") as fh:
                new_content = fh.read()
            self.assertEqual(new_content, content)

            if mode in ("symlink", "relsymlink"):
                self.assertTrue(os.path.islink(dest.ospath), f"Check that {dest} is symlink")

            # If the source and destination are hardlinks of each other
            # the transfer should work even if overwrite=False.
            if mode in ("link", "hardlink"):
                dest.transfer_from(src, transfer=mode)
            else:
                with self.assertRaises(
                    FileExistsError, msg=f"Overwrite of {dest} should not be allowed ({mode})"
                ):
                    dest.transfer_from(src, transfer=mode)

            dest.transfer_from(src, transfer=mode, overwrite=True)

            os.remove(dest.ospath)

        b = src.read()
        self.assertEqual(b.decode(), new_content)

        # Partial reads return exactly the requested number of bytes.
        nbytes = 10
        subset = src.read(size=nbytes)
        self.assertEqual(len(subset), nbytes)
        self.assertEqual(subset.decode(), content[:nbytes])

        with self.assertRaises(ValueError):
            src.transfer_from(src, transfer="unknown")

    def testTransferIdentical(self):
        """Test overwrite of identical files."""
        dir1 = ButlerURI(os.path.join(self.tmpdir, "dir1"), forceDirectory=True)
        dir1.mkdir()
        # dir2 is a symlink to dir1, so files under both are the same file.
        dir2 = os.path.join(self.tmpdir, "dir2")
        os.symlink(dir1.ospath, dir2)

        # Write a test file.
        src_file = dir1.join("test.txt")
        content = "0123456"
        src_file.write(content.encode())

        # Construct URI to destination that should be identical.
        dest_file = ButlerURI(os.path.join(dir2), forceDirectory=True).join("test.txt")
        self.assertTrue(dest_file.exists())
        self.assertNotEqual(src_file, dest_file)

        # Transfer it over itself.
        dest_file.transfer_from(src_file, transfer="symlink", overwrite=True)
        new_content = dest_file.read().decode()
        self.assertEqual(content, new_content)

    def testResource(self):
        """Check access to package resources via the resource scheme."""
        u = ButlerURI("resource://lsst.daf.butler/configs/datastore.yaml")
        self.assertTrue(u.exists(), f"Check {u} exists")

        content = u.read().decode()
        self.assertTrue(content.startswith("datastore:"))

        truncated = u.read(size=9).decode()
        self.assertEqual(truncated, "datastore")

        d = ButlerURI("resource://lsst.daf.butler/configs", forceDirectory=True)
        # Check the directory URI itself (the file URI was verified above).
        self.assertTrue(d.exists(), f"Check directory {d} exists")

        j = d.join("datastore.yaml")
        self.assertEqual(u, j)
        self.assertFalse(j.dirLike)
        self.assertFalse(j.isdir())
        not_there = d.join("not-there.yaml")
        self.assertFalse(not_there.exists())

        bad = ButlerURI("resource://bad.module/not.yaml")
        multi = ButlerURI.mexists([u, bad, not_there])
        self.assertTrue(multi[u])
        self.assertFalse(multi[bad])
        self.assertFalse(multi[not_there])

    def testEscapes(self):
        """Special characters in file paths"""
        src = ButlerURI("bbb/???/test.txt", root=self.tmpdir, forceAbsolute=True)
        self.assertFalse(src.scheme)
        src.write(b"Some content")
        self.assertTrue(src.exists())

        # abspath always returns a file scheme
        file = src.abspath()
        self.assertTrue(file.exists())
        self.assertIn("???", file.ospath)
        self.assertNotIn("???", file.path)

        file = file.updatedFile("tests??.txt")
        self.assertNotIn("??.txt", file.path)
        file.write(b"Other content")
        self.assertEqual(file.read(), b"Other content")

        src = src.updatedFile("tests??.txt")
        self.assertIn("??.txt", src.path)
        self.assertEqual(file.read(), src.read(), f"reading from {file.ospath} and {src.ospath}")

        # File URI and schemeless URI
        parent = ButlerURI("file:" + urllib.parse.quote("/a/b/c/de/??/"))
        child = ButlerURI("e/f/g.txt", forceAbsolute=False)
        self.assertEqual(child.relative_to(parent), "e/f/g.txt")

        child = ButlerURI("e/f??#/g.txt", forceAbsolute=False)
        self.assertEqual(child.relative_to(parent), "e/f??#/g.txt")

        child = ButlerURI("file:" + urllib.parse.quote("/a/b/c/de/??/e/f??#/g.txt"))
        self.assertEqual(child.relative_to(parent), "e/f??#/g.txt")

        self.assertEqual(child.relativeToPathRoot, "a/b/c/de/??/e/f??#/g.txt")

        # Schemeless so should not quote
        schemeless_dir = ButlerURI("bbb/???/", root=self.tmpdir, forceAbsolute=True, forceDirectory=True)
        self.assertIn("???", schemeless_dir.ospath)
        self.assertIn("???", schemeless_dir.path)
        self.assertFalse(schemeless_dir.scheme)

        # dir.join() morphs into a file scheme
        new = schemeless_dir.join("test_j.txt")
        self.assertIn("???", new.ospath, f"Checking {new}")
        new.write(b"Content")

        new2name = "###/test??.txt"
        new2 = schemeless_dir.join(new2name)
        self.assertIn("???", new2.ospath)
        new2.write(b"Content")
        self.assertTrue(new2.ospath.endswith(new2name))
        self.assertEqual(new.read(), new2.read())

        fdir = schemeless_dir.abspath()
        self.assertNotIn("???", fdir.path)
        self.assertIn("???", fdir.ospath)
        self.assertEqual(fdir.scheme, "file")
        # NOTE(review): this joins onto the schemeless directory rather than
        # ``fdir``; kept as in the original -- confirm that is intended.
        fnew = schemeless_dir.join("test_jf.txt")
        fnew.write(b"Content")

        fnew2 = fdir.join(new2name)
        fnew2.write(b"Content")
        self.assertTrue(fnew2.ospath.endswith(new2name))
        self.assertNotIn("###", fnew2.path)

        self.assertEqual(fnew.read(), fnew2.read())

        # Test that children relative to schemeless and file schemes
        # still return the same unquoted name
        self.assertEqual(fnew2.relative_to(fdir), new2name, f"{fnew2}.relative_to({fdir})")
        self.assertEqual(
            fnew2.relative_to(schemeless_dir), new2name, f"{fnew2}.relative_to({schemeless_dir})"
        )
        self.assertEqual(new2.relative_to(fdir), new2name, f"{new2}.relative_to({fdir})")
        self.assertEqual(
            new2.relative_to(schemeless_dir), new2name, f"{new2}.relative_to({schemeless_dir})"
        )

        # Check for double quoting
        plus_path = "/a/b/c+d/"
        with self.assertLogs(level="WARNING"):
            uri = ButlerURI(urllib.parse.quote(plus_path), forceDirectory=True)
        self.assertEqual(uri.ospath, plus_path)

        # Check that # is not escaped for schemeless URIs
        hash_path = "/a/b#/c&d#xyz"
        hpos = hash_path.rfind("#")
        uri = ButlerURI(hash_path)
        self.assertEqual(uri.ospath, hash_path[:hpos])
        self.assertEqual(uri.fragment, hash_path[hpos + 1 :])

    def testHash(self):
        """Test that we can store URIs in sets and as keys."""
        uri1 = ButlerURI(TESTDIR)
        uri2 = uri1.join("test/")
        s = {uri1, uri2}
        self.assertIn(uri1, s)

        d = {uri1: "1", uri2: "2"}
        self.assertEqual(d[uri2], "2")

    def testWalk(self):
        """Test ButlerURI.walk()."""
        test_dir_uri = ButlerURI(TESTDIR)

        file = test_dir_uri.join("config/basic/butler.yaml")
        found = list(ButlerURI.findFileResources([file]))
        self.assertEqual(found[0], file)

        # Compare against the full local paths
        expected = {
            p for p in glob.glob(os.path.join(TESTDIR, "config", "**"), recursive=True) if os.path.isfile(p)
        }
        found = {u.ospath for u in ButlerURI.findFileResources([test_dir_uri.join("config")])}
        self.assertEqual(found, expected)

        # Now solely the YAML files
        expected_yaml = set(glob.glob(os.path.join(TESTDIR, "config", "**", "*.yaml"), recursive=True))
        found = {
            u.ospath
            for u in ButlerURI.findFileResources([test_dir_uri.join("config")], file_filter=r".*\.yaml$")
        }
        self.assertEqual(found, expected_yaml)

        # Now two explicit directories and a file
        expected = set(glob.glob(os.path.join(TESTDIR, "config", "**", "basic", "*.yaml"), recursive=True))
        expected.update(
            set(glob.glob(os.path.join(TESTDIR, "config", "**", "templates", "*.yaml"), recursive=True))
        )
        expected.add(file.ospath)

        found = {
            u.ospath
            for u in ButlerURI.findFileResources(
                [file, test_dir_uri.join("config/basic"), test_dir_uri.join("config/templates")],
                file_filter=r".*\.yaml$",
            )
        }
        self.assertEqual(found, expected)

        # Group by directory -- find everything and compare it with what
        # we expected to be there in total. We expect to find 9 directories
        # containing yaml files so make sure we only iterate 9 times.
        found_yaml = set()
        counter = 0
        for uris in ButlerURI.findFileResources(
            [file, test_dir_uri.join("config/")], file_filter=r".*\.yaml$", grouped=True
        ):
            found = {u.ospath for u in uris}
            if found:
                counter += 1

            found_yaml.update(found)

        self.assertEqual(found_yaml, expected_yaml)
        self.assertEqual(counter, 9)

        # Grouping but check that single files are returned in a single group
        # at the end
        file2 = test_dir_uri.join("config/templates/templates-bad.yaml")
        found = list(
            ButlerURI.findFileResources([file, file2, test_dir_uri.join("config/dbAuth")], grouped=True)
        )
        self.assertEqual(len(found), 2)
        self.assertEqual(list(found[1]), [file, file2])

        # Walking a file rather than a directory is an error.
        with self.assertRaises(ValueError):
            list(file.walk())

    def testRootURI(self):
        """Test ButlerURI.root_uri()."""
        uri = ButlerURI("https://www.notexist.com:8080/file/test")
        uri2 = ButlerURI("s3://www.notexist.com/file/test")
        self.assertEqual(uri.root_uri().geturl(), "https://www.notexist.com:8080/")
        self.assertEqual(uri2.root_uri().geturl(), "s3://www.notexist.com/")

    def testJoin(self):
        """Test .join method."""

        root_str = "s3://bucket/hsc/payload/"
        root = ButlerURI(root_str)

        self.assertEqual(root.join("b/test.txt").geturl(), f"{root_str}b/test.txt")
        add_dir = root.join("b/c/d/")
        self.assertTrue(add_dir.isdir())
        self.assertEqual(add_dir.geturl(), f"{root_str}b/c/d/")

        quote_example = "b&c.t@x#t"
        needs_quote = root.join(quote_example)
        self.assertEqual(needs_quote.unquoted_path, f"/hsc/payload/{quote_example}")

        # Joining an absolute URI replaces the root entirely.
        other = ButlerURI("file://localhost/test.txt")
        self.assertEqual(root.join(other), other)
        self.assertEqual(other.join("b/new.txt").geturl(), "file://localhost/b/new.txt")

        joined = ButlerURI("s3://bucket/hsc/payload/").join(ButlerURI("test.qgraph", forceAbsolute=False))
        self.assertEqual(joined, ButlerURI("s3://bucket/hsc/payload/test.qgraph"))

        with self.assertRaises(ValueError):
            ButlerURI("s3://bucket/hsc/payload/").join(ButlerURI("test.qgraph"))

    def testTemporary(self):
        """Check that temporary URIs are removed on context exit."""
        with ButlerURI.temporary_uri(suffix=".json") as tmp:
            self.assertEqual(tmp.getExtension(), ".json", f"uri: {tmp}")
            self.assertTrue(tmp.isabs(), f"uri: {tmp}")
            self.assertFalse(tmp.exists(), f"uri: {tmp}")
            tmp.write(b"abcd")
            self.assertTrue(tmp.exists(), f"uri: {tmp}")
            self.assertTrue(tmp.isTemporary)
        self.assertFalse(tmp.exists(), f"uri: {tmp}")

        tmpdir = ButlerURI(self.tmpdir, forceDirectory=True)
        with ButlerURI.temporary_uri(prefix=tmpdir, suffix=".yaml") as tmp:
            # Use a specified tmpdir and check it is okay for the file
            # to not be created.
            self.assertFalse(tmp.exists(), f"uri: {tmp}")
        self.assertTrue(tmpdir.exists(), f"uri: {tmpdir} still exists")

502 

@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!")
@mock_s3
class S3URITestCase(unittest.TestCase):
    """Tests involving S3"""

    # Bucket name to use in tests
    bucketName = "any_bucket"

    def setUp(self):
        # Local test directory
        self.tmpdir = makeTestTempDir(TESTDIR)

        # set up some fake credentials if they do not exist
        self.usingDummyCredentials = setAwsEnvCredentials()

        # MOTO needs to know that we expect Bucket bucketname to exist
        s3 = boto3.resource("s3")
        s3.create_bucket(Bucket=self.bucketName)

    def tearDown(self):
        # Credentials and the local temp dir are cleaned up in ``finally`` so
        # that a failure while emptying/deleting the bucket can not leak them
        # into later tests.
        try:
            s3 = boto3.resource("s3")
            bucket = s3.Bucket(self.bucketName)
            try:
                bucket.objects.all().delete()
            except botocore.exceptions.ClientError as e:
                # A 404 means the key was not reachable; anything else is a
                # genuine failure.
                if e.response["Error"]["Code"] != "404":
                    raise

            bucket.delete()
        finally:
            # unset any potentially set dummy credentials
            if self.usingDummyCredentials:
                unsetAwsEnvCredentials()

            shutil.rmtree(self.tmpdir, ignore_errors=True)

    def makeS3Uri(self, path):
        """Return the ``s3://`` URI string for ``path`` in the test bucket."""
        return f"s3://{self.bucketName}/{path}"

    def testTransfer(self):
        """Copy local -> S3 -> S3 -> local and check overwrite handling."""
        src = ButlerURI(os.path.join(self.tmpdir, "test.txt"))
        content = "Content is some content\nwith something to say\n\n"
        src.write(content.encode())
        self.assertTrue(src.exists())
        self.assertEqual(src.size(), len(content.encode()))

        dest = ButlerURI(self.makeS3Uri("test.txt"))
        self.assertFalse(dest.exists())

        with self.assertRaises(FileNotFoundError):
            dest.size()

        dest.transfer_from(src, transfer="copy")
        self.assertTrue(dest.exists())

        dest2 = ButlerURI(self.makeS3Uri("copied.txt"))
        dest2.transfer_from(dest, transfer="copy")
        self.assertTrue(dest2.exists())

        local = ButlerURI(os.path.join(self.tmpdir, "copied.txt"))
        local.transfer_from(dest2, transfer="copy")
        with open(local.ospath, "r") as fd:
            new_content = fd.read()
        self.assertEqual(new_content, content)

        # Link-style transfers make no sense for a remote destination.
        with self.assertRaises(ValueError):
            dest2.transfer_from(local, transfer="symlink")

        b = dest.read()
        self.assertEqual(b.decode(), new_content)

        # A sized read must return exactly the requested number of bytes.
        nbytes = 10
        subset = dest.read(size=nbytes)
        self.assertEqual(len(subset), nbytes)
        self.assertEqual(subset.decode(), content[:nbytes])

        with self.assertRaises(FileExistsError):
            dest.transfer_from(src, transfer="copy")

        dest.transfer_from(src, transfer="copy", overwrite=True)

    def testWalk(self):
        """Test that we can list an S3 bucket"""
        # Files we want to create
        expected = ("a/x.txt", "a/y.txt", "a/z.json", "a/b/w.txt", "a/b/c/d/v.json")
        expected_uris = [ButlerURI(self.makeS3Uri(path)) for path in expected]
        for uri in expected_uris:
            # Doesn't matter what we write
            uri.write(b"123")

        # Find all the files in the a/ tree
        found = {uri.path for uri in ButlerURI.findFileResources([ButlerURI(self.makeS3Uri("a/"))])}
        self.assertEqual(found, {uri.path for uri in expected_uris})

        # Find all the files in the a/ tree but group by folder
        found = ButlerURI.findFileResources([ButlerURI(self.makeS3Uri("a/"))], grouped=True)
        expected = (("/a/x.txt", "/a/y.txt", "/a/z.json"), ("/a/b/w.txt",), ("/a/b/c/d/v.json",))

        # NOTE(review): zip() stops at the shorter input, so a missing
        # trailing group would go unnoticed here -- confirm whether the
        # number of groups should also be asserted.
        for got, expect in zip(found, expected):
            self.assertEqual(tuple(u.path for u in got), expect)

        # Find only JSON files
        found = {
            uri.path
            for uri in ButlerURI.findFileResources([ButlerURI(self.makeS3Uri("a/"))], file_filter=r"\.json$")
        }
        self.assertEqual(found, {uri.path for uri in expected_uris if uri.path.endswith(".json")})

        # JSON files grouped by directory
        found = ButlerURI.findFileResources(
            [ButlerURI(self.makeS3Uri("a/"))], file_filter=r"\.json$", grouped=True
        )
        expected = (("/a/z.json",), ("/a/b/c/d/v.json",))

        for got, expect in zip(found, expected):
            self.assertEqual(tuple(u.path for u in got), expect)

        # Check pagination works with large numbers of files. S3 API limits
        # us to 1000 response per list_objects call so create lots of files
        created = set()
        n_dir1 = 1100
        for counter in range(1, n_dir1 + 1):
            new = ButlerURI(self.makeS3Uri(f"test/file{counter:04d}.txt"))
            new.write(f"{counter}".encode())
            created.add(str(new))

        # Put some in a subdirectory to make sure we are looking in a
        # hierarchy.
        n_dir2 = 100
        for counter in range(1, n_dir2 + 1):
            new = ButlerURI(self.makeS3Uri(f"test/subdir/file{counter:04d}.txt"))
            new.write(f"{counter}".encode())
            created.add(str(new))

        found = ButlerURI.findFileResources([ButlerURI(self.makeS3Uri("test/"))])
        self.assertEqual({str(u) for u in found}, created)

        # Again with grouping.
        found = list(ButlerURI.findFileResources([ButlerURI(self.makeS3Uri("test/"))], grouped=True))
        self.assertEqual(len(found), 2)
        dir_1 = list(found[0])
        dir_2 = list(found[1])
        self.assertEqual(len(dir_1), n_dir1)
        self.assertEqual(len(dir_2), n_dir2)

    def testWrite(self):
        """Write to S3 and read the same content back."""
        s3write = ButlerURI(self.makeS3Uri("created.txt"))
        content = "abcdefghijklmnopqrstuv\n"
        s3write.write(content.encode())
        self.assertEqual(s3write.read().decode(), content)

    def testTemporary(self):
        """Check that temporary S3 URIs are unique and removed on exit."""
        s3root = ButlerURI(self.makeS3Uri("rootdir"), forceDirectory=True)
        with ButlerURI.temporary_uri(prefix=s3root, suffix=".json") as tmp:
            self.assertEqual(tmp.getExtension(), ".json", f"uri: {tmp}")
            self.assertEqual(tmp.scheme, "s3", f"uri: {tmp}")
            self.assertEqual(tmp.parent(), s3root)
            basename = tmp.basename()
            content = "abcd"
            tmp.write(content.encode())
            self.assertTrue(tmp.exists(), f"uri: {tmp}")
        self.assertFalse(tmp.exists())

        # Again without writing anything, to check that there is no complaint
        # on exit of context manager.
        with ButlerURI.temporary_uri(prefix=s3root, suffix=".json") as tmp:
            self.assertFalse(tmp.exists())
            # Check that the file has a different name than before.
            self.assertNotEqual(tmp.basename(), basename, f"uri: {tmp}")
        self.assertFalse(tmp.exists())

    def testRelative(self):
        """Check that we can get subpaths back from two URIs"""
        parent = ButlerURI(self.makeS3Uri("rootdir"), forceDirectory=True)
        child = ButlerURI(self.makeS3Uri("rootdir/dir1/file.txt"))

        self.assertEqual(child.relative_to(parent), "dir1/file.txt")

        not_child = ButlerURI(self.makeS3Uri("/a/b/dir1/file.txt"))
        self.assertFalse(not_child.relative_to(parent))

        not_s3 = ButlerURI(os.path.join(self.tmpdir, "dir1", "file2.txt"))
        self.assertFalse(child.relative_to(not_s3))

    def testQuoting(self):
        """Check that quoting works."""
        parent = ButlerURI(self.makeS3Uri("rootdir"), forceDirectory=True)
        subpath = "rootdir/dir1+/file?.txt"
        child = ButlerURI(self.makeS3Uri(urllib.parse.quote(subpath)))

        self.assertEqual(child.relative_to(parent), "dir1+/file?.txt")
        self.assertEqual(child.basename(), "file?.txt")
        self.assertEqual(child.relativeToPathRoot, subpath)
        self.assertIn("%", child.path)
        self.assertEqual(child.unquoted_path, "/" + subpath)

706 

# Mock required environment variables during tests
@unittest.mock.patch.dict(
    os.environ,
    {
        "LSST_BUTLER_WEBDAV_AUTH": "TOKEN",
        "LSST_BUTLER_WEBDAV_TOKEN_FILE": os.path.join(TESTDIR, "config/testConfigs/webdav/token"),
        "LSST_BUTLER_WEBDAV_CA_BUNDLE": "/path/to/ca/certs",
    },
)
class WebdavURITestCase(unittest.TestCase):
    """Tests of HTTP(S) URIs run against responses registered with the
    ``responses`` mocking library.
    """

    def setUp(self):
        host = "www.not-exists.orgx"
        folder = "existingFolder"
        present = "existingFile"
        absent = "notExistingFile"

        self.baseURL = ButlerURI(f"https://{host}", forceDirectory=True)
        self.existingFileButlerURI = ButlerURI(f"https://{host}/{folder}/{present}")
        self.notExistingFileButlerURI = ButlerURI(f"https://{host}/{folder}/{absent}")
        self.existingFolderButlerURI = ButlerURI(f"https://{host}/{folder}", forceDirectory=True)
        self.notExistingFolderButlerURI = ButlerURI(f"https://{host}/{absent}", forceDirectory=True)

        # URL strings used for the mock registrations below.
        base_url = self.baseURL.geturl()
        present_file_url = self.existingFileButlerURI.geturl()
        absent_file_url = self.notExistingFileButlerURI.geturl()
        present_dir_url = self.existingFolderButlerURI.geturl()
        absent_dir_url = self.notExistingFolderButlerURI.geturl()
        sized = {"Content-Length": "1024"}

        # Need to declare the options
        responses.add(responses.OPTIONS, base_url, status=200, headers={"DAV": "1,2,3"})

        # Used by ButlerHttpURI.exists()
        responses.add(responses.HEAD, present_file_url, status=200, headers=dict(sized))
        responses.add(responses.HEAD, absent_file_url, status=404)

        # Used by ButlerHttpURI.read()
        responses.add(responses.GET, present_file_url, status=200, body=b"It works!")
        responses.add(responses.GET, absent_file_url, status=404)

        # Used by ButlerHttpURI.write()
        responses.add(responses.PUT, present_file_url, status=201)

        # Used by ButlerHttpURI.transfer_from()
        for verb, destination in (
            ("COPY", present_file_url),
            ("COPY", absent_file_url),
            ("MOVE", absent_file_url),
        ):
            responses.add(
                responses.Response(
                    url=present_file_url,
                    method=verb,
                    headers={"Destination": destination},
                    status=201,
                )
            )

        # Used by ButlerHttpURI.remove()
        responses.add(responses.DELETE, present_file_url, status=200)
        responses.add(responses.DELETE, absent_file_url, status=404)

        # Used by ButlerHttpURI.mkdir()
        responses.add(responses.HEAD, present_dir_url, status=200, headers=dict(sized))
        responses.add(responses.HEAD, base_url, status=200, headers=dict(sized))
        responses.add(responses.HEAD, absent_dir_url, status=404)
        responses.add(responses.Response(url=absent_dir_url, method="MKCOL", status=201))
        responses.add(responses.Response(url=present_dir_url, method="MKCOL", status=403))

    @responses.activate
    def testExists(self):
        """Existence and size queries against present and absent files."""
        present = self.existingFileButlerURI
        absent = self.notExistingFileButlerURI

        self.assertTrue(present.exists())
        self.assertFalse(absent.exists())

        self.assertEqual(present.size(), 1024)
        with self.assertRaises(FileNotFoundError):
            absent.size()

    @responses.activate
    def testRemove(self):
        """Removing a present file succeeds; an absent one raises."""
        outcome = self.existingFileButlerURI.remove()
        self.assertIsNone(outcome)
        with self.assertRaises(FileNotFoundError):
            self.notExistingFileButlerURI.remove()

    @responses.activate
    def testMkdir(self):
        """Directory creation for new, existing and file-like URIs."""
        # The mock means that we can't check this now exists
        self.notExistingFolderButlerURI.mkdir()

        # This should do nothing
        self.existingFolderButlerURI.mkdir()

        # A URI that is not directory-like can not be made into a directory.
        with self.assertRaises(ValueError):
            self.notExistingFileButlerURI.mkdir()

    @responses.activate
    def testRead(self):
        """Reading returns the mocked body; absent files raise."""
        present = self.existingFileButlerURI
        self.assertEqual(present.read().decode(), "It works!")
        self.assertNotEqual(present.read().decode(), "Nope.")
        with self.assertRaises(FileNotFoundError):
            self.notExistingFileButlerURI.read()

    @responses.activate
    def testWrite(self):
        """Writing succeeds by default but refuses when overwrite=False."""
        target = self.existingFileButlerURI
        payload = "Some content.".encode()

        self.assertIsNone(target.write(data=payload))
        with self.assertRaises(FileExistsError):
            target.write(data=payload, overwrite=False)

    @responses.activate
    def testTransfer(self):
        """Copy and move between HTTP URIs, plus the error cases."""
        source = self.existingFileButlerURI
        target = self.notExistingFileButlerURI

        self.assertIsNone(target.transfer_from(src=source))
        self.assertIsNone(target.transfer_from(src=source, transfer="move"))
        with self.assertRaises(FileExistsError):
            source.transfer_from(src=source)
        with self.assertRaises(ValueError):
            target.transfer_from(src=source, transfer="unsupported")

    def testParent(self):
        """Parent/dirname relationships between the fixture URIs."""
        folder_url = self.existingFolderButlerURI.geturl()
        self.assertEqual(folder_url, self.notExistingFileButlerURI.parent().geturl())

        # The parent of the server root is the root itself.
        self.assertEqual(self.baseURL.geturl(), self.baseURL.parent().geturl())

        present = self.existingFileButlerURI
        self.assertEqual(present.parent().geturl(), present.dirname().geturl())

871 

872 

# Allow the test module to be executed directly as a script.
if __name__ == "__main__":
    unittest.main()