Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

import glob
import os
import shutil
import unittest
import unittest.mock
import urllib.parse

import responses

28 

# boto3/botocore/moto are optional: the S3 tests are skipped when they are
# missing, but the module must still import cleanly.
try:
    import boto3
    import botocore
    from moto import mock_s3
except ImportError:
    # Bind both names so that neither is left undefined regardless of which
    # of the three imports failed.
    boto3 = None
    botocore = None

    def mock_s3(cls):
        """A no-op decorator in case moto mock_s3 can not be imported.

        Parameters
        ----------
        cls : `type`
            The class being decorated.

        Returns
        -------
        cls : `type`
            The same class, unmodified.
        """
        return cls

40 

41from lsst.daf.butler import ButlerURI 

42from lsst.daf.butler.core._butlerUri.s3utils import (setAwsEnvCredentials, 

43 unsetAwsEnvCredentials) 

44from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir 

45 

# Absolute path of the directory containing this test file; used as the
# root for temporary directories and for locating test configuration files.
TESTDIR = os.path.abspath(os.path.dirname(__file__))

47 

48 

class FileURITestCase(unittest.TestCase):
    """Concrete tests for local files."""

    def setUp(self):
        # Use a local tempdir because on macOS the temp dirs use symlinks
        # so relsymlink gets quite confused.
        self.tmpdir = makeTestTempDir(TESTDIR)

    def tearDown(self):
        removeTestTempDir(self.tmpdir)

    def testFile(self):
        """Test existence, write, read and size of a local file URI."""
        file = os.path.join(self.tmpdir, "test.txt")
        uri = ButlerURI(file)
        self.assertFalse(uri.exists(), f"{uri} should not exist")
        self.assertEqual(uri.ospath, file)

        content = "abcdefghijklmnopqrstuv\n"
        uri.write(content.encode())
        self.assertTrue(os.path.exists(file), "File should exist locally")
        self.assertTrue(uri.exists(), f"{uri} should now exist")
        self.assertEqual(uri.read().decode(), content)
        self.assertEqual(uri.size(), len(content.encode()))

        with self.assertRaises(FileNotFoundError):
            ButlerURI("file/not/there.txt").size()

        # Check that creating a URI from a URI returns the same thing
        uri2 = ButlerURI(uri)
        self.assertEqual(uri, uri2)
        self.assertIs(uri, uri2)

        with self.assertRaises(ValueError):
            # Scheme-less URIs are not allowed to support non-file roots
            # at the present time. This may change in the future to become
            # equivalent to ButlerURI.join()
            ButlerURI("a/b.txt", root=ButlerURI("s3://bucket/a/b/"))

    def testExtension(self):
        """Test that file extensions can be replaced."""
        file = ButlerURI(os.path.join(self.tmpdir, "test.txt"))
        self.assertEqual(file.updatedExtension(None), file)
        self.assertEqual(file.updatedExtension(".txt"), file)
        # An unchanged extension must hand back the very same object.
        self.assertIs(file.updatedExtension(".txt"), file)

        fits = file.updatedExtension(".fits.gz")
        self.assertEqual(fits.basename(), "test.fits.gz")
        self.assertEqual(fits.updatedExtension(".jpeg").basename(), "test.jpeg")

    def testRelative(self):
        """Check that we can get subpaths back from two URIs"""
        parent = ButlerURI(self.tmpdir, forceDirectory=True, forceAbsolute=True)
        self.assertTrue(parent.isdir())
        child = ButlerURI(os.path.join(self.tmpdir, "dir1", "file.txt"), forceAbsolute=True)

        self.assertEqual(child.relative_to(parent), "dir1/file.txt")

        not_child = ButlerURI("/a/b/dir1/file.txt")
        self.assertIsNone(not_child.relative_to(parent))
        self.assertFalse(not_child.isdir())

        not_directory = ButlerURI(os.path.join(self.tmpdir, "dir1", "file2.txt"))
        self.assertIsNone(child.relative_to(not_directory))

        # Relative URIs
        parent = ButlerURI("a/b/", forceAbsolute=False)
        child = ButlerURI("a/b/c/d.txt", forceAbsolute=False)
        self.assertFalse(child.scheme)
        self.assertEqual(child.relative_to(parent), "c/d.txt")

        # File URI and schemeless URI
        parent = ButlerURI("file:/a/b/c/")
        child = ButlerURI("e/f/g.txt", forceAbsolute=False)

        # If the child is relative and the parent is absolute we assume
        # that the child is a child of the parent unless it uses ".."
        self.assertEqual(child.relative_to(parent), "e/f/g.txt")

        child = ButlerURI("../e/f/g.txt", forceAbsolute=False)
        self.assertIsNone(child.relative_to(parent))

        child = ButlerURI("../c/e/f/g.txt", forceAbsolute=False)
        self.assertEqual(child.relative_to(parent), "e/f/g.txt")

        # Test non-file root with relative path.
        child = ButlerURI("e/f/g.txt", forceAbsolute=False)
        parent = ButlerURI("s3://hello/a/b/c/")
        self.assertEqual(child.relative_to(parent), "e/f/g.txt")

        # Test with different netloc
        child = ButlerURI("http://my.host/a/b/c.txt")
        parent = ButlerURI("http://other.host/a/")
        self.assertIsNone(child.relative_to(parent), f"{child}.relative_to({parent})")

        # Schemeless absolute child.
        # Schemeless absolute URI is constructed using root= parameter.
        parent = ButlerURI("file:///a/b/c/")
        child = ButlerURI("d/e.txt", root=parent)
        self.assertEqual(child.relative_to(parent), "d/e.txt", f"{child}.relative_to({parent})")

        parent = ButlerURI("c/", root="/a/b/")
        self.assertEqual(child.relative_to(parent), "d/e.txt", f"{child}.relative_to({parent})")

        # Absolute schemeless child with relative parent will always fail.
        parent = ButlerURI("d/e.txt", forceAbsolute=False)
        self.assertIsNone(child.relative_to(parent), f"{child}.relative_to({parent})")

    def testParents(self):
        """Test of splitting and parent walking."""
        parent = ButlerURI(self.tmpdir, forceDirectory=True, forceAbsolute=True)
        child_file = parent.join("subdir/file.txt")
        self.assertFalse(child_file.isdir())
        child_subdir, file = child_file.split()
        self.assertEqual(file, "file.txt")
        self.assertTrue(child_subdir.isdir())
        self.assertEqual(child_file.dirname(), child_subdir)
        self.assertEqual(child_file.basename(), file)
        self.assertEqual(child_file.parent(), child_subdir)
        derived_parent = child_subdir.parent()
        self.assertEqual(derived_parent, parent)
        self.assertTrue(derived_parent.isdir())
        self.assertEqual(child_file.parent().parent(), parent)

    def testEnvVar(self):
        """Test that environment variables are expanded."""

        with unittest.mock.patch.dict(os.environ, {"MY_TEST_DIR": "/a/b/c"}):
            uri = ButlerURI("${MY_TEST_DIR}/d.txt")
        self.assertEqual(uri.path, "/a/b/c/d.txt")
        self.assertEqual(uri.scheme, "file")

        # This will not expand
        uri = ButlerURI("${MY_TEST_DIR}/d.txt", forceAbsolute=False)
        self.assertEqual(uri.path, "${MY_TEST_DIR}/d.txt")
        self.assertFalse(uri.scheme)

    def testMkdir(self):
        """Test that nested directories can be created and written into."""
        tmpdir = ButlerURI(self.tmpdir)
        newdir = tmpdir.join("newdir/seconddir")
        newdir.mkdir()
        self.assertTrue(newdir.exists())
        newfile = newdir.join("temp.txt")
        newfile.write("Data".encode())
        self.assertTrue(newfile.exists())

    def testTransfer(self):
        """Test local-to-local transfers in every supported mode."""
        src = ButlerURI(os.path.join(self.tmpdir, "test.txt"))
        content = "Content is some content\nwith something to say\n\n"
        src.write(content.encode())

        for mode in ("copy", "link", "hardlink", "symlink", "relsymlink"):
            dest = ButlerURI(os.path.join(self.tmpdir, f"dest_{mode}.txt"))
            dest.transfer_from(src, transfer=mode)
            self.assertTrue(dest.exists(), f"Check that {dest} exists (transfer={mode})")

            with open(dest.ospath, "r") as fh:
                new_content = fh.read()
            self.assertEqual(new_content, content)

            if mode in ("symlink", "relsymlink"):
                self.assertTrue(os.path.islink(dest.ospath), f"Check that {dest} is symlink")

            # A second transfer onto an existing file must be refused
            # unless overwrite is requested explicitly.
            with self.assertRaises(FileExistsError):
                dest.transfer_from(src, transfer=mode)

            dest.transfer_from(src, transfer=mode, overwrite=True)

            os.remove(dest.ospath)

        # Compare against the reference content directly rather than a
        # variable that only exists because the loop above ran.
        b = src.read()
        self.assertEqual(b.decode(), content)

        nbytes = 10
        subset = src.read(size=nbytes)
        self.assertEqual(len(subset), nbytes)
        self.assertEqual(subset.decode(), content[:nbytes])

        with self.assertRaises(ValueError):
            src.transfer_from(src, transfer="unknown")

    def testResource(self):
        """Test URIs using the ``resource`` scheme."""
        u = ButlerURI("resource://lsst.daf.butler/configs/datastore.yaml")
        self.assertTrue(u.exists(), f"Check {u} exists")

        content = u.read().decode()
        self.assertTrue(content.startswith("datastore:"))

        truncated = u.read(size=9).decode()
        self.assertEqual(truncated, "datastore")

        d = ButlerURI("resource://lsst.daf.butler/configs", forceDirectory=True)
        # Check the directory itself; previously this re-checked the file.
        self.assertTrue(d.exists(), f"Check directory {d} exists")

        j = d.join("datastore.yaml")
        self.assertEqual(u, j)
        self.assertFalse(j.dirLike)
        self.assertFalse(j.isdir())
        self.assertFalse(d.join("not-there.yaml").exists())

    def testEscapes(self):
        """Special characters in file paths"""
        src = ButlerURI("bbb/???/test.txt", root=self.tmpdir, forceAbsolute=True)
        self.assertFalse(src.scheme)
        src.write(b"Some content")
        self.assertTrue(src.exists())

        # abspath always returns a file scheme
        file = src.abspath()
        self.assertTrue(file.exists())
        self.assertIn("???", file.ospath)
        self.assertNotIn("???", file.path)

        file = file.updatedFile("tests??.txt")
        self.assertNotIn("??.txt", file.path)
        file.write(b"Other content")
        self.assertEqual(file.read(), b"Other content")

        src = src.updatedFile("tests??.txt")
        self.assertIn("??.txt", src.path)
        self.assertEqual(file.read(), src.read(), f"reading from {file.ospath} and {src.ospath}")

        # File URI and schemeless URI
        parent = ButlerURI("file:" + urllib.parse.quote("/a/b/c/de/??/"))
        child = ButlerURI("e/f/g.txt", forceAbsolute=False)
        self.assertEqual(child.relative_to(parent), "e/f/g.txt")

        child = ButlerURI("e/f??#/g.txt", forceAbsolute=False)
        self.assertEqual(child.relative_to(parent), "e/f??#/g.txt")

        child = ButlerURI("file:" + urllib.parse.quote("/a/b/c/de/??/e/f??#/g.txt"))
        self.assertEqual(child.relative_to(parent), "e/f??#/g.txt")

        self.assertEqual(child.relativeToPathRoot, "a/b/c/de/??/e/f??#/g.txt")

        # Schemeless so should not quote
        directory = ButlerURI("bbb/???/", root=self.tmpdir, forceAbsolute=True, forceDirectory=True)
        self.assertIn("???", directory.ospath)
        self.assertIn("???", directory.path)
        self.assertFalse(directory.scheme)

        # dir.join() morphs into a file scheme
        new = directory.join("test_j.txt")
        self.assertIn("???", new.ospath, f"Checking {new}")
        new.write(b"Content")

        new2name = "###/test??.txt"
        new2 = directory.join(new2name)
        self.assertIn("???", new2.ospath)
        new2.write(b"Content")
        self.assertTrue(new2.ospath.endswith(new2name))
        self.assertEqual(new.read(), new2.read())

        fdir = directory.abspath()
        self.assertNotIn("???", fdir.path)
        self.assertIn("???", fdir.ospath)
        self.assertEqual(fdir.scheme, "file")
        fnew = directory.join("test_jf.txt")
        fnew.write(b"Content")

        fnew2 = fdir.join(new2name)
        fnew2.write(b"Content")
        self.assertTrue(fnew2.ospath.endswith(new2name))
        self.assertNotIn("###", fnew2.path)

        self.assertEqual(fnew.read(), fnew2.read())

        # Test that children relative to schemeless and file schemes
        # still return the same unquoted name
        self.assertEqual(fnew2.relative_to(fdir), new2name, f"{fnew2}.relative_to({fdir})")
        self.assertEqual(fnew2.relative_to(directory), new2name, f"{fnew2}.relative_to({directory})")
        self.assertEqual(new2.relative_to(fdir), new2name, f"{new2}.relative_to({fdir})")
        self.assertEqual(new2.relative_to(directory), new2name, f"{new2}.relative_to({directory})")

        # Check for double quoting
        plus_path = "/a/b/c+d/"
        with self.assertLogs(level="WARNING"):
            uri = ButlerURI(urllib.parse.quote(plus_path), forceDirectory=True)
        self.assertEqual(uri.ospath, plus_path)

        # Check that # is not escaped for schemeless URIs
        hash_path = "/a/b#/c&d#xyz"
        hpos = hash_path.rfind("#")
        uri = ButlerURI(hash_path)
        self.assertEqual(uri.ospath, hash_path[:hpos])
        self.assertEqual(uri.fragment, hash_path[hpos + 1:])

    def testHash(self):
        """Test that we can store URIs in sets and as keys."""
        uri1 = ButlerURI(TESTDIR)
        uri2 = uri1.join("test/")
        s = {uri1, uri2}
        self.assertIn(uri1, s)

        d = {uri1: "1", uri2: "2"}
        self.assertEqual(d[uri2], "2")

    def testWalk(self):
        """Test ButlerURI.walk()."""
        test_dir_uri = ButlerURI(TESTDIR)

        file = test_dir_uri.join("config/basic/butler.yaml")
        found = list(ButlerURI.findFileResources([file]))
        self.assertEqual(found[0], file)

        # Compare against the full local paths
        expected = {p for p in glob.glob(os.path.join(TESTDIR, "config", "**"), recursive=True)
                    if os.path.isfile(p)}
        found = {u.ospath for u in ButlerURI.findFileResources([test_dir_uri.join("config")])}
        self.assertEqual(found, expected)

        # Now solely the YAML files
        expected_yaml = set(glob.glob(os.path.join(TESTDIR, "config", "**", "*.yaml"), recursive=True))
        found = {u.ospath for u in ButlerURI.findFileResources([test_dir_uri.join("config")],
                                                               file_filter=r".*\.yaml$")}
        self.assertEqual(found, expected_yaml)

        # Now two explicit directories and a file
        expected = set(glob.glob(os.path.join(TESTDIR, "config", "**", "basic", "*.yaml"), recursive=True))
        expected.update(set(glob.glob(os.path.join(TESTDIR, "config", "**", "templates", "*.yaml"),
                                      recursive=True)))
        expected.add(file.ospath)

        found = {u.ospath for u in ButlerURI.findFileResources([file, test_dir_uri.join("config/basic"),
                                                                test_dir_uri.join("config/templates")],
                                                               file_filter=r".*\.yaml$")}
        self.assertEqual(found, expected)

        # Group by directory -- find everything and compare it with what
        # we expected to be there in total. We expect to find 9 directories
        # containing yaml files so make sure we only iterate 9 times.
        found_yaml = set()
        counter = 0
        for uris in ButlerURI.findFileResources([file, test_dir_uri.join("config/")],
                                                file_filter=r".*\.yaml$", grouped=True):
            found = {u.ospath for u in uris}
            if found:
                counter += 1

            found_yaml.update(found)

        self.assertEqual(found_yaml, expected_yaml)
        self.assertEqual(counter, 9)

        # Grouping but check that single files are returned in a single group
        # at the end
        file2 = test_dir_uri.join("config/templates/templates-bad.yaml")
        found = list(ButlerURI.findFileResources([file, file2, test_dir_uri.join("config/dbAuth")],
                                                 grouped=True))
        self.assertEqual(len(found), 2)
        self.assertEqual(list(found[1]), [file, file2])

        with self.assertRaises(ValueError):
            list(file.walk())

    def testRootURI(self):
        """Test ButlerURI.root_uri()."""
        uri = ButlerURI("https://www.notexist.com:8080/file/test")
        uri2 = ButlerURI("s3://www.notexist.com/file/test")
        self.assertEqual(uri.root_uri().geturl(), "https://www.notexist.com:8080/")
        self.assertEqual(uri2.root_uri().geturl(), "s3://www.notexist.com/")

    def testJoin(self):
        """Test .join method."""

        root_str = "s3://bucket/hsc/payload/"
        root = ButlerURI(root_str)

        self.assertEqual(root.join("b/test.txt").geturl(), f"{root_str}b/test.txt")
        add_dir = root.join("b/c/d/")
        self.assertTrue(add_dir.isdir())
        self.assertEqual(add_dir.geturl(), f"{root_str}b/c/d/")

        quote_example = "b&c.t@x#t"
        needs_quote = root.join(quote_example)
        self.assertEqual(needs_quote.unquoted_path, f"/hsc/payload/{quote_example}")

        other = ButlerURI("file://localhost/test.txt")
        self.assertEqual(root.join(other), other)
        self.assertEqual(other.join("b/new.txt").geturl(), "file://localhost/b/new.txt")

        joined = ButlerURI("s3://bucket/hsc/payload/").join(ButlerURI("test.qgraph", forceAbsolute=False))
        self.assertEqual(joined, ButlerURI("s3://bucket/hsc/payload/test.qgraph"))

        with self.assertRaises(ValueError):
            ButlerURI("s3://bucket/hsc/payload/").join(ButlerURI("test.qgraph"))

    def testTemporary(self):
        """Test that temporary URIs are removed on context exit."""
        with ButlerURI.temporary_uri(suffix=".json") as tmp:
            self.assertEqual(tmp.getExtension(), ".json", f"uri: {tmp}")
            self.assertTrue(tmp.isabs(), f"uri: {tmp}")
            self.assertFalse(tmp.exists(), f"uri: {tmp}")
            tmp.write(b"abcd")
            self.assertTrue(tmp.exists(), f"uri: {tmp}")
            self.assertTrue(tmp.isTemporary)
        self.assertFalse(tmp.exists(), f"uri: {tmp}")

        tmpdir = ButlerURI(self.tmpdir, forceDirectory=True)
        with ButlerURI.temporary_uri(prefix=tmpdir, suffix=".yaml") as tmp:
            # Use a specified tmpdir and check it is okay for the file
            # to not be created.
            self.assertFalse(tmp.exists(), f"uri: {tmp}")
        self.assertTrue(tmpdir.exists(), f"uri: {tmpdir} still exists")

450 

451 

@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!")
@mock_s3
class S3URITestCase(unittest.TestCase):
    """Tests involving S3"""

    # Bucket name to use in tests
    bucketName = "any_bucket"

    def setUp(self):
        # Local test directory
        self.tmpdir = makeTestTempDir(TESTDIR)

        # set up some fake credentials if they do not exist
        self.usingDummyCredentials = setAwsEnvCredentials()

        # MOTO needs to know that we expect Bucket bucketname to exist
        s3 = boto3.resource("s3")
        s3.create_bucket(Bucket=self.bucketName)

    def tearDown(self):
        # Credentials and the local temp dir must be cleaned up even when
        # emptying or deleting the bucket fails, so do that in ``finally``.
        try:
            bucket = boto3.resource("s3").Bucket(self.bucketName)
            try:
                bucket.objects.all().delete()
            except botocore.exceptions.ClientError as e:
                # A 404 means the key was not reachable, which is fine;
                # anything else is a genuine failure.
                if e.response["Error"]["Code"] != "404":
                    raise

            bucket.delete()
        finally:
            # unset any potentially set dummy credentials
            if self.usingDummyCredentials:
                unsetAwsEnvCredentials()

            shutil.rmtree(self.tmpdir, ignore_errors=True)

    def makeS3Uri(self, path):
        """Return an ``s3://`` URI string for ``path`` in the test bucket.

        Parameters
        ----------
        path : `str`
            Key within the bucket.

        Returns
        -------
        uri : `str`
            String of the form ``s3://<bucketName>/<path>``.
        """
        return f"s3://{self.bucketName}/{path}"

    def testTransfer(self):
        """Test copies between local files and S3 in both directions."""
        src = ButlerURI(os.path.join(self.tmpdir, "test.txt"))
        content = "Content is some content\nwith something to say\n\n"
        src.write(content.encode())
        self.assertTrue(src.exists())
        self.assertEqual(src.size(), len(content.encode()))

        dest = ButlerURI(self.makeS3Uri("test.txt"))
        self.assertFalse(dest.exists())

        with self.assertRaises(FileNotFoundError):
            dest.size()

        dest.transfer_from(src, transfer="copy")
        self.assertTrue(dest.exists())

        dest2 = ButlerURI(self.makeS3Uri("copied.txt"))
        dest2.transfer_from(dest, transfer="copy")
        self.assertTrue(dest2.exists())

        local = ButlerURI(os.path.join(self.tmpdir, "copied.txt"))
        local.transfer_from(dest2, transfer="copy")
        with open(local.ospath, "r") as fd:
            new_content = fd.read()
        self.assertEqual(new_content, content)

        with self.assertRaises(ValueError):
            dest2.transfer_from(local, transfer="symlink")

        b = dest.read()
        self.assertEqual(b.decode(), new_content)

        # A sized read must return exactly the requested number of bytes.
        nbytes = 10
        subset = dest.read(size=nbytes)
        self.assertEqual(len(subset), nbytes)
        self.assertEqual(subset.decode(), content[:nbytes])

        with self.assertRaises(FileExistsError):
            dest.transfer_from(src, transfer="copy")

        dest.transfer_from(src, transfer="copy", overwrite=True)

    def testWalk(self):
        """Test that we can list an S3 bucket"""
        # Files we want to create
        expected = ("a/x.txt", "a/y.txt", "a/z.json", "a/b/w.txt", "a/b/c/d/v.json")
        expected_uris = [ButlerURI(self.makeS3Uri(path)) for path in expected]
        for uri in expected_uris:
            # Doesn't matter what we write
            uri.write("123".encode())

        # Find all the files in the a/ tree
        found = {uri.path for uri in ButlerURI.findFileResources([ButlerURI(self.makeS3Uri("a/"))])}
        self.assertEqual(found, {uri.path for uri in expected_uris})

        # Find all the files in the a/ tree but group by folder
        found = ButlerURI.findFileResources([ButlerURI(self.makeS3Uri("a/"))],
                                            grouped=True)
        expected = (("/a/x.txt", "/a/y.txt", "/a/z.json"), ("/a/b/w.txt",), ("/a/b/c/d/v.json",))

        # Consume each group as soon as it is yielded and compare the whole
        # sequence, so that missing or extra groups are detected (zip()
        # would silently stop at the shorter of the two).
        got = tuple(tuple(u.path for u in group) for group in found)
        self.assertEqual(got, expected)

        # Find only JSON files
        found = {uri.path for uri in ButlerURI.findFileResources([ButlerURI(self.makeS3Uri("a/"))],
                                                                 file_filter=r"\.json$")}
        self.assertEqual(found, {uri.path for uri in expected_uris if uri.path.endswith(".json")})

        # JSON files grouped by directory
        found = ButlerURI.findFileResources([ButlerURI(self.makeS3Uri("a/"))],
                                            file_filter=r"\.json$", grouped=True)
        expected = (("/a/z.json",), ("/a/b/c/d/v.json",))

        got = tuple(tuple(u.path for u in group) for group in found)
        self.assertEqual(got, expected)

        # Check pagination works with large numbers of files. S3 API limits
        # us to 1000 response per list_objects call so create lots of files
        created = set()
        n_dir1 = 1100
        for counter in range(1, n_dir1 + 1):
            new = ButlerURI(self.makeS3Uri(f"test/file{counter:04d}.txt"))
            new.write(f"{counter}".encode())
            created.add(str(new))

        # Put some in a subdirectory to make sure we are looking in a
        # hierarchy.
        n_dir2 = 100
        for counter in range(1, n_dir2 + 1):
            new = ButlerURI(self.makeS3Uri(f"test/subdir/file{counter:04d}.txt"))
            new.write(f"{counter}".encode())
            created.add(str(new))

        found = ButlerURI.findFileResources([ButlerURI(self.makeS3Uri("test/"))])
        self.assertEqual({str(u) for u in found}, created)

        # Again with grouping.
        found = list(ButlerURI.findFileResources([ButlerURI(self.makeS3Uri("test/"))], grouped=True))
        self.assertEqual(len(found), 2)
        dir_1 = list(found[0])
        dir_2 = list(found[1])
        self.assertEqual(len(dir_1), n_dir1)
        self.assertEqual(len(dir_2), n_dir2)

    def testWrite(self):
        """Test that content written to S3 can be read back."""
        s3write = ButlerURI(self.makeS3Uri("created.txt"))
        content = "abcdefghijklmnopqrstuv\n"
        s3write.write(content.encode())
        self.assertEqual(s3write.read().decode(), content)

    def testTemporary(self):
        """Test that temporary S3 URIs are removed on context exit."""
        s3root = ButlerURI(self.makeS3Uri("rootdir"), forceDirectory=True)
        with ButlerURI.temporary_uri(prefix=s3root, suffix=".json") as tmp:
            self.assertEqual(tmp.getExtension(), ".json", f"uri: {tmp}")
            self.assertEqual(tmp.scheme, "s3", f"uri: {tmp}")
            self.assertEqual(tmp.parent(), s3root)
            basename = tmp.basename()
            content = "abcd"
            tmp.write(content.encode())
            self.assertTrue(tmp.exists(), f"uri: {tmp}")
        self.assertFalse(tmp.exists())

        # Again without writing anything, to check that there is no complaint
        # on exit of context manager.
        with ButlerURI.temporary_uri(prefix=s3root, suffix=".json") as tmp:
            self.assertFalse(tmp.exists())
            # Check that the file has a different name than before.
            self.assertNotEqual(tmp.basename(), basename, f"uri: {tmp}")
        self.assertFalse(tmp.exists())

    def testRelative(self):
        """Check that we can get subpaths back from two URIs"""
        parent = ButlerURI(self.makeS3Uri("rootdir"), forceDirectory=True)
        child = ButlerURI(self.makeS3Uri("rootdir/dir1/file.txt"))

        self.assertEqual(child.relative_to(parent), "dir1/file.txt")

        # Unrelated URIs are reported as None, as in the local file tests.
        not_child = ButlerURI(self.makeS3Uri("/a/b/dir1/file.txt"))
        self.assertIsNone(not_child.relative_to(parent))

        not_s3 = ButlerURI(os.path.join(self.tmpdir, "dir1", "file2.txt"))
        self.assertIsNone(child.relative_to(not_s3))

    def testQuoting(self):
        """Check that quoting works."""
        parent = ButlerURI(self.makeS3Uri("rootdir"), forceDirectory=True)
        subpath = "rootdir/dir1+/file?.txt"
        child = ButlerURI(self.makeS3Uri(urllib.parse.quote(subpath)))

        self.assertEqual(child.relative_to(parent), "dir1+/file?.txt")
        self.assertEqual(child.basename(), "file?.txt")
        self.assertEqual(child.relativeToPathRoot, subpath)
        self.assertIn("%", child.path)
        self.assertEqual(child.unquoted_path, "/" + subpath)

652 

653 

# Mock required environment variables during tests
@unittest.mock.patch.dict(os.environ, {"LSST_BUTLER_WEBDAV_AUTH": "TOKEN",
                                       "LSST_BUTLER_WEBDAV_TOKEN_FILE": os.path.join(
                                           TESTDIR, "config/testConfigs/webdav/token"),
                                       "LSST_BUTLER_WEBDAV_CA_BUNDLE": "/path/to/ca/certs"})
class WebdavURITestCase(unittest.TestCase):
    """Tests of HTTP(S)/WebDAV URIs against mocked server responses."""

    def setUp(self):
        # The mocked responses below are registered on the global
        # ``responses`` registry. Tests not wrapped by ``responses.activate``
        # would otherwise leave them registered, so always reset afterwards.
        self.addCleanup(responses.reset)

        serverRoot = "www.not-exists.orgx"
        existingFolderName = "existingFolder"
        existingFileName = "existingFile"
        notExistingFileName = "notExistingFile"

        self.baseURL = ButlerURI(
            f"https://{serverRoot}", forceDirectory=True)
        self.existingFileButlerURI = ButlerURI(
            f"https://{serverRoot}/{existingFolderName}/{existingFileName}")
        self.notExistingFileButlerURI = ButlerURI(
            f"https://{serverRoot}/{existingFolderName}/{notExistingFileName}")
        self.existingFolderButlerURI = ButlerURI(
            f"https://{serverRoot}/{existingFolderName}", forceDirectory=True)
        self.notExistingFolderButlerURI = ButlerURI(
            f"https://{serverRoot}/{notExistingFileName}", forceDirectory=True)

        # Need to declare the options
        responses.add(responses.OPTIONS,
                      self.baseURL.geturl(),
                      status=200, headers={"DAV": "1,2,3"})

        # Used by ButlerHttpURI.exists()
        responses.add(responses.HEAD,
                      self.existingFileButlerURI.geturl(),
                      status=200, headers={'Content-Length': '1024'})
        responses.add(responses.HEAD,
                      self.notExistingFileButlerURI.geturl(),
                      status=404)

        # Used by ButlerHttpURI.read()
        responses.add(responses.GET,
                      self.existingFileButlerURI.geturl(),
                      status=200,
                      body=b"It works!")
        responses.add(responses.GET,
                      self.notExistingFileButlerURI.geturl(),
                      status=404)

        # Used by ButlerHttpURI.write()
        responses.add(responses.PUT,
                      self.existingFileButlerURI.geturl(),
                      status=201)

        # Used by ButlerHttpURI.transfer_from()
        responses.add(responses.Response(url=self.existingFileButlerURI.geturl(),
                                         method="COPY",
                                         headers={"Destination": self.existingFileButlerURI.geturl()},
                                         status=201))
        responses.add(responses.Response(url=self.existingFileButlerURI.geturl(),
                                         method="COPY",
                                         headers={"Destination": self.notExistingFileButlerURI.geturl()},
                                         status=201))
        responses.add(responses.Response(url=self.existingFileButlerURI.geturl(),
                                         method="MOVE",
                                         headers={"Destination": self.notExistingFileButlerURI.geturl()},
                                         status=201))

        # Used by ButlerHttpURI.remove()
        responses.add(responses.DELETE,
                      self.existingFileButlerURI.geturl(),
                      status=200)
        responses.add(responses.DELETE,
                      self.notExistingFileButlerURI.geturl(),
                      status=404)

        # Used by ButlerHttpURI.mkdir()
        responses.add(responses.HEAD,
                      self.existingFolderButlerURI.geturl(),
                      status=200, headers={'Content-Length': '1024'})
        responses.add(responses.HEAD,
                      self.baseURL.geturl(),
                      status=200, headers={'Content-Length': '1024'})
        responses.add(responses.HEAD,
                      self.notExistingFolderButlerURI.geturl(),
                      status=404)
        responses.add(responses.Response(url=self.notExistingFolderButlerURI.geturl(),
                                         method="MKCOL",
                                         status=201))
        responses.add(responses.Response(url=self.existingFolderButlerURI.geturl(),
                                         method="MKCOL",
                                         status=403))

    @responses.activate
    def testExists(self):
        """Test existence checks and size of remote files."""

        self.assertTrue(self.existingFileButlerURI.exists())
        self.assertFalse(self.notExistingFileButlerURI.exists())

        self.assertEqual(self.existingFileButlerURI.size(), 1024)
        with self.assertRaises(FileNotFoundError):
            self.notExistingFileButlerURI.size()

    @responses.activate
    def testRemove(self):
        """Test removal of existing and missing remote files."""

        self.assertIsNone(self.existingFileButlerURI.remove())
        with self.assertRaises(FileNotFoundError):
            self.notExistingFileButlerURI.remove()

    @responses.activate
    def testMkdir(self):
        """Test directory creation on the remote server."""

        # The mock means that we can't check this now exists
        self.notExistingFolderButlerURI.mkdir()

        # This should do nothing
        self.existingFolderButlerURI.mkdir()

        with self.assertRaises(ValueError):
            self.notExistingFileButlerURI.mkdir()

    @responses.activate
    def testRead(self):
        """Test reading of existing and missing remote files."""

        self.assertEqual(self.existingFileButlerURI.read().decode(), "It works!")
        self.assertNotEqual(self.existingFileButlerURI.read().decode(), "Nope.")
        with self.assertRaises(FileNotFoundError):
            self.notExistingFileButlerURI.read()

    @responses.activate
    def testWrite(self):
        """Test writing, with and without permission to overwrite."""

        self.assertIsNone(self.existingFileButlerURI.write(data=b"Some content."))
        with self.assertRaises(FileExistsError):
            self.existingFileButlerURI.write(data=b"Some content.", overwrite=False)

    @responses.activate
    def testTransfer(self):
        """Test remote-to-remote transfers and rejection of bad modes."""

        self.assertIsNone(self.notExistingFileButlerURI.transfer_from(
            src=self.existingFileButlerURI))
        self.assertIsNone(self.notExistingFileButlerURI.transfer_from(
            src=self.existingFileButlerURI,
            transfer="move"))
        with self.assertRaises(FileExistsError):
            self.existingFileButlerURI.transfer_from(src=self.existingFileButlerURI)
        with self.assertRaises(ValueError):
            self.notExistingFileButlerURI.transfer_from(
                src=self.existingFileButlerURI,
                transfer="unsupported")

    def testParent(self):
        """Test parent and dirname of remote URIs."""

        self.assertEqual(self.existingFolderButlerURI.geturl(),
                         self.notExistingFileButlerURI.parent().geturl())
        self.assertEqual(self.baseURL.geturl(),
                         self.baseURL.parent().geturl())
        self.assertEqual(self.existingFileButlerURI.parent().geturl(),
                         self.existingFileButlerURI.dirname().geturl())

811 

812 

# Allow the tests to be run by executing this file directly.
if __name__ == "__main__":
    unittest.main()