Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

21 

22import os 

23import shutil 

24import tempfile 

25import unittest 

26import urllib.parse 

27 

# boto3/botocore/moto are optional: when any of them is missing the S3 tests
# are skipped (they test ``boto3`` for truthiness) and ``mock_s3`` degrades to
# a decorator that leaves the class untouched.
try:
    import boto3
    import botocore
    from moto import mock_s3
except ImportError:
    boto3 = None
    # Bind botocore as well so both optional names always exist at module
    # level, whichever of the three imports failed.
    botocore = None

    def mock_s3(cls):
        """A no-op decorator in case moto mock_s3 can not be imported.

        Parameters
        ----------
        cls : `type`
            The class being decorated.

        Returns
        -------
        cls : `type`
            The same object that was passed in, unmodified.
        """
        return cls

39 

40from lsst.daf.butler import ButlerURI 

41from lsst.daf.butler.core.s3utils import (setAwsEnvCredentials, 

42 unsetAwsEnvCredentials) 

43 

# Absolute path of the directory containing this test module; the local-file
# tests create their scratch directories underneath it.
TESTDIR = os.path.abspath(os.path.dirname(__file__))

45 

46 

47class FileURITestCase(unittest.TestCase): 

48 """Concrete tests for local files""" 

49 

50 def setUp(self): 

51 # Use a local tempdir because on macOS the temp dirs use symlinks 

52 # so relsymlink gets quite confused. 

53 self.tmpdir = tempfile.mkdtemp(dir=TESTDIR) 

54 

55 def tearDown(self): 

56 shutil.rmtree(self.tmpdir, ignore_errors=True) 

57 

58 def testFile(self): 

59 file = os.path.join(self.tmpdir, "test.txt") 

60 uri = ButlerURI(file) 

61 self.assertFalse(uri.exists(), f"{uri} should not exist") 

62 self.assertEqual(uri.ospath, file) 

63 

64 content = "abcdefghijklmnopqrstuv\n" 

65 uri.write(content.encode()) 

66 self.assertTrue(os.path.exists(file), "File should exist locally") 

67 self.assertTrue(uri.exists(), f"{uri} should now exist") 

68 self.assertEqual(uri.read().decode(), content) 

69 

70 def testRelative(self): 

71 """Check that we can get subpaths back from two URIs""" 

72 parent = ButlerURI(self.tmpdir, forceDirectory=True, forceAbsolute=True) 

73 child = ButlerURI(os.path.join(self.tmpdir, "dir1", "file.txt"), forceAbsolute=True) 

74 

75 self.assertEqual(child.relative_to(parent), "dir1/file.txt") 

76 

77 not_child = ButlerURI("/a/b/dir1/file.txt") 

78 self.assertFalse(not_child.relative_to(parent)) 

79 

80 not_directory = ButlerURI(os.path.join(self.tmpdir, "dir1", "file2.txt")) 

81 self.assertFalse(child.relative_to(not_directory)) 

82 

83 # Relative URIs 

84 parent = ButlerURI("a/b/", forceAbsolute=False) 

85 child = ButlerURI("a/b/c/d.txt", forceAbsolute=False) 

86 self.assertFalse(child.scheme) 

87 self.assertEqual(child.relative_to(parent), "c/d.txt") 

88 

89 # File URI and schemeless URI 

90 parent = ButlerURI("file:/a/b/c/") 

91 child = ButlerURI("e/f/g.txt", forceAbsolute=False) 

92 

93 # If the child is relative and the parent is absolute we assume 

94 # that the child is a child of the parent unless it uses ".." 

95 self.assertEqual(child.relative_to(parent), "e/f/g.txt") 

96 

97 child = ButlerURI("../e/f/g.txt", forceAbsolute=False) 

98 self.assertFalse(child.relative_to(parent)) 

99 

100 child = ButlerURI("../c/e/f/g.txt", forceAbsolute=False) 

101 self.assertEqual(child.relative_to(parent), "e/f/g.txt") 

102 

103 def testMkdir(self): 

104 tmpdir = ButlerURI(self.tmpdir) 

105 newdir = tmpdir.join("newdir/seconddir") 

106 newdir.mkdir() 

107 self.assertTrue(newdir.exists()) 

108 newfile = newdir.join("temp.txt") 

109 newfile.write("Data".encode()) 

110 self.assertTrue(newfile.exists()) 

111 

112 def testTransfer(self): 

113 src = ButlerURI(os.path.join(self.tmpdir, "test.txt")) 

114 content = "Content is some content\nwith something to say\n\n" 

115 src.write(content.encode()) 

116 

117 for mode in ("copy", "link", "hardlink", "symlink", "relsymlink"): 

118 dest = ButlerURI(os.path.join(self.tmpdir, f"dest_{mode}.txt")) 

119 dest.transfer_from(src, transfer=mode) 

120 self.assertTrue(dest.exists(), f"Check that {dest} exists (transfer={mode})") 

121 

122 with open(dest.ospath, "r") as fh: 

123 new_content = fh.read() 

124 self.assertEqual(new_content, content) 

125 

126 if mode in ("symlink", "relsymlink"): 

127 self.assertTrue(os.path.islink(dest.ospath), f"Check that {dest} is symlink") 

128 

129 os.remove(dest.ospath) 

130 

131 b = src.read() 

132 self.assertEqual(b.decode(), new_content) 

133 

134 nbytes = 10 

135 subset = src.read(size=nbytes) 

136 self.assertEqual(len(subset), nbytes) 

137 self.assertEqual(subset.decode(), content[:nbytes]) 

138 

139 with self.assertRaises(ValueError): 

140 src.transfer_from(src, transfer="unknown") 

141 

142 def testResource(self): 

143 u = ButlerURI("resource://lsst.daf.butler/configs/datastore.yaml") 

144 self.assertTrue(u.exists(), f"Check {u} exists") 

145 

146 content = u.read().decode() 

147 self.assertTrue(content.startswith("datastore:")) 

148 

149 truncated = u.read(size=9).decode() 

150 self.assertEqual(truncated, "datastore") 

151 

152 d = ButlerURI("resource://lsst.daf.butler/configs", forceDirectory=True) 

153 self.assertTrue(u.exists(), f"Check directory {d} exists") 

154 

155 j = d.join("datastore.yaml") 

156 self.assertEqual(u, j) 

157 self.assertFalse(j.dirLike) 

158 self.assertFalse(d.join("not-there.yaml").exists()) 

159 

160 def testEscapes(self): 

161 """Special characters in file paths""" 

162 src = ButlerURI("bbb/???/test.txt", root=self.tmpdir, forceAbsolute=True) 

163 self.assertFalse(src.scheme) 

164 src.write(b"Some content") 

165 self.assertTrue(src.exists()) 

166 

167 # Use the internal API to force to a file 

168 file = src._force_to_file() 

169 self.assertTrue(file.exists()) 

170 self.assertIn("???", file.ospath) 

171 self.assertNotIn("???", file.path) 

172 

173 file.updateFile("tests??.txt") 

174 self.assertNotIn("??.txt", file.path) 

175 file.write(b"Other content") 

176 self.assertEqual(file.read(), b"Other content") 

177 

178 src.updateFile("tests??.txt") 

179 self.assertIn("??.txt", src.path) 

180 self.assertEqual(file.read(), src.read(), f"reading from {file.ospath} and {src.ospath}") 

181 

182 # File URI and schemeless URI 

183 parent = ButlerURI("file:" + urllib.parse.quote("/a/b/c/de/??/")) 

184 child = ButlerURI("e/f/g.txt", forceAbsolute=False) 

185 self.assertEqual(child.relative_to(parent), "e/f/g.txt") 

186 

187 child = ButlerURI("e/f??#/g.txt", forceAbsolute=False) 

188 self.assertEqual(child.relative_to(parent), "e/f??#/g.txt") 

189 

190 child = ButlerURI("file:" + urllib.parse.quote("/a/b/c/de/??/e/f??#/g.txt")) 

191 self.assertEqual(child.relative_to(parent), "e/f??#/g.txt") 

192 

193 self.assertEqual(child.relativeToPathRoot, "a/b/c/de/??/e/f??#/g.txt") 

194 

195 # Schemeless so should not quote 

196 dir = ButlerURI("bbb/???/", root=self.tmpdir, forceAbsolute=True, forceDirectory=True) 

197 self.assertIn("???", dir.ospath) 

198 self.assertIn("???", dir.path) 

199 self.assertFalse(dir.scheme) 

200 

201 # dir.join() morphs into a file scheme 

202 new = dir.join("test_j.txt") 

203 self.assertIn("???", new.ospath, f"Checking {new}") 

204 new.write(b"Content") 

205 

206 new2name = "###/test??.txt" 

207 new2 = dir.join(new2name) 

208 self.assertIn("???", new2.ospath) 

209 new2.write(b"Content") 

210 self.assertTrue(new2.ospath.endswith(new2name)) 

211 self.assertEqual(new.read(), new2.read()) 

212 

213 fdir = dir._force_to_file() 

214 self.assertNotIn("???", fdir.path) 

215 self.assertIn("???", fdir.ospath) 

216 self.assertEqual(fdir.scheme, "file") 

217 fnew = dir.join("test_jf.txt") 

218 fnew.write(b"Content") 

219 

220 fnew2 = fdir.join(new2name) 

221 fnew2.write(b"Content") 

222 self.assertTrue(fnew2.ospath.endswith(new2name)) 

223 self.assertNotIn("###", fnew2.path) 

224 

225 self.assertEqual(fnew.read(), fnew2.read()) 

226 

227 # Test that children relative to schemeless and file schemes 

228 # still return the same unquoted name 

229 self.assertEqual(fnew2.relative_to(fdir), new2name) 

230 self.assertEqual(fnew2.relative_to(dir), new2name) 

231 self.assertEqual(new2.relative_to(fdir), new2name, f"{new2} vs {fdir}") 

232 self.assertEqual(new2.relative_to(dir), new2name) 

233 

234 # Check for double quoting 

235 plus_path = "/a/b/c+d/" 

236 with self.assertLogs(level="WARNING"): 

237 uri = ButlerURI(urllib.parse.quote(plus_path), forceDirectory=True) 

238 self.assertEqual(uri.ospath, plus_path) 

239 

240 

@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!")
@mock_s3
class S3URITestCase(unittest.TestCase):
    """Tests involving S3"""

    # Bucket name to use in tests
    bucketName = "any_bucket"

    def setUp(self):
        # Local test directory
        self.tmpdir = tempfile.mkdtemp()

        # set up some fake credentials if they do not exist
        self.usingDummyCredentials = setAwsEnvCredentials()

        # MOTO needs to know that we expect Bucket bucketname to exist
        s3 = boto3.resource("s3")
        s3.create_bucket(Bucket=self.bucketName)

    def tearDown(self):
        # Always restore the credentials and remove the scratch directory,
        # even when emptying or deleting the bucket raises.
        try:
            bucket = boto3.resource("s3").Bucket(self.bucketName)
            try:
                bucket.objects.all().delete()
            except botocore.exceptions.ClientError as e:
                # A 404 means the key was not reachable, which is fine
                # here; anything else is a real failure.
                if e.response["Error"]["Code"] != "404":
                    raise

            bucket.delete()
        finally:
            # unset any potentially set dummy credentials
            if self.usingDummyCredentials:
                unsetAwsEnvCredentials()

            shutil.rmtree(self.tmpdir, ignore_errors=True)

    def makeS3Uri(self, path):
        """Return an ``s3://`` URI string for ``path`` in the test bucket.

        Parameters
        ----------
        path : `str`
            Key within the bucket; it is inserted after ``s3://<bucket>/``
            without any quoting or normalization.

        Returns
        -------
        uri : `str`
            The formatted URI string.
        """
        return f"s3://{self.bucketName}/{path}"

    def testTransfer(self):
        """Copy local -> S3 -> S3 -> local and check the content."""
        src = ButlerURI(os.path.join(self.tmpdir, "test.txt"))
        content = "Content is some content\nwith something to say\n\n"
        src.write(content.encode())

        dest = ButlerURI(self.makeS3Uri("test.txt"))
        self.assertFalse(dest.exists())
        dest.transfer_from(src, transfer="copy")
        self.assertTrue(dest.exists())

        dest2 = ButlerURI(self.makeS3Uri("copied.txt"))
        dest2.transfer_from(dest, transfer="copy")
        self.assertTrue(dest2.exists())

        local = ButlerURI(os.path.join(self.tmpdir, "copied.txt"))
        local.transfer_from(dest2, transfer="copy")
        with open(local.ospath, "r") as fd:
            new_content = fd.read()
        self.assertEqual(new_content, content)

        # A symlink transfer into S3 is expected to be rejected.
        with self.assertRaises(ValueError):
            dest2.transfer_from(local, transfer="symlink")

        b = dest.read()
        self.assertEqual(b.decode(), new_content)

        # A sized read must return exactly the requested number of bytes.
        nbytes = 10
        subset = dest.read(size=nbytes)
        self.assertEqual(len(subset), nbytes)
        self.assertEqual(subset.decode(), content[:nbytes])

    def testWrite(self):
        """Write bytes straight to an S3 URI and read them back."""
        s3write = ButlerURI(self.makeS3Uri("created.txt"))
        content = "abcdefghijklmnopqrstuv\n"
        s3write.write(content.encode())
        self.assertEqual(s3write.read().decode(), content)

    def testRelative(self):
        """Check that we can get subpaths back from two URIs"""
        parent = ButlerURI(self.makeS3Uri("rootdir"), forceDirectory=True)
        child = ButlerURI(self.makeS3Uri("rootdir/dir1/file.txt"))

        self.assertEqual(child.relative_to(parent), "dir1/file.txt")

        not_child = ButlerURI(self.makeS3Uri("/a/b/dir1/file.txt"))
        self.assertFalse(not_child.relative_to(parent))

        # A local path is never relative to an S3 parent.
        not_s3 = ButlerURI(os.path.join(self.tmpdir, "dir1", "file2.txt"))
        self.assertFalse(child.relative_to(not_s3))

    def testQuoting(self):
        """Check that quoting works."""
        parent = ButlerURI(self.makeS3Uri("rootdir"), forceDirectory=True)
        subpath = "rootdir/dir1+/file?.txt"
        child = ButlerURI(self.makeS3Uri(urllib.parse.quote(subpath)))

        self.assertEqual(child.relative_to(parent), "dir1+/file?.txt")
        self.assertEqual(child.basename(), "file?.txt")
        self.assertEqual(child.relativeToPathRoot, subpath)
        # The stored path stays percent-quoted; unquoted_path undoes it.
        self.assertIn("%", child.path)
        self.assertEqual(child.unquoted_path, "/" + subpath)

345 

346 

# Run the tests when this module is executed as a script.
if __name__ == "__main__":
    unittest.main()