Coverage for python/lsst/resources/file.py: 80%

Shortcuts on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

162 statements  

1# This file is part of lsst-resources. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# Use of this source code is governed by a 3-clause BSD-style 

10# license that can be found in the LICENSE file. 

11 

12from __future__ import annotations 

13 

14import copy 

15import logging 

16import os 

17import os.path 

18import posixpath 

19import re 

20import shutil 

21import urllib.parse 

22 

23__all__ = ("FileResourcePath",) 

24 

25from typing import TYPE_CHECKING, Iterator, List, Optional, Tuple, Union 

26 

27from ._resourcePath import ResourcePath 

28from .utils import NoTransaction, os2posix, posix2os 

29 

# Imported only while type checking: the name is used solely in annotations,
# so the import is skipped at runtime.
if TYPE_CHECKING:
    from .utils import TransactionProtocol


# Module-level logger named after this module.
log = logging.getLogger(__name__)

35 

36 

class FileResourcePath(ResourcePath):
    """Path for explicit ``file`` URI scheme."""

    # Transfer modes accepted by `transfer_from`.
    transferModes = ("copy", "link", "symlink", "hardlink", "relsymlink", "auto", "move")
    # Mode substituted for "auto" when the source is not a temporary file.
    transferDefault: str = "link"

    # By definition refers to a local file
    isLocal = True

    @property
    def ospath(self) -> str:
        """Path component of the URI localized to current OS.

        Will unquote URI path since a formal URI must include the quoting.
        """
        return urllib.parse.unquote(posix2os(self._uri.path))

    def exists(self) -> bool:
        """Indicate that the file exists."""
        # Uses os.path.exists so if there is a soft link that points
        # to a file that no longer exists this will return False
        return os.path.exists(self.ospath)

    def size(self) -> int:
        """Return the size of the file in bytes.

        Returns
        -------
        sz : `int`
            Size reported by `os.stat`, or 0 if the path is a directory.
        """
        if not os.path.isdir(self.ospath):
            stat = os.stat(self.ospath)
            sz = stat.st_size
        else:
            sz = 0
        return sz

    def remove(self) -> None:
        """Remove the resource."""
        os.remove(self.ospath)

    def _as_local(self) -> Tuple[str, bool]:
        """Return the local path of the file.

        This is an internal helper for ``as_local()``.

        Returns
        -------
        path : `str`
            The local path to this file.
        temporary : `bool`
            Always returns `False` (this is not a temporary file).
        """
        return self.ospath, False

    def read(self, size: int = -1) -> bytes:
        """Return the content of the file as bytes.

        Parameters
        ----------
        size : `int`, optional
            Maximum number of bytes to read. The default of ``-1`` reads
            the entire file.

        Returns
        -------
        data : `bytes`
            The bytes read from the file.
        """
        with open(self.ospath, "rb") as fh:
            return fh.read(size)

    def write(self, data: bytes, overwrite: bool = True) -> None:
        """Write the supplied data to the file.

        The parent directory is created if it does not exist.

        Parameters
        ----------
        data : `bytes`
            Bytes to write.
        overwrite : `bool`, optional
            If `True` an existing file is truncated and replaced. If `False`
            the file is opened for exclusive creation.

        Raises
        ------
        FileExistsError
            Raised if ``overwrite`` is `False` and the file already exists.
        """
        parent_dir = os.path.dirname(self.ospath)
        if not os.path.exists(parent_dir):
            os.makedirs(parent_dir, exist_ok=True)
        # "xb" is exclusive creation: open() fails if the file already exists.
        mode = "wb" if overwrite else "xb"
        with open(self.ospath, mode) as f:
            f.write(data)

    def mkdir(self) -> None:
        """Make the directory associated with this URI.

        Raises
        ------
        FileExistsError
            Raised if the path exists but is not a directory.
        """
        if not os.path.exists(self.ospath):
            os.makedirs(self.ospath, exist_ok=True)
        elif not os.path.isdir(self.ospath):
            raise FileExistsError(f"URI {self} exists but is not a directory!")

    def isdir(self) -> bool:
        """Return whether this URI is a directory.

        Returns
        -------
        isdir : `bool`
            `True` if this URI is a directory or looks like a directory,
            else `False`.
        """
        return self.dirLike or os.path.isdir(self.ospath)

    def transfer_from(
        self,
        src: ResourcePath,
        transfer: str,
        overwrite: bool = False,
        transaction: Optional[TransactionProtocol] = None,
    ) -> None:
        """Transfer the resource at ``src`` to this local file.

        Parameters
        ----------
        src : `ResourcePath`
            Source URI.
        transfer : `str`
            Mode to use for transferring the resource. Supports the following
            options: copy, link, symlink, hardlink, relsymlink, auto, move.
            ``auto`` becomes ``transferDefault`` for a non-temporary local
            source and ``copy`` for a temporary one.
        overwrite : `bool`, optional
            Allow an existing file to be overwritten. Defaults to `False`.
        transaction : `~lsst.resources.utils.TransactionProtocol`, optional
            If a transaction is provided, undo actions will be registered.

        Raises
        ------
        ValueError
            Raised if ``transfer`` is not one of ``transferModes``.
        FileNotFoundError
            Raised if the local form of the source does not exist.
        RuntimeError
            Raised if a link-based mode is requested for a source that was
            localized to a temporary file.
        FileExistsError
            Raised if the destination exists and ``overwrite`` is `False`.
        """
        # Fail early to prevent delays if remote resources are requested
        if transfer not in self.transferModes:
            raise ValueError(f"Transfer mode '{transfer}' not supported by URI scheme {self.scheme}")

        # Existence checks can take time so only try if the log message
        # will be issued.
        if log.isEnabledFor(logging.DEBUG):
            log.debug(
                "Transferring %s [exists: %s] -> %s [exists: %s] (transfer=%s)",
                src,
                src.exists(),
                self,
                self.exists(),
                transfer,
            )

        # We do not have to special case FileResourcePath here because
        # as_local handles that.
        with src.as_local() as local_uri:
            is_temporary = local_uri.isTemporary
            local_src = local_uri.ospath

            # Short circuit if the URIs are identical immediately.
            if self == local_uri:
                log.debug(
                    "Target and destination URIs are identical: %s, returning immediately."
                    " No further action required.",
                    self,
                )
                return

            # Default transfer mode depends on whether we have a temporary
            # file or not.
            if transfer == "auto":
                transfer = self.transferDefault if not is_temporary else "copy"

            if not os.path.exists(local_src):
                if is_temporary:
                    msg = f"Local file {local_uri} downloaded from {src} has gone missing"
                else:
                    msg = f"Source URI {src} does not exist"
                raise FileNotFoundError(msg)

            # Follow soft links
            local_src = os.path.realpath(os.path.normpath(local_src))

            # All the modes involving linking use "link" somewhere
            if "link" in transfer and is_temporary:
                raise RuntimeError(
                    f"Can not use local file system transfer mode {transfer} for remote resource ({src})"
                )

            # For temporary files we can own them
            requested_transfer = transfer
            if is_temporary and transfer == "copy":
                transfer = "move"

            # The output location should not exist unless overwrite=True.
            # Rather than use `exists()`, use os.stat since we might need
            # the full answer later.
            dest_stat: Optional[os.stat_result]
            try:
                # Do not read through links of the file itself.
                dest_stat = os.lstat(self.ospath)
            except FileNotFoundError:
                dest_stat = None

            # It is possible that the source URI and target URI refer
            # to the same file. This can happen for a number of reasons
            # (such as soft links in the path, or they really are the same).
            # In that case log a message and return as if the transfer
            # completed (it technically did). A temporary file download
            # can't be the same so the test can be skipped.
            if dest_stat and not is_temporary:
                # Be consistent and use lstat here (even though realpath
                # has been called). It does not harm.
                local_src_stat = os.lstat(local_src)
                if dest_stat.st_ino == local_src_stat.st_ino and dest_stat.st_dev == local_src_stat.st_dev:
                    log.debug(
                        "Destination URI %s is the same file as source URI %s, returning immediately."
                        " No further action required.",
                        self,
                        local_uri,
                    )
                    return

            if not overwrite and dest_stat:
                raise FileExistsError(
                    f"Destination path '{self}' already exists. Transfer from {src} cannot be completed."
                )

            # Make the path absolute (but don't follow links since that
            # would possibly cause us to end up in the wrong place if the
            # file existed already as a soft link)
            newFullPath = os.path.abspath(self.ospath)
            outputDir = os.path.dirname(newFullPath)
            if not os.path.isdir(outputDir):
                # Must create the directory -- this can not be rolled back
                # since another transfer running concurrently may
                # be relying on this existing.
                os.makedirs(outputDir, exist_ok=True)

            if transaction is None:
                # Use a no-op transaction to reduce code duplication
                transaction = NoTransaction()

            # For links the OS doesn't let us overwrite so if something does
            # exist we have to remove it before we do the actual "transfer"
            # below
            if "link" in transfer and overwrite and dest_stat:
                try:
                    self.remove()
                except Exception:
                    # If this fails we ignore it since it's a problem
                    # that will manifest immediately below with a more relevant
                    # error message
                    pass

            if transfer == "move":
                with transaction.undoWith(f"move from {local_src}", shutil.move, newFullPath, local_src):
                    shutil.move(local_src, newFullPath)
            elif transfer == "copy":
                with transaction.undoWith(f"copy from {local_src}", os.remove, newFullPath):
                    shutil.copy(local_src, newFullPath)
            elif transfer == "link":
                # Try hard link and if that fails use a symlink
                with transaction.undoWith(f"link to {local_src}", os.remove, newFullPath):
                    try:
                        os.link(local_src, newFullPath)
                    except OSError:
                        # Read through existing symlinks
                        os.symlink(local_src, newFullPath)
            elif transfer == "hardlink":
                with transaction.undoWith(f"hardlink to {local_src}", os.remove, newFullPath):
                    os.link(local_src, newFullPath)
            elif transfer == "symlink":
                # Read through existing symlinks
                with transaction.undoWith(f"symlink to {local_src}", os.remove, newFullPath):
                    os.symlink(local_src, newFullPath)
            elif transfer == "relsymlink":
                # This is a standard symlink but using a relative path
                # Need the directory name to give to relative root
                # A full file path confuses it into an extra ../
                newFullPathRoot = os.path.dirname(newFullPath)
                relPath = os.path.relpath(local_src, newFullPathRoot)
                with transaction.undoWith(f"relsymlink to {local_src}", os.remove, newFullPath):
                    os.symlink(relPath, newFullPath)
            else:
                raise NotImplementedError(f"Transfer type '{transfer}' not supported.")

        # This was an explicit move requested from a remote resource
        # try to remove that remote resource. We check is_temporary because
        # the local file would have been moved by shutil.move already.
        # NOTE(review): placed after the ``with`` block; the nesting was
        # reconstructed from flattened text -- confirm against the module.
        if requested_transfer == "move" and is_temporary:
            # Transactions do not work here
            src.remove()

    def walk(
        self, file_filter: Optional[Union[str, re.Pattern]] = None
    ) -> Iterator[Union[List, Tuple[ResourcePath, List[str], List[str]]]]:
        """Walk the directory tree returning matching files and directories.

        Parameters
        ----------
        file_filter : `str` or `re.Pattern`, optional
            Regex used to select files. Only file names for which
            ``search`` finds a match are kept in the returned list;
            directory names are not filtered.

        Yields
        ------
        dirpath : `ResourcePath`
            Current directory being examined.
        dirnames : `list` of `str`
            Names of subdirectories within dirpath.
        filenames : `list` of `str`
            Names of all the files within dirpath.

        Raises
        ------
        ValueError
            Raised if this URI is not a directory.
        """
        if not self.isdir():
            raise ValueError("Can not walk a non-directory URI")

        if isinstance(file_filter, str):
            file_filter = re.compile(file_filter)

        for root, dirs, files in os.walk(self.ospath):
            # Filter by the regex
            if file_filter is not None:
                files = [f for f in files if file_filter.search(f)]
            yield type(self)(root, forceAbsolute=False, forceDirectory=True), dirs, files

    @classmethod
    def _fixupPathUri(
        cls,
        parsed: urllib.parse.ParseResult,
        root: Optional[Union[str, ResourcePath]] = None,
        forceAbsolute: bool = False,
        forceDirectory: bool = False,
    ) -> Tuple[urllib.parse.ParseResult, bool]:
        """Fix up relative paths in URI instances.

        Parameters
        ----------
        parsed : `~urllib.parse.ParseResult`
            The result from parsing a URI using `urllib.parse`.
        root : `str` or `ResourcePath`, optional
            Path to use as root when converting relative to absolute.
            If `None`, it will be the current working directory. This
            is a local file system path, or a file URI. It is only used if
            a file-scheme is used incorrectly with a relative path.
        forceAbsolute : `bool`, ignored
            Has no effect for this subclass. ``file`` URIs are always
            absolute.
        forceDirectory : `bool`, optional
            If `True` forces the URI to end with a separator, otherwise given
            URI is interpreted as is.

        Returns
        -------
        modified : `~urllib.parse.ParseResult`
            Update result if a URI is being handled.
        dirLike : `bool`
            `True` if given parsed URI has a trailing separator or
            forceDirectory is True. Otherwise `False`.

        Raises
        ------
        RuntimeError
            Raised if ``root`` is a `ResourcePath` with a scheme other than
            ``file``.

        Notes
        -----
        Relative paths are explicitly not supported by RFC8089 but `urllib`
        does accept URIs of the form ``file:relative/path.ext``. They need
        to be turned into absolute paths before they can be used. This is
        always done regardless of the ``forceAbsolute`` parameter.
        """
        # assume we are not dealing with a directory like URI
        dirLike = False

        # file URI implies POSIX path separators so split as POSIX,
        # then join as os, and convert to abspath. Do not handle
        # home directories since "file" scheme is explicitly documented
        # to not do tilde expansion.
        sep = posixpath.sep

        # For local file system we can explicitly check to see if this
        # really is a directory. The URI might point to a location that
        # does not exists yet but all that matters is if it is a directory
        # then we make sure use that fact. No need to do the check if
        # we are already being told.
        # NOTE(review): the check uses ``parsed.path`` exactly as given
        # (presumably still URI-quoted, and a relative path is tested against
        # the current working directory, not ``root``) -- confirm intended.
        if not forceDirectory and posixpath.isdir(parsed.path):
            forceDirectory = True

        # For an absolute path all we need to do is check if we need
        # to force the directory separator
        if posixpath.isabs(parsed.path):
            if forceDirectory:
                if not parsed.path.endswith(sep):
                    parsed = parsed._replace(path=parsed.path + sep)
                dirLike = True
            return copy.copy(parsed), dirLike

        # Relative path so must fix it to be compliant with the standard

        if root is None:
            root = os.path.abspath(os.path.curdir)
        elif isinstance(root, ResourcePath):
            if root.scheme and root.scheme != "file":
                raise RuntimeError(f"The override root must be a file URI not {root.scheme}")
            root = os.path.abspath(root.ospath)

        new_path = posixpath.normpath(posixpath.join(os2posix(root), parsed.path))

        # normpath strips trailing "/" so put it back if necessary.
        # Only append when it is missing: the normalized path can already end
        # with the separator (it is then the root "/"), and appending again
        # would yield "//".
        if forceDirectory or parsed.path.endswith(sep):
            if not new_path.endswith(sep):
                new_path += sep
            dirLike = True

        # ParseResult is a NamedTuple so _replace is standard API
        parsed = parsed._replace(path=new_path)

        if parsed.params or parsed.query:
            log.warning("Additional items unexpectedly encountered in file URI: %s", parsed.geturl())

        return parsed, dirLike

423 

424 return parsed, dirLike