Coverage for python/lsst/resources/file.py: 80%

Shortcuts on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

167 statements  

1# This file is part of lsst-resources. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# Use of this source code is governed by a 3-clause BSD-style 

10# license that can be found in the LICENSE file. 

11 

12from __future__ import annotations 

13 

14import contextlib 

15import copy 

16import logging 

17import os 

18import os.path 

19import posixpath 

20import re 

21import shutil 

22import urllib.parse 

23 

24__all__ = ("FileResourcePath",) 

25 

26from typing import IO, TYPE_CHECKING, Iterator, List, Optional, Tuple, Union 

27 

28from ._resourcePath import ResourcePath 

29from .utils import NoTransaction, os2posix, posix2os 

30 

31if TYPE_CHECKING: 31 ↛ 32line 31 didn't jump to line 32, because the condition on line 31 was never true

32 from .utils import TransactionProtocol 

33 

34 

35log = logging.getLogger(__name__) 

36 

37 

class FileResourcePath(ResourcePath):
    """Path for explicit ``file`` URI scheme."""

    # Transfer modes accepted by `transfer_from`.
    transferModes = ("copy", "link", "symlink", "hardlink", "relsymlink", "auto", "move")
    # Mode substituted for "auto" when the source is not a temporary file.
    transferDefault: str = "link"

    # By definition refers to a local file
    isLocal = True

    @property
    def ospath(self) -> str:
        """Path component of the URI localized to current OS.

        Will unquote URI path since a formal URI must include the quoting.
        """
        return urllib.parse.unquote(posix2os(self._uri.path))

    def exists(self) -> bool:
        """Indicate that the file exists.

        Returns
        -------
        exists : `bool`
            Result of `os.path.exists` on the local path.
        """
        # Uses os.path.exists so if there is a soft link that points
        # to a file that no longer exists this will return False
        return os.path.exists(self.ospath)

    def size(self) -> int:
        """Return the size of the file in bytes.

        Returns
        -------
        sz : `int`
            Size reported by `os.stat`, or ``0`` if the path is a directory.
        """
        if os.path.isdir(self.ospath):
            return 0
        return os.stat(self.ospath).st_size

    def remove(self) -> None:
        """Remove the resource using `os.remove`."""
        os.remove(self.ospath)

    def _as_local(self) -> Tuple[str, bool]:
        """Return the local path of the file.

        This is an internal helper for ``as_local()``.

        Returns
        -------
        path : `str`
            The local path to this file.
        temporary : `bool`
            Always returns `False` (this is not a temporary file).
        """
        return self.ospath, False

    def read(self, size: int = -1) -> bytes:
        """Read bytes from the file.

        Parameters
        ----------
        size : `int`, optional
            Passed directly to the file handle's ``read``. The default of
            ``-1`` reads until the end of the file.

        Returns
        -------
        data : `bytes`
            The bytes read from the file, opened in binary mode.
        """
        with open(self.ospath, "rb") as fh:
            return fh.read(size)

    def write(self, data: bytes, overwrite: bool = True) -> None:
        """Write the supplied data to the file.

        Parameters
        ----------
        data : `bytes`
            Bytes to write.
        overwrite : `bool`, optional
            If `True` the file is opened with mode ``wb``. If `False` it is
            opened with mode ``xb`` (exclusive creation).
        """
        # The containing directory is created on demand.
        parent_dir = os.path.dirname(self.ospath)
        if not os.path.exists(parent_dir):
            os.makedirs(parent_dir, exist_ok=True)
        mode = "wb" if overwrite else "xb"
        with open(self.ospath, mode) as f:
            f.write(data)

    def mkdir(self) -> None:
        """Make the directory associated with this URI.

        Raises
        ------
        FileExistsError
            Raised if the path exists but is not a directory.
        """
        if not os.path.exists(self.ospath):
            os.makedirs(self.ospath, exist_ok=True)
        elif not os.path.isdir(self.ospath):
            raise FileExistsError(f"URI {self} exists but is not a directory!")

    def isdir(self) -> bool:
        """Return whether this URI is a directory.

        Returns
        -------
        isdir : `bool`
            `True` if this URI is a directory or looks like a directory,
            else `False`.
        """
        return self.dirLike or os.path.isdir(self.ospath)

    def transfer_from(
        self,
        src: ResourcePath,
        transfer: str,
        overwrite: bool = False,
        transaction: Optional[TransactionProtocol] = None,
    ) -> None:
        """Transfer the resource at ``src`` to this local file.

        Parameters
        ----------
        src : `ResourcePath`
            Source URI.
        transfer : `str`
            Mode to use for transferring the resource. Must be one of
            ``transferModes``: copy, link, symlink, hardlink, relsymlink,
            auto, move. ``auto`` resolves to ``transferDefault`` for a
            non-temporary source and to ``copy`` for a temporary one.
        overwrite : `bool`, optional
            Allow an existing file to be overwritten. Defaults to `False`.
        transaction : `~lsst.resources.utils.TransactionProtocol`, optional
            If a transaction is provided, undo actions will be registered.

        Raises
        ------
        ValueError
            Raised if ``transfer`` is not listed in ``transferModes``.
        FileNotFoundError
            Raised if the local view of the source does not exist.
        RuntimeError
            Raised if a link-based mode is requested but the local view of
            the source is a temporary file.
        FileExistsError
            Raised if the destination exists and ``overwrite`` is `False`.
        NotImplementedError
            Raised if the resolved transfer mode has no implementation.
        """
        # Fail early to prevent delays if remote resources are requested
        if transfer not in self.transferModes:
            raise ValueError(f"Transfer mode '{transfer}' not supported by URI scheme {self.scheme}")

        # Existence checks can take time so only try if the log message
        # will be issued.
        if log.isEnabledFor(logging.DEBUG):
            log.debug(
                "Transferring %s [exists: %s] -> %s [exists: %s] (transfer=%s)",
                src,
                src.exists(),
                self,
                self.exists(),
                transfer,
            )

        # We do not have to special case FileResourcePath here because
        # as_local handles that.
        with src.as_local() as local_uri:
            is_temporary = local_uri.isTemporary
            local_src = local_uri.ospath

            # Short circuit if the URIs are identical immediately.
            if self == local_uri:
                log.debug(
                    "Target and destination URIs are identical: %s, returning immediately."
                    " No further action required.",
                    self,
                )
                return

            # Default transfer mode depends on whether we have a temporary
            # file or not.
            if transfer == "auto":
                transfer = self.transferDefault if not is_temporary else "copy"

            if not os.path.exists(local_src):
                if is_temporary:
                    msg = f"Local file {local_uri} downloaded from {src} has gone missing"
                else:
                    msg = f"Source URI {src} does not exist"
                raise FileNotFoundError(msg)

            # Follow soft links
            local_src = os.path.realpath(os.path.normpath(local_src))

            # All the modes involving linking use "link" somewhere
            if "link" in transfer and is_temporary:
                raise RuntimeError(
                    f"Can not use local file system transfer mode {transfer} for remote resource ({src})"
                )

            # For temporary files we can own them
            requested_transfer = transfer
            if is_temporary and transfer == "copy":
                transfer = "move"

            # The output location should not exist unless overwrite=True.
            # Rather than use `exists()`, use os.stat since we might need
            # the full answer later.
            dest_stat: Optional[os.stat_result]
            try:
                # Do not read through links of the file itself.
                dest_stat = os.lstat(self.ospath)
            except FileNotFoundError:
                dest_stat = None

            # It is possible that the source URI and target URI refer
            # to the same file. This can happen for a number of reasons
            # (such as soft links in the path, or they really are the same).
            # In that case log a message and return as if the transfer
            # completed (it technically did). A temporary file download
            # can't be the same so the test can be skipped.
            if dest_stat and not is_temporary:
                # Be consistent and use lstat here (even though realpath
                # has been called). It does not harm.
                local_src_stat = os.lstat(local_src)
                if dest_stat.st_ino == local_src_stat.st_ino and dest_stat.st_dev == local_src_stat.st_dev:
                    log.debug(
                        "Destination URI %s is the same file as source URI %s, returning immediately."
                        " No further action required.",
                        self,
                        local_uri,
                    )
                    return

            if not overwrite and dest_stat:
                raise FileExistsError(
                    f"Destination path '{self}' already exists. Transfer from {src} cannot be completed."
                )

            # Make the path absolute (but don't follow links since that
            # would possibly cause us to end up in the wrong place if the
            # file existed already as a soft link)
            newFullPath = os.path.abspath(self.ospath)
            outputDir = os.path.dirname(newFullPath)
            if not os.path.isdir(outputDir):
                # Must create the directory -- this can not be rolled back
                # since another transfer running concurrently may
                # be relying on this existing.
                os.makedirs(outputDir, exist_ok=True)

            if transaction is None:
                # Use a no-op transaction to reduce code duplication
                transaction = NoTransaction()

            # For links the OS doesn't let us overwrite so if something does
            # exist we have to remove it before we do the actual "transfer"
            # below
            if "link" in transfer and overwrite and dest_stat:
                try:
                    self.remove()
                except Exception as exc:
                    # Deliberately best-effort: if this fails the problem
                    # will manifest immediately below with a more relevant
                    # error message. Record the cause so it is not lost.
                    log.debug("Unable to remove existing destination %s before linking: %s", self, exc)

            if transfer == "move":
                with transaction.undoWith(
                    f"move from {local_src}", shutil.move, newFullPath, local_src, copy_function=shutil.copy
                ):
                    shutil.move(local_src, newFullPath, copy_function=shutil.copy)
            elif transfer == "copy":
                with transaction.undoWith(f"copy from {local_src}", os.remove, newFullPath):
                    shutil.copy(local_src, newFullPath)
            elif transfer == "link":
                # Try hard link and if that fails use a symlink
                with transaction.undoWith(f"link to {local_src}", os.remove, newFullPath):
                    try:
                        os.link(local_src, newFullPath)
                    except OSError:
                        # Read through existing symlinks
                        os.symlink(local_src, newFullPath)
            elif transfer == "hardlink":
                with transaction.undoWith(f"hardlink to {local_src}", os.remove, newFullPath):
                    os.link(local_src, newFullPath)
            elif transfer == "symlink":
                # Read through existing symlinks
                with transaction.undoWith(f"symlink to {local_src}", os.remove, newFullPath):
                    os.symlink(local_src, newFullPath)
            elif transfer == "relsymlink":
                # This is a standard symlink but using a relative path
                # Need the directory name to give to relative root
                # A full file path confuses it into an extra ../
                newFullPathRoot = os.path.dirname(newFullPath)
                relPath = os.path.relpath(local_src, newFullPathRoot)
                with transaction.undoWith(f"relsymlink to {local_src}", os.remove, newFullPath):
                    os.symlink(relPath, newFullPath)
            else:
                raise NotImplementedError(f"Transfer type '{transfer}' not supported.")

        # This was an explicit move requested from a remote resource
        # try to remove that remote resource. We check is_temporary because
        # the local file would have been moved by shutil.move already.
        if requested_transfer == "move" and is_temporary:
            # Transactions do not work here
            src.remove()

    def walk(
        self, file_filter: Optional[Union[str, re.Pattern]] = None
    ) -> Iterator[Union[List, Tuple[ResourcePath, List[str], List[str]]]]:
        """Walk the directory tree returning matching files and directories.

        Parameters
        ----------
        file_filter : `str` or `re.Pattern`, optional
            Regex to filter out files from the list before it is returned.
            Only file names for which ``search`` matches are kept.

        Yields
        ------
        dirpath : `ResourcePath`
            Current directory being examined.
        dirnames : `list` of `str`
            Names of subdirectories within dirpath.
        filenames : `list` of `str`
            Names of all the files within dirpath.

        Raises
        ------
        ValueError
            Raised if this URI is not a directory.
        """
        if not self.isdir():
            raise ValueError("Can not walk a non-directory URI")

        if isinstance(file_filter, str):
            file_filter = re.compile(file_filter)

        for root, dirs, files in os.walk(self.ospath):
            # Filter by the regex
            if file_filter is not None:
                files = [f for f in files if file_filter.search(f)]
            yield type(self)(root, forceAbsolute=False, forceDirectory=True), dirs, files

    @classmethod
    def _fixupPathUri(
        cls,
        parsed: urllib.parse.ParseResult,
        root: Optional[Union[str, ResourcePath]] = None,
        forceAbsolute: bool = False,
        forceDirectory: bool = False,
    ) -> Tuple[urllib.parse.ParseResult, bool]:
        """Fix up relative paths in URI instances.

        Parameters
        ----------
        parsed : `~urllib.parse.ParseResult`
            The result from parsing a URI using `urllib.parse`.
        root : `str` or `ResourcePath`, optional
            Path to use as root when converting relative to absolute.
            If `None`, it will be the current working directory. This
            is a local file system path, or a file URI. It is only used if
            a file-scheme is used incorrectly with a relative path.
        forceAbsolute : `bool`, ignored
            Has no effect for this subclass. ``file`` URIs are always
            absolute.
        forceDirectory : `bool`, optional
            If `True` forces the URI to end with a separator, otherwise given
            URI is interpreted as is.

        Returns
        -------
        modified : `~urllib.parse.ParseResult`
            Update result if a URI is being handled.
        dirLike : `bool`
            `True` if given parsed URI has a trailing separator or
            forceDirectory is True. Otherwise `False`.

        Raises
        ------
        RuntimeError
            Raised if ``root`` is a `ResourcePath` with a scheme other
            than ``file``.

        Notes
        -----
        Relative paths are explicitly not supported by RFC8089 but `urllib`
        does accept URIs of the form ``file:relative/path.ext``. They need
        to be turned into absolute paths before they can be used. This is
        always done regardless of the ``forceAbsolute`` parameter.
        """
        # assume we are not dealing with a directory like URI
        dirLike = False

        # file URI implies POSIX path separators so split as POSIX,
        # then join as os, and convert to abspath. Do not handle
        # home directories since "file" scheme is explicitly documented
        # to not do tilde expansion.
        sep = posixpath.sep

        # For local file system we can explicitly check to see if this
        # really is a directory. The URI might point to a location that
        # does not exists yet but all that matters is if it is a directory
        # then we make sure use that fact. No need to do the check if
        # we are already being told.
        if not forceDirectory and posixpath.isdir(parsed.path):
            forceDirectory = True

        # For an absolute path all we need to do is check if we need
        # to force the directory separator
        if posixpath.isabs(parsed.path):
            if forceDirectory:
                if not parsed.path.endswith(sep):
                    parsed = parsed._replace(path=parsed.path + sep)
                dirLike = True
            return copy.copy(parsed), dirLike

        # Relative path so must fix it to be compliant with the standard

        # Replacement values for the URI
        replacements = {}

        if root is None:
            root = os.path.abspath(os.path.curdir)
        elif isinstance(root, ResourcePath):
            if root.scheme and root.scheme != "file":
                raise RuntimeError(f"The override root must be a file URI not {root.scheme}")
            root = os.path.abspath(root.ospath)

        replacements["path"] = posixpath.normpath(posixpath.join(os2posix(root), parsed.path))

        # normpath strips trailing "/" so put it back if necessary
        # Acknowledge that trailing separator exists.
        if forceDirectory or (parsed.path.endswith(sep) and not replacements["path"].endswith(sep)):
            replacements["path"] += sep
            dirLike = True

        # ParseResult is a NamedTuple so _replace is standard API
        parsed = parsed._replace(**replacements)

        if parsed.params or parsed.query:
            log.warning("Additional items unexpectedly encountered in file URI: %s", parsed.geturl())

        return parsed, dirLike

    @contextlib.contextmanager
    def open(
        self,
        mode: str = "r",
        *,
        encoding: Optional[str] = None,
        prefer_file_temporary: bool = False,
    ) -> Iterator[IO]:
        # Docstring inherited.
        # ``prefer_file_temporary`` is not used here: the local file is
        # opened directly with the builtin ``open``.
        with open(self.ospath, mode=mode, encoding=encoding) as buffer:
            yield buffer