Coverage for python/lsst/resources/file.py: 89%

180 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2022-12-14 03:26 -0800

1# This file is part of lsst-resources. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# Use of this source code is governed by a 3-clause BSD-style 

10# license that can be found in the LICENSE file. 

11 

12from __future__ import annotations 

13 

14import contextlib 

15import copy 

16import logging 

17import os 

18import os.path 

19import posixpath 

20import re 

21import shutil 

22import urllib.parse 

23 

# Names exported by ``from <module> import *``.
__all__ = ("FileResourcePath",)

25 

26from typing import IO, TYPE_CHECKING, Iterator, List, Optional, Tuple, Union 

27 

28from ._resourcePath import ResourcePath 

29from .utils import NoTransaction, os2posix, posix2os 

30 

31if TYPE_CHECKING: 31 ↛ 32line 31 didn't jump to line 32, because the condition on line 31 was never true

32 from .utils import TransactionProtocol 

33 

34 

# Module-level logger named after this module.
log = logging.getLogger(__name__)

36 

37 

class FileResourcePath(ResourcePath):
    """Path for explicit ``file`` URI scheme."""

    # Transfer modes accepted by ``transfer_from``.
    transferModes = ("copy", "link", "symlink", "hardlink", "relsymlink", "auto", "move")
    # Mode substituted for "auto" when the source is not a temporary file.
    transferDefault: str = "link"

    # By definition refers to a local file
    isLocal = True

    @property
    def ospath(self) -> str:
        """Path component of the URI localized to current OS.

        Will unquote URI path since a formal URI must include the quoting.
        """
        return urllib.parse.unquote(posix2os(self._uri.path))

    def exists(self) -> bool:
        """Indicate that the file exists."""
        # Uses os.path.exists so if there is a soft link that points
        # to a file that no longer exists this will return False
        return os.path.exists(self.ospath)

    def size(self) -> int:
        """Return the size of the file in bytes.

        Returns
        -------
        sz : `int`
            The size reported by `os.stat`. A directory always reports 0.
        """
        if os.path.isdir(self.ospath):
            return 0
        return os.stat(self.ospath).st_size

    def remove(self) -> None:
        """Remove the resource."""
        os.remove(self.ospath)

    def _as_local(self) -> Tuple[str, bool]:
        """Return the local path of the file.

        This is an internal helper for ``as_local()``.

        Returns
        -------
        path : `str`
            The local path to this file.
        temporary : `bool`
            Always returns the temporary nature of the input file resource.
        """
        return self.ospath, self.isTemporary

    def read(self, size: int = -1) -> bytes:
        """Return content of the file as bytes.

        Parameters
        ----------
        size : `int`, optional
            Maximum number of bytes to read. The default of -1 reads the
            entire file.

        Returns
        -------
        data : `bytes`
            The bytes read from the file.
        """
        with open(self.ospath, "rb") as fh:
            return fh.read(size)

    def write(self, data: bytes, overwrite: bool = True) -> None:
        """Write the supplied data to the file.

        Parameters
        ----------
        data : `bytes`
            The bytes to write.
        overwrite : `bool`, optional
            If `True` (the default) the file is opened in ``wb`` mode and
            any existing content is replaced. If `False` the file is opened
            in ``xb`` mode, which fails if the file already exists.
        """
        # The parent directory is created on demand.
        parent_dir = os.path.dirname(self.ospath)
        if not os.path.exists(parent_dir):
            os.makedirs(parent_dir, exist_ok=True)
        mode = "wb" if overwrite else "xb"
        with open(self.ospath, mode) as fh:
            fh.write(data)

    def mkdir(self) -> None:
        """Make the directory associated with this URI.

        An attempt will be made to create the directory even if the URI
        looks like a file.

        Raises
        ------
        NotADirectoryError:
            Raised if a non-directory already exists.
        """
        try:
            os.makedirs(self.ospath, exist_ok=True)
        except FileExistsError:
            raise NotADirectoryError(f"{self.ospath} exists but is not a directory.") from None

    def isdir(self) -> bool:
        """Return whether this URI is a directory.

        Returns
        -------
        isdir : `bool`
            `True` if this URI is a directory or looks like a directory,
            else `False`.
        """
        return self.dirLike or os.path.isdir(self.ospath)

    def transfer_from(
        self,
        src: ResourcePath,
        transfer: str,
        overwrite: bool = False,
        transaction: Optional[TransactionProtocol] = None,
    ) -> None:
        """Transfer the resource at ``src`` to this local file.

        Parameters
        ----------
        src : `ResourcePath`
            Source URI.
        transfer : `str`
            Mode to use for transferring the resource. Supports the following
            options: copy, link, symlink, hardlink, relsymlink, auto, move.
            ``auto`` becomes ``transferDefault`` for a non-temporary source
            and ``copy`` for a temporary one.
        overwrite : `bool`, optional
            Allow an existing file to be overwritten. Defaults to `False`.
        transaction : `~lsst.resources.utils.TransactionProtocol`, optional
            If a transaction is provided, undo actions will be registered.

        Raises
        ------
        ValueError
            Raised if ``transfer`` is not one of ``transferModes``.
        FileNotFoundError
            Raised if the local form of the source does not exist.
        RuntimeError
            Raised if a link-style transfer is requested for a source that
            had to be downloaded to a temporary local file.
        FileExistsError
            Raised if the destination exists and ``overwrite`` is `False`.
        NotImplementedError
            Raised if the resolved transfer mode has no implementation.
        """
        # Fail early to prevent delays if remote resources are requested
        if transfer not in self.transferModes:
            raise ValueError(f"Transfer mode '{transfer}' not supported by URI scheme {self.scheme}")

        # Existence checks can take time so only try if the log message
        # will be issued.
        if log.isEnabledFor(logging.DEBUG):
            log.debug(
                "Transferring %s [exists: %s] -> %s [exists: %s] (transfer=%s)",
                src,
                src.exists(),
                self,
                self.exists(),
                transfer,
            )

        # We do not have to special case FileResourcePath here because
        # as_local handles that.
        with src.as_local() as local_uri:
            is_temporary = local_uri.isTemporary
            local_src = local_uri.ospath

            # Short circuit if the URIs are identical immediately.
            if self == local_uri:
                log.debug(
                    "Target and destination URIs are identical: %s, returning immediately."
                    " No further action required.",
                    self,
                )
                return

            # Default transfer mode depends on whether we have a temporary
            # file or not.
            if transfer == "auto":
                transfer = self.transferDefault if not is_temporary else "copy"

            if not os.path.exists(local_src):
                if is_temporary:
                    if src == local_uri:
                        msg = f"Local temporary file {src} has gone missing."
                    else:
                        # This will not happen in normal scenarios.
                        msg = f"Local file {local_uri} downloaded from {src} has gone missing"
                else:
                    msg = f"Source URI {src} does not exist"
                raise FileNotFoundError(msg)

            # Follow soft links
            local_src = os.path.realpath(os.path.normpath(local_src))

            # Creating a symlink to a local copy of a remote resource
            # should never work. Creating a hardlink will work but should
            # not be allowed since it is highly unlikely that this is ever
            # an intended option and depends on the local target being
            # on the same file system as was used for the temporary file
            # download.
            # If a symlink is being requested for a local temporary file
            # that is likely undesirable but should not be refused.
            if is_temporary and src != local_uri and "link" in transfer:
                raise RuntimeError(
                    f"Can not use local file system transfer mode {transfer} for remote resource ({src})"
                )
            elif is_temporary and src == local_uri and "symlink" in transfer:
                log.debug(
                    "Using a symlink for a temporary resource may lead to unexpected downstream failures."
                )

            # For temporary files we can own them if we created it.
            # Remember what the caller asked for: it decides below whether
            # the original source has to be removed.
            requested_transfer = transfer
            if src != local_uri and is_temporary and transfer == "copy":
                transfer = "move"

            # The output location should not exist unless overwrite=True.
            # Rather than use `exists()`, use os.stat since we might need
            # the full answer later.
            dest_stat: Optional[os.stat_result]
            try:
                # Do not read through links of the file itself.
                dest_stat = os.lstat(self.ospath)
            except FileNotFoundError:
                dest_stat = None

            # It is possible that the source URI and target URI refer
            # to the same file. This can happen for a number of reasons
            # (such as soft links in the path, or they really are the same).
            # In that case log a message and return as if the transfer
            # completed (it technically did). A temporary file download
            # can't be the same so the test can be skipped.
            if dest_stat and not is_temporary:
                # Be consistent and use lstat here (even though realpath
                # has been called). It does not harm.
                local_src_stat = os.lstat(local_src)
                if dest_stat.st_ino == local_src_stat.st_ino and dest_stat.st_dev == local_src_stat.st_dev:
                    log.debug(
                        "Destination URI %s is the same file as source URI %s, returning immediately."
                        " No further action required.",
                        self,
                        local_uri,
                    )
                    return

            if not overwrite and dest_stat:
                raise FileExistsError(
                    f"Destination path '{self}' already exists. Transfer from {src} cannot be completed."
                )

            # Make the path absolute (but don't follow links since that
            # would possibly cause us to end up in the wrong place if the
            # file existed already as a soft link)
            newFullPath = os.path.abspath(self.ospath)
            outputDir = os.path.dirname(newFullPath)
            if not os.path.isdir(outputDir):
                # Must create the directory -- this can not be rolled back
                # since another transfer running concurrently may
                # be relying on this existing.
                os.makedirs(outputDir, exist_ok=True)

            if transaction is None:
                # Use a no-op transaction to reduce code duplication
                transaction = NoTransaction()

            # For links the OS doesn't let us overwrite so if something does
            # exist we have to remove it before we do the actual "transfer"
            # below
            if "link" in transfer and overwrite and dest_stat:
                try:
                    self.remove()
                except Exception:
                    # If this fails we ignore it since it's a problem
                    # that will manifest immediately below with a more relevant
                    # error message
                    pass

            if transfer == "move":
                # If a rename works we try that since that is guaranteed to
                # be atomic. If that fails we copy and rename. We do this
                # in case other processes are trying to move to the same
                # file and we want the "winner" to not be corrupted.
                try:
                    with transaction.undoWith(f"move from {local_src}", os.rename, newFullPath, local_src):
                        os.rename(local_src, newFullPath)
                except OSError:
                    with self.temporary_uri(prefix=self.parent(), suffix=self.getExtension()) as temp_copy:
                        shutil.copy(local_src, temp_copy.ospath)
                        with transaction.undoWith(
                            f"move from {local_src}",
                            shutil.move,
                            newFullPath,
                            local_src,
                            copy_function=shutil.copy,
                        ):
                            os.rename(temp_copy.ospath, newFullPath)
                    os.remove(local_src)
            elif transfer == "copy":
                # We want atomic copies so first copy to a temp location in
                # the same output directory. This at least guarantees that
                # if multiple processes are writing to the same file
                # simultaneously the file we end up with will not be corrupt.
                with self.temporary_uri(prefix=self.parent(), suffix=self.getExtension()) as temp_copy:
                    shutil.copy(local_src, temp_copy.ospath)
                    with transaction.undoWith(f"copy from {local_src}", os.remove, newFullPath):
                        # os.rename works even if the file exists.
                        # It's possible that another process has copied a file
                        # in whilst this one was copying. If overwrite
                        # protection is needed then another stat() call should
                        # happen here.
                        os.rename(temp_copy.ospath, newFullPath)
            elif transfer == "link":
                # Try hard link and if that fails use a symlink
                with transaction.undoWith(f"link to {local_src}", os.remove, newFullPath):
                    try:
                        os.link(local_src, newFullPath)
                    except OSError:
                        # Read through existing symlinks
                        os.symlink(local_src, newFullPath)
            elif transfer == "hardlink":
                with transaction.undoWith(f"hardlink to {local_src}", os.remove, newFullPath):
                    os.link(local_src, newFullPath)
            elif transfer == "symlink":
                # Read through existing symlinks
                with transaction.undoWith(f"symlink to {local_src}", os.remove, newFullPath):
                    os.symlink(local_src, newFullPath)
            elif transfer == "relsymlink":
                # This is a standard symlink but using a relative path
                # Need the directory name to give to relative root
                # A full file path confuses it into an extra ../
                newFullPathRoot = os.path.dirname(newFullPath)
                relPath = os.path.relpath(local_src, newFullPathRoot)
                with transaction.undoWith(f"relsymlink to {local_src}", os.remove, newFullPath):
                    os.symlink(relPath, newFullPath)
            else:
                raise NotImplementedError(f"Transfer type '{transfer}' not supported.")

        # This was an explicit move requested from a remote resource
        # try to remove that remote resource. We check is_temporary because
        # the local file would have been moved by shutil.move already.
        if requested_transfer == "move" and is_temporary and src != local_uri:
            # Transactions do not work here
            src.remove()

    def walk(
        self, file_filter: Optional[Union[str, re.Pattern]] = None
    ) -> Iterator[Union[List, Tuple[ResourcePath, List[str], List[str]]]]:
        """Walk the directory tree returning matching files and directories.

        Parameters
        ----------
        file_filter : `str` or `re.Pattern`, optional
            Regex used to select files: only file names for which
            ``search`` finds a match are returned. A `str` is compiled
            first. Directory names are not filtered.

        Yields
        ------
        dirpath : `ResourcePath`
            Current directory being examined.
        dirnames : `list` of `str`
            Names of subdirectories within dirpath.
        filenames : `list` of `str`
            Names of all the files within dirpath.

        Raises
        ------
        ValueError
            Raised if this URI is not a directory.
        """
        if not self.isdir():
            raise ValueError("Can not walk a non-directory URI")

        if isinstance(file_filter, str):
            file_filter = re.compile(file_filter)

        for root, dirs, files in os.walk(self.ospath, followlinks=True):
            # Filter by the regex
            if file_filter is not None:
                files = [f for f in files if file_filter.search(f)]
            yield type(self)(root, forceAbsolute=False, forceDirectory=True), dirs, files

    @classmethod
    def _fixupPathUri(
        cls,
        parsed: urllib.parse.ParseResult,
        root: Optional[Union[str, ResourcePath]] = None,
        forceAbsolute: bool = False,
        forceDirectory: bool = False,
    ) -> Tuple[urllib.parse.ParseResult, bool]:
        """Fix up relative paths in URI instances.

        Parameters
        ----------
        parsed : `~urllib.parse.ParseResult`
            The result from parsing a URI using `urllib.parse`.
        root : `str` or `ResourcePath`, optional
            Path to use as root when converting relative to absolute.
            If `None`, it will be the current working directory. This
            is a local file system path, or a file URI. It is only used if
            a file-scheme is used incorrectly with a relative path.
        forceAbsolute : `bool`, ignored
            Has no effect for this subclass. ``file`` URIs are always
            absolute.
        forceDirectory : `bool`, optional
            If `True` forces the URI to end with a separator, otherwise given
            URI is interpreted as is.

        Returns
        -------
        modified : `~urllib.parse.ParseResult`
            Update result if a URI is being handled.
        dirLike : `bool`
            `True` if given parsed URI has a trailing separator or
            forceDirectory is True. Otherwise `False`.

        Raises
        ------
        RuntimeError
            Raised if ``root`` is a `ResourcePath` with a scheme other than
            ``file``.

        Notes
        -----
        Relative paths are explicitly not supported by RFC8089 but `urllib`
        does accept URIs of the form ``file:relative/path.ext``. They need
        to be turned into absolute paths before they can be used. This is
        always done regardless of the ``forceAbsolute`` parameter.
        """
        # assume we are not dealing with a directory like URI
        dirLike = False

        # file URI implies POSIX path separators so split as POSIX,
        # then join as os, and convert to abspath. Do not handle
        # home directories since "file" scheme is explicitly documented
        # to not do tilde expansion.
        sep = posixpath.sep

        # For local file system we can explicitly check to see if this
        # really is a directory. The URI might point to a location that
        # does not exists yet but all that matters is if it is a directory
        # then we make sure use that fact. No need to do the check if
        # we are already being told.
        # The URI path carries percent-quoting (``ospath`` unquotes it for
        # the same reason), so unquote before asking the file system.
        if not forceDirectory and posixpath.isdir(urllib.parse.unquote(parsed.path)):
            forceDirectory = True

        # For an absolute path all we need to do is check if we need
        # to force the directory separator
        if posixpath.isabs(parsed.path):
            if forceDirectory and not parsed.path.endswith(sep):
                parsed = parsed._replace(path=parsed.path + sep)
            # A trailing separator marks the URI as directory-like even
            # when the directory does not exist yet.
            dirLike = parsed.path.endswith(sep)
            return copy.copy(parsed), dirLike

        # Relative path so must fix it to be compliant with the standard

        # Replacement values for the URI
        replacements = {}

        if root is None:
            root = os.path.abspath(os.path.curdir)
        elif isinstance(root, ResourcePath):
            if root.scheme and root.scheme != "file":
                raise RuntimeError(f"The override root must be a file URI not {root.scheme}")
            root = os.path.abspath(root.ospath)

        replacements["path"] = posixpath.normpath(posixpath.join(os2posix(root), parsed.path))

        # normpath strips trailing "/" so put it back if necessary
        # Acknowledge that trailing separator exists.
        if forceDirectory or (parsed.path.endswith(sep) and not replacements["path"].endswith(sep)):
            replacements["path"] += sep
            dirLike = True

        # ParseResult is a NamedTuple so _replace is standard API
        parsed = parsed._replace(**replacements)

        if parsed.params or parsed.query:
            log.warning("Additional items unexpectedly encountered in file URI: %s", parsed.geturl())

        return parsed, dirLike

    @contextlib.contextmanager
    def open(
        self,
        mode: str = "r",
        *,
        encoding: Optional[str] = None,
        prefer_file_temporary: bool = False,
    ) -> Iterator[IO]:
        # Docstring inherited.
        # ``prefer_file_temporary`` is not used here: the builtin ``open``
        # is applied directly to the local path.
        with open(self.ospath, mode=mode, encoding=encoding) as buffer:
            yield buffer