Coverage for python/lsst/resources/file.py: 89%

181 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2023-02-02 06:15 -0800

1# This file is part of lsst-resources. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# Use of this source code is governed by a 3-clause BSD-style 

10# license that can be found in the LICENSE file. 

11 

12from __future__ import annotations 

13 

14import contextlib 

15import copy 

16import logging 

17import os 

18import os.path 

19import posixpath 

20import re 

21import shutil 

22import urllib.parse 

23 

24__all__ = ("FileResourcePath",) 

25 

26from typing import IO, TYPE_CHECKING, Iterator, List, Optional, Tuple, Union 

27 

28from ._resourceHandles._fileResourceHandle import FileResourceHandle 

29from ._resourcePath import ResourcePath 

30from .utils import NoTransaction, os2posix, posix2os 

31 

32if TYPE_CHECKING: 32 ↛ 33line 32 didn't jump to line 33, because the condition on line 32 was never true

33 from .utils import TransactionProtocol 

34 

35 

36log = logging.getLogger(__name__) 

37 

38 

class FileResourcePath(ResourcePath):
    """Path for explicit ``file`` URI scheme."""

    # Transfer modes accepted by `transfer_from`.
    transferModes = ("copy", "link", "symlink", "hardlink", "relsymlink", "auto", "move")
    # Mode that "auto" resolves to when the source is not a temporary file.
    transferDefault: str = "link"

    # By definition refers to a local file
    isLocal = True

    @property
    def ospath(self) -> str:
        """Path component of the URI localized to current OS.

        Will unquote URI path since a formal URI must include the quoting.
        """
        return urllib.parse.unquote(posix2os(self._uri.path))

    def exists(self) -> bool:
        """Indicate that the file exists."""
        # Uses os.path.exists so if there is a soft link that points
        # to a file that no longer exists this will return False
        return os.path.exists(self.ospath)

    def size(self) -> int:
        """Return the size of the file in bytes.

        Returns
        -------
        sz : `int`
            Size reported by `os.stat`, or 0 if the path is a directory.
        """
        if not os.path.isdir(self.ospath):
            stat = os.stat(self.ospath)
            sz = stat.st_size
        else:
            sz = 0
        return sz

    def remove(self) -> None:
        """Remove the resource."""
        os.remove(self.ospath)

    def _as_local(self) -> Tuple[str, bool]:
        """Return the local path of the file.

        This is an internal helper for ``as_local()``.

        Returns
        -------
        path : `str`
            The local path to this file.
        temporary : `bool`
            Always returns the temporary nature of the input file resource.
        """
        return self.ospath, self.isTemporary

    def read(self, size: int = -1) -> bytes:
        """Return content of the file as bytes.

        Parameters
        ----------
        size : `int`, optional
            Maximum number of bytes to read. The default of -1 reads the
            entire file.

        Returns
        -------
        data : `bytes`
            The bytes read from the file.
        """
        with open(self.ospath, "rb") as fh:
            return fh.read(size)

    def write(self, data: bytes, overwrite: bool = True) -> None:
        """Write the supplied data to the file.

        Parameters
        ----------
        data : `bytes`
            The bytes to write.
        overwrite : `bool`, optional
            If `True` (default) an existing file is replaced. If `False`
            the file is opened in exclusive-creation mode, so
            `FileExistsError` is raised if it already exists.
        """
        # Create the parent directory on demand.
        parent_dir = os.path.dirname(self.ospath)
        if not os.path.exists(parent_dir):
            os.makedirs(parent_dir, exist_ok=True)
        mode = "wb" if overwrite else "xb"
        with open(self.ospath, mode) as f:
            f.write(data)

    def mkdir(self) -> None:
        """Make the directory associated with this URI.

        An attempt will be made to create the directory even if the URI
        looks like a file.

        Raises
        ------
        NotADirectoryError:
            Raised if a non-directory already exists.
        """
        try:
            os.makedirs(self.ospath, exist_ok=True)
        except FileExistsError:
            raise NotADirectoryError(f"{self.ospath} exists but is not a directory.") from None

    def isdir(self) -> bool:
        """Return whether this URI is a directory.

        Returns
        -------
        isdir : `bool`
            `True` if this URI is a directory or looks like a directory,
            else `False`.
        """
        return self.dirLike or os.path.isdir(self.ospath)

    def transfer_from(
        self,
        src: ResourcePath,
        transfer: str,
        overwrite: bool = False,
        transaction: Optional[TransactionProtocol] = None,
    ) -> None:
        """Transfer the given source resource to this local file.

        Parameters
        ----------
        src : `ResourcePath`
            Source URI.
        transfer : `str`
            Mode to use for transferring the resource. Supports the following
            options: copy, link, symlink, hardlink, relsymlink, move, auto.
            "auto" resolves to ``transferDefault`` unless the local view of
            the source is a temporary file, in which case "copy" is used.
        overwrite : `bool`, optional
            Allow an existing file to be overwritten. Defaults to `False`.
        transaction : `~lsst.resources.utils.TransactionProtocol`, optional
            If a transaction is provided, undo actions will be registered.

        Raises
        ------
        ValueError
            Raised if the transfer mode is not one of ``transferModes``.
        FileNotFoundError
            Raised if the local view of the source does not exist.
        RuntimeError
            Raised if a link-based mode is requested for a source that had
            to be downloaded to a temporary local file.
        FileExistsError
            Raised if the destination exists and ``overwrite`` is `False`.
        NotImplementedError
            Raised if the resolved transfer mode has no implementation.
        """
        # Fail early to prevent delays if remote resources are requested
        if transfer not in self.transferModes:
            raise ValueError(f"Transfer mode '{transfer}' not supported by URI scheme {self.scheme}")

        # Existence checks can take time so only try if the log message
        # will be issued.
        if log.isEnabledFor(logging.DEBUG):
            log.debug(
                "Transferring %s [exists: %s] -> %s [exists: %s] (transfer=%s)",
                src,
                src.exists(),
                self,
                self.exists(),
                transfer,
            )

        # We do not have to special case FileResourcePath here because
        # as_local handles that.
        with src.as_local() as local_uri:
            is_temporary = local_uri.isTemporary
            local_src = local_uri.ospath

            # Short circuit if the URIs are identical immediately.
            if self == local_uri:
                log.debug(
                    "Target and destination URIs are identical: %s, returning immediately."
                    " No further action required.",
                    self,
                )
                return

            # Default transfer mode depends on whether we have a temporary
            # file or not.
            if transfer == "auto":
                transfer = self.transferDefault if not is_temporary else "copy"

            if not os.path.exists(local_src):
                if is_temporary:
                    if src == local_uri:
                        msg = f"Local temporary file {src} has gone missing."
                    else:
                        # This will not happen in normal scenarios.
                        msg = f"Local file {local_uri} downloaded from {src} has gone missing"
                else:
                    msg = f"Source URI {src} does not exist"
                raise FileNotFoundError(msg)

            # Follow soft links
            local_src = os.path.realpath(os.path.normpath(local_src))

            # Creating a symlink to a local copy of a remote resource
            # should never work. Creating a hardlink will work but should
            # not be allowed since it is highly unlikely that this is ever
            # an intended option and depends on the local target being
            # on the same file system as was used for the temporary file
            # download.
            # If a symlink is being requested for a local temporary file
            # that is likely undesirable but should not be refused.
            if is_temporary and src != local_uri and "link" in transfer:
                raise RuntimeError(
                    f"Can not use local file system transfer mode {transfer} for remote resource ({src})"
                )
            elif is_temporary and src == local_uri and "symlink" in transfer:
                log.debug(
                    "Using a symlink for a temporary resource may lead to unexpected downstream failures."
                )

            # For temporary files we can own them if we created it.
            # Remember what was asked for since that decides whether the
            # original source is removed at the end.
            requested_transfer = transfer
            if src != local_uri and is_temporary and transfer == "copy":
                transfer = "move"

            # The output location should not exist unless overwrite=True.
            # Rather than use `exists()`, use os.stat since we might need
            # the full answer later.
            dest_stat: Optional[os.stat_result]
            try:
                # Do not read through links of the file itself.
                dest_stat = os.lstat(self.ospath)
            except FileNotFoundError:
                dest_stat = None

            # It is possible that the source URI and target URI refer
            # to the same file. This can happen for a number of reasons
            # (such as soft links in the path, or they really are the same).
            # In that case log a message and return as if the transfer
            # completed (it technically did). A temporary file download
            # can't be the same so the test can be skipped.
            if dest_stat is not None and not is_temporary:
                # Be consistent and use lstat here (even though realpath
                # has been called). It does not harm.
                local_src_stat = os.lstat(local_src)
                if dest_stat.st_ino == local_src_stat.st_ino and dest_stat.st_dev == local_src_stat.st_dev:
                    log.debug(
                        "Destination URI %s is the same file as source URI %s, returning immediately."
                        " No further action required.",
                        self,
                        local_uri,
                    )
                    return

            if not overwrite and dest_stat is not None:
                raise FileExistsError(
                    f"Destination path '{self}' already exists. Transfer from {src} cannot be completed."
                )

            # Make the path absolute (but don't follow links since that
            # would possibly cause us to end up in the wrong place if the
            # file existed already as a soft link)
            newFullPath = os.path.abspath(self.ospath)
            outputDir = os.path.dirname(newFullPath)
            if not os.path.isdir(outputDir):
                # Must create the directory -- this can not be rolled back
                # since another transfer running concurrently may
                # be relying on this existing.
                os.makedirs(outputDir, exist_ok=True)

            if transaction is None:
                # Use a no-op transaction to reduce code duplication
                transaction = NoTransaction()

            # For links the OS doesn't let us overwrite so if something does
            # exist we have to remove it before we do the actual "transfer"
            # below
            if "link" in transfer and overwrite and dest_stat is not None:
                try:
                    self.remove()
                except Exception:
                    # If this fails we ignore it since it's a problem
                    # that will manifest immediately below with a more relevant
                    # error message
                    pass

            if transfer == "move":
                # If a rename works we try that since that is guaranteed to
                # be atomic. If that fails we copy and rename. We do this
                # in case other processes are trying to move to the same
                # file and we want the "winner" to not be corrupted.
                try:
                    with transaction.undoWith(f"move from {local_src}", os.rename, newFullPath, local_src):
                        os.rename(local_src, newFullPath)
                except OSError:
                    with self.temporary_uri(prefix=self.parent(), suffix=self.getExtension()) as temp_copy:
                        shutil.copy(local_src, temp_copy.ospath)
                        with transaction.undoWith(
                            f"move from {local_src}",
                            shutil.move,
                            newFullPath,
                            local_src,
                            copy_function=shutil.copy,
                        ):
                            os.rename(temp_copy.ospath, newFullPath)
                    os.remove(local_src)
            elif transfer == "copy":
                # We want atomic copies so first copy to a temp location in
                # the same output directory. This at least guarantees that
                # if multiple processes are writing to the same file
                # simultaneously the file we end up with will not be corrupt.
                with self.temporary_uri(prefix=self.parent(), suffix=self.getExtension()) as temp_copy:
                    shutil.copy(local_src, temp_copy.ospath)
                    with transaction.undoWith(f"copy from {local_src}", os.remove, newFullPath):
                        # os.rename works even if the file exists.
                        # It's possible that another process has copied a file
                        # in whilst this one was copying. If overwrite
                        # protection is needed then another stat() call should
                        # happen here.
                        os.rename(temp_copy.ospath, newFullPath)
            elif transfer == "link":
                # Try hard link and if that fails use a symlink
                with transaction.undoWith(f"link to {local_src}", os.remove, newFullPath):
                    try:
                        os.link(local_src, newFullPath)
                    except OSError:
                        # Read through existing symlinks
                        os.symlink(local_src, newFullPath)
            elif transfer == "hardlink":
                with transaction.undoWith(f"hardlink to {local_src}", os.remove, newFullPath):
                    os.link(local_src, newFullPath)
            elif transfer == "symlink":
                # Read through existing symlinks
                with transaction.undoWith(f"symlink to {local_src}", os.remove, newFullPath):
                    os.symlink(local_src, newFullPath)
            elif transfer == "relsymlink":
                # This is a standard symlink but using a relative path
                # Need the directory name to give to relative root
                # A full file path confuses it into an extra ../
                newFullPathRoot = os.path.dirname(newFullPath)
                relPath = os.path.relpath(local_src, newFullPathRoot)
                with transaction.undoWith(f"relsymlink to {local_src}", os.remove, newFullPath):
                    os.symlink(relPath, newFullPath)
            else:
                raise NotImplementedError(f"Transfer type '{transfer}' not supported.")

        # This was an explicit move requested from a remote resource
        # try to remove that remote resource. We check is_temporary because
        # the local file would have been moved by shutil.move already.
        if requested_transfer == "move" and is_temporary and src != local_uri:
            # Transactions do not work here
            src.remove()

    def walk(
        self, file_filter: Optional[Union[str, re.Pattern]] = None
    ) -> Iterator[Union[List, Tuple[ResourcePath, List[str], List[str]]]]:
        """Walk the directory tree returning matching files and directories.

        Parameters
        ----------
        file_filter : `str` or `re.Pattern`, optional
            Regex to filter out files from the list before it is returned.
            Only file names for which the regex finds a match are kept.

        Yields
        ------
        dirpath : `ResourcePath`
            Current directory being examined.
        dirnames : `list` of `str`
            Names of subdirectories within dirpath.
        filenames : `list` of `str`
            Names of all the files within dirpath.

        Raises
        ------
        ValueError
            Raised if this URI is not a directory.
        """
        if not self.isdir():
            raise ValueError("Can not walk a non-directory URI")

        if isinstance(file_filter, str):
            file_filter = re.compile(file_filter)

        for root, dirs, files in os.walk(self.ospath, followlinks=True):
            # Filter by the regex
            if file_filter is not None:
                files = [f for f in files if file_filter.search(f)]
            yield type(self)(root, forceAbsolute=False, forceDirectory=True), dirs, files

    @classmethod
    def _fixupPathUri(
        cls,
        parsed: urllib.parse.ParseResult,
        root: Optional[Union[str, ResourcePath]] = None,
        forceAbsolute: bool = False,
        forceDirectory: bool = False,
    ) -> Tuple[urllib.parse.ParseResult, bool]:
        """Fix up relative paths in URI instances.

        Parameters
        ----------
        parsed : `~urllib.parse.ParseResult`
            The result from parsing a URI using `urllib.parse`.
        root : `str` or `ResourcePath`, optional
            Path to use as root when converting relative to absolute.
            If `None`, it will be the current working directory. This
            is a local file system path, or a file URI.  It is only used if
            a file-scheme is used incorrectly with a relative path.
        forceAbsolute : `bool`, ignored
            Has no effect for this subclass. ``file`` URIs are always
            absolute.
        forceDirectory : `bool`, optional
            If `True` forces the URI to end with a separator, otherwise given
            URI is interpreted as is.

        Returns
        -------
        modified : `~urllib.parse.ParseResult`
            Update result if a URI is being handled.
        dirLike : `bool`
            `True` if given parsed URI has a trailing separator or
            forceDirectory is True. Otherwise `False`.

        Raises
        ------
        RuntimeError
            Raised if ``root`` is a `ResourcePath` with a scheme other
            than ``file``.

        Notes
        -----
        Relative paths are explicitly not supported by RFC8089 but `urllib`
        does accept URIs of the form ``file:relative/path.ext``. They need
        to be turned into absolute paths before they can be used. This is
        always done regardless of the ``forceAbsolute`` parameter.
        """
        # assume we are not dealing with a directory like URI
        dirLike = False

        # file URI implies POSIX path separators so split as POSIX,
        # then join as os, and convert to abspath. Do not handle
        # home directories since "file" scheme is explicitly documented
        # to not do tilde expansion.
        sep = posixpath.sep

        # For local file system we can explicitly check to see if this
        # really is a directory. The URI might point to a location that
        # does not exists yet but all that matters is if it is a directory
        # then we make sure use that fact. No need to do the check if
        # we are already being told.
        # NOTE(review): parsed.path is passed here without the unquoting
        # that ``ospath`` applies, so a directory whose name needs URI
        # quoting is presumably not detected -- confirm whether intended.
        if not forceDirectory and posixpath.isdir(parsed.path):
            forceDirectory = True

        # For an absolute path all we need to do is check if we need
        # to force the directory separator
        if posixpath.isabs(parsed.path):
            if forceDirectory:
                if not parsed.path.endswith(sep):
                    parsed = parsed._replace(path=parsed.path + sep)
                dirLike = True
            return copy.copy(parsed), dirLike

        # Relative path so must fix it to be compliant with the standard

        # Replacement values for the URI
        replacements = {}

        if root is None:
            root = os.path.abspath(os.path.curdir)
        elif isinstance(root, ResourcePath):
            if root.scheme and root.scheme != "file":
                raise RuntimeError(f"The override root must be a file URI not {root.scheme}")
            root = os.path.abspath(root.ospath)

        replacements["path"] = posixpath.normpath(posixpath.join(os2posix(root), parsed.path))

        # normpath strips trailing "/" so put it back if necessary
        # Acknowledge that trailing separator exists.
        if forceDirectory or (parsed.path.endswith(sep) and not replacements["path"].endswith(sep)):
            replacements["path"] += sep
            dirLike = True

        # ParseResult is a NamedTuple so _replace is standard API
        parsed = parsed._replace(**replacements)

        if parsed.params or parsed.query:
            log.warning("Additional items unexpectedly encountered in file URI: %s", parsed.geturl())

        return parsed, dirLike

    @contextlib.contextmanager
    def _openImpl(
        self,
        mode: str = "r",
        *,
        encoding: Optional[str] = None,
    ) -> Iterator[IO]:
        """Open the local file through a `FileResourceHandle`.

        Parameters
        ----------
        mode : `str`, optional
            Mode passed through to the handle. Defaults to "r".
        encoding : `str`, optional
            Encoding passed through to the handle.

        Yields
        ------
        buffer : `IO`
            The handle, valid for the duration of the context.
        """
        with FileResourceHandle(mode=mode, log=log, filename=self.ospath, encoding=encoding) as buffer:
            yield buffer  # type: ignore