Coverage for python/lsst/daf/butler/core/_butlerUri/file.py: 14%

Shortcuts on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

162 statements  

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24import os 

25import os.path 

26import shutil 

27import urllib.parse 

28import posixpath 

29import copy 

30import logging 

31import re 

32 

33__all__ = ('ButlerFileURI',) 

34 

35from typing import ( 

36 TYPE_CHECKING, 

37 Iterator, 

38 List, 

39 Optional, 

40 Tuple, 

41 Union, 

42) 

43 

44from .utils import NoTransaction, os2posix, posix2os 

45from ._butlerUri import ButlerURI 

46 

47 

48if TYPE_CHECKING: 48 ↛ 49line 48 didn't jump to line 49, because the condition on line 48 was never true

49 from ..datastore import DatastoreTransaction 

50 

51 

52log = logging.getLogger(__name__) 

53 

54 

class ButlerFileURI(ButlerURI):
    """URI for explicit ``file`` scheme."""

    # Transfer modes accepted by `transfer_from`.
    transferModes = ("copy", "link", "symlink", "hardlink", "relsymlink", "auto", "move")
    # Mode substituted for "auto" when the source is not a temporary file.
    transferDefault: str = "link"

    # By definition refers to a local file
    isLocal = True

    @property
    def ospath(self) -> str:
        """Path component of the URI localized to current OS.

        Will unquote URI path since a formal URI must include the quoting.
        """
        return urllib.parse.unquote(posix2os(self._uri.path))

    def exists(self) -> bool:
        """Indicate that the file exists.

        Returns
        -------
        exists : `bool`
            `True` if something exists at the local path, else `False`.
        """
        # Uses os.path.exists so if there is a soft link that points
        # to a file that no longer exists this will return False
        return os.path.exists(self.ospath)

    def size(self) -> int:
        """Return the size of the file in bytes.

        Returns
        -------
        sz : `int`
            Size of the file in bytes as reported by `os.stat`. A directory
            always reports a size of 0.
        """
        if os.path.isdir(self.ospath):
            return 0
        return os.stat(self.ospath).st_size

    def remove(self) -> None:
        """Remove the resource."""
        os.remove(self.ospath)

    def _as_local(self) -> Tuple[str, bool]:
        """Return the local path of the file.

        This is an internal helper for ``as_local()``.

        Returns
        -------
        path : `str`
            The local path to this file.
        temporary : `bool`
            Always returns `False` (this is not a temporary file).
        """
        return self.ospath, False

    def read(self, size: int = -1) -> bytes:
        """Return content of the file as bytes.

        Parameters
        ----------
        size : `int`, optional
            Maximum number of bytes to read from the start of the file.
            The default of -1 reads the entire file.

        Returns
        -------
        data : `bytes`
            The bytes read from the file.
        """
        with open(self.ospath, "rb") as fh:
            return fh.read(size)

    def write(self, data: bytes, overwrite: bool = True) -> None:
        """Write the supplied data to the file.

        Parameters
        ----------
        data : `bytes`
            The bytes to write.
        overwrite : `bool`, optional
            If `True` (default) an existing file is truncated and replaced.
            If `False` the file is opened in exclusive-creation mode so
            `FileExistsError` is raised if it already exists.
        """
        # Missing parent directories are created on demand.
        parent = os.path.dirname(self.ospath)
        if not os.path.exists(parent):
            os.makedirs(parent, exist_ok=True)
        mode = "wb" if overwrite else "xb"
        with open(self.ospath, mode) as fh:
            fh.write(data)

    def mkdir(self) -> None:
        """Make the directory associated with this URI.

        Raises
        ------
        FileExistsError
            Raised if the path already exists but is not a directory.
        """
        if not os.path.exists(self.ospath):
            os.makedirs(self.ospath, exist_ok=True)
        elif not os.path.isdir(self.ospath):
            raise FileExistsError(f"URI {self} exists but is not a directory!")

    def isdir(self) -> bool:
        """Return whether this URI is a directory.

        Returns
        -------
        isdir : `bool`
            `True` if this URI is a directory or looks like a directory,
            else `False`.
        """
        return self.dirLike or os.path.isdir(self.ospath)

    def transfer_from(self, src: ButlerURI, transfer: str,
                      overwrite: bool = False,
                      transaction: Optional[Union[DatastoreTransaction, NoTransaction]] = None) -> None:
        """Transfer the resource at ``src`` to the local file of this URI.

        Parameters
        ----------
        src : `ButlerURI`
            Source URI.
        transfer : `str`
            Mode to use for transferring the resource. Supports the following
            options: copy, move, link, symlink, hardlink, relsymlink, auto.
            "link" tries a hard link and falls back to a symlink.
            "auto" uses ``transferDefault`` unless the local view of the
            source is a temporary file, in which case "copy" is used.
        overwrite : `bool`, optional
            Allow an existing file to be overwritten. Defaults to `False`.
        transaction : `DatastoreTransaction`, optional
            If a transaction is provided, undo actions will be registered.

        Raises
        ------
        ValueError
            Raised if the transfer mode is not one of ``transferModes``.
        FileNotFoundError
            Raised if the local view of the source does not exist.
        RuntimeError
            Raised if a link-based mode is requested for a source that
            is only available locally as a temporary file.
        FileExistsError
            Raised if the destination exists and ``overwrite`` is `False`.
        """
        # Fail early to prevent delays if remote resources are requested
        if transfer not in self.transferModes:
            raise ValueError(f"Transfer mode '{transfer}' not supported by URI scheme {self.scheme}")

        # Existence checks can take time so only try if the log message
        # will be issued.
        if log.isEnabledFor(logging.DEBUG):
            log.debug("Transferring %s [exists: %s] -> %s [exists: %s] (transfer=%s)",
                      src, src.exists(), self, self.exists(), transfer)

        # We do not have to special case ButlerFileURI here because
        # as_local handles that.
        with src.as_local() as local_uri:
            is_temporary = local_uri.isTemporary
            local_src = local_uri.ospath

            # Short circuit if the URIs are identical immediately.
            if self == local_uri:
                log.debug("Target and destination URIs are identical: %s, returning immediately."
                          " No further action required.", self)
                return

            # Default transfer mode depends on whether we have a temporary
            # file or not.
            if transfer == "auto":
                transfer = self.transferDefault if not is_temporary else "copy"

            if not os.path.exists(local_src):
                if is_temporary:
                    msg = f"Local file {local_uri} downloaded from {src} has gone missing"
                else:
                    msg = f"Source URI {src} does not exist"
                raise FileNotFoundError(msg)

            # Follow soft links
            local_src = os.path.realpath(os.path.normpath(local_src))

            # All the modes involving linking use "link" somewhere
            if "link" in transfer and is_temporary:
                raise RuntimeError("Can not use local file system transfer mode"
                                   f" {transfer} for remote resource ({src})")

            # For temporary files we can own them
            requested_transfer = transfer
            if is_temporary and transfer == "copy":
                transfer = "move"

            # The output location should not exist unless overwrite=True.
            # Rather than use `exists()`, use os.stat since we might need
            # the full answer later.
            dest_stat: Optional[os.stat_result]
            try:
                # Do not read through links of the file itself.
                dest_stat = os.lstat(self.ospath)
            except FileNotFoundError:
                dest_stat = None

            # It is possible that the source URI and target URI refer
            # to the same file. This can happen for a number of reasons
            # (such as soft links in the path, or they really are the same).
            # In that case log a message and return as if the transfer
            # completed (it technically did). A temporary file download
            # can't be the same so the test can be skipped.
            if dest_stat and not is_temporary:
                # Be consistent and use lstat here (even though realpath
                # has been called). It does not harm.
                local_src_stat = os.lstat(local_src)
                if (dest_stat.st_ino == local_src_stat.st_ino
                        and dest_stat.st_dev == local_src_stat.st_dev):
                    log.debug("Destination URI %s is the same file as source URI %s, returning immediately."
                              " No further action required.", self, local_uri)
                    return

            if not overwrite and dest_stat:
                raise FileExistsError(f"Destination path '{self}' already exists. Transfer "
                                      f"from {src} cannot be completed.")

            # Make the path absolute (but don't follow links since that
            # would possibly cause us to end up in the wrong place if the
            # file existed already as a soft link)
            newFullPath = os.path.abspath(self.ospath)
            outputDir = os.path.dirname(newFullPath)
            if not os.path.isdir(outputDir):
                # Must create the directory -- this can not be rolled back
                # since another transfer running concurrently may
                # be relying on this existing.
                os.makedirs(outputDir, exist_ok=True)

            if transaction is None:
                # Use a no-op transaction to reduce code duplication
                transaction = NoTransaction()

            # For links the OS doesn't let us overwrite so if something does
            # exist we have to remove it before we do the actual "transfer"
            # below
            if "link" in transfer and overwrite and dest_stat:
                try:
                    self.remove()
                except Exception:
                    # If this fails we ignore it since it's a problem
                    # that will manifest immediately below with a more relevant
                    # error message
                    pass

            if transfer == "move":
                with transaction.undoWith(f"move from {local_src}", shutil.move, newFullPath, local_src):
                    shutil.move(local_src, newFullPath)
            elif transfer == "copy":
                with transaction.undoWith(f"copy from {local_src}", os.remove, newFullPath):
                    shutil.copy(local_src, newFullPath)
            elif transfer == "link":
                # Try hard link and if that fails use a symlink
                with transaction.undoWith(f"link to {local_src}", os.remove, newFullPath):
                    try:
                        os.link(local_src, newFullPath)
                    except OSError:
                        # Read through existing symlinks
                        os.symlink(local_src, newFullPath)
            elif transfer == "hardlink":
                with transaction.undoWith(f"hardlink to {local_src}", os.remove, newFullPath):
                    os.link(local_src, newFullPath)
            elif transfer == "symlink":
                # Read through existing symlinks
                with transaction.undoWith(f"symlink to {local_src}", os.remove, newFullPath):
                    os.symlink(local_src, newFullPath)
            elif transfer == "relsymlink":
                # This is a standard symlink but using a relative path
                # Need the directory name to give to relative root
                # A full file path confuses it into an extra ../
                newFullPathRoot = os.path.dirname(newFullPath)
                relPath = os.path.relpath(local_src, newFullPathRoot)
                with transaction.undoWith(f"relsymlink to {local_src}", os.remove, newFullPath):
                    os.symlink(relPath, newFullPath)
            else:
                raise NotImplementedError(f"Transfer type '{transfer}' not supported.")

        # This was an explicit move requested from a remote resource
        # try to remove that remote resource. We check is_temporary because
        # the local file would have been moved by shutil.move already.
        if requested_transfer == "move" and is_temporary:
            # Transactions do not work here
            src.remove()

    def walk(self, file_filter: Optional[Union[str, re.Pattern]] = None) -> Iterator[Union[List,
                                                                                           Tuple[ButlerURI,
                                                                                                 List[str],
                                                                                                 List[str]]]]:
        """Walk the directory tree returning matching files and directories.

        Parameters
        ----------
        file_filter : `str` or `re.Pattern`, optional
            Regex used to select files. Only file names for which the
            regex finds a match are returned. Directory names are not
            filtered.

        Yields
        ------
        dirpath : `ButlerURI`
            Current directory being examined.
        dirnames : `list` of `str`
            Names of subdirectories within dirpath.
        filenames : `list` of `str`
            Names of all the files within dirpath.

        Raises
        ------
        ValueError
            Raised if this URI is not a directory.
        """
        if not self.isdir():
            raise ValueError("Can not walk a non-directory URI")

        if isinstance(file_filter, str):
            file_filter = re.compile(file_filter)

        for root, dirs, files in os.walk(self.ospath):
            # Filter by the regex
            if file_filter is not None:
                files = [f for f in files if file_filter.search(f)]
            yield type(self)(root, forceAbsolute=False, forceDirectory=True), dirs, files

    @classmethod
    def _fixupPathUri(cls, parsed: urllib.parse.ParseResult, root: Optional[Union[str, ButlerURI]] = None,
                      forceAbsolute: bool = False,
                      forceDirectory: bool = False) -> Tuple[urllib.parse.ParseResult, bool]:
        """Fix up relative paths in URI instances.

        Parameters
        ----------
        parsed : `~urllib.parse.ParseResult`
            The result from parsing a URI using `urllib.parse`.
        root : `str` or `ButlerURI`, optional
            Path to use as root when converting relative to absolute.
            If `None`, it will be the current working directory. This
            is a local file system path, or a file URI. It is only used if
            a file-scheme is used incorrectly with a relative path.
        forceAbsolute : `bool`, ignored
            Has no effect for this subclass. ``file`` URIs are always
            absolute.
        forceDirectory : `bool`, optional
            If `True` forces the URI to end with a separator, otherwise given
            URI is interpreted as is.

        Returns
        -------
        modified : `~urllib.parse.ParseResult`
            Update result if a URI is being handled.
        dirLike : `bool`
            `True` if given parsed URI has a trailing separator or
            forceDirectory is True. Otherwise `False`.

        Raises
        ------
        RuntimeError
            Raised if ``root`` is a `ButlerURI` with a scheme other than
            ``file``.

        Notes
        -----
        Relative paths are explicitly not supported by RFC8089 but `urllib`
        does accept URIs of the form ``file:relative/path.ext``. They need
        to be turned into absolute paths before they can be used. This is
        always done regardless of the ``forceAbsolute`` parameter.
        """
        # assume we are not dealing with a directory like URI
        dirLike = False

        # file URI implies POSIX path separators so split as POSIX,
        # then join as os, and convert to abspath. Do not handle
        # home directories since "file" scheme is explicitly documented
        # to not do tilde expansion.
        sep = posixpath.sep

        # For local file system we can explicitly check to see if this
        # really is a directory. The URI might point to a location that
        # does not exists yet but all that matters is if it is a directory
        # then we make sure use that fact. No need to do the check if
        # we are already being told.
        # The URI path is percent-encoded so it must be unquoted before
        # asking the file system about it, matching what ``ospath`` does.
        if not forceDirectory and posixpath.isdir(urllib.parse.unquote(parsed.path)):
            forceDirectory = True

        # For an absolute path all we need to do is check if we need
        # to force the directory separator
        if posixpath.isabs(parsed.path):
            if forceDirectory:
                if not parsed.path.endswith(sep):
                    parsed = parsed._replace(path=parsed.path+sep)
                dirLike = True
            return copy.copy(parsed), dirLike

        # Relative path so must fix it to be compliant with the standard

        # Replacement values for the URI
        replacements = {}

        if root is None:
            root = os.path.abspath(os.path.curdir)
        elif isinstance(root, ButlerURI):
            if root.scheme and root.scheme != "file":
                raise RuntimeError(f"The override root must be a file URI not {root.scheme}")
            root = os.path.abspath(root.ospath)

        replacements["path"] = posixpath.normpath(posixpath.join(os2posix(root), parsed.path))

        # normpath strips trailing "/" so put it back if necessary
        # Acknowledge that trailing separator exists.
        if forceDirectory or (parsed.path.endswith(sep) and not replacements["path"].endswith(sep)):
            replacements["path"] += sep
            dirLike = True

        # ParseResult is a NamedTuple so _replace is standard API
        parsed = parsed._replace(**replacements)

        if parsed.params or parsed.query:
            log.warning("Additional items unexpectedly encountered in file URI: %s", parsed.geturl())

        return parsed, dirLike