Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24import os 

25import os.path 

26import shutil 

27import urllib.parse 

28import posixpath 

29import copy 

30import logging 

31import re 

32 

# Names exported by ``from <module> import *``.
__all__ = ('ButlerFileURI',)

34 

35from typing import ( 

36 TYPE_CHECKING, 

37 Iterator, 

38 List, 

39 Optional, 

40 Tuple, 

41 Union, 

42) 

43 

44from ..utils import safeMakeDir 

45from .utils import NoTransaction, os2posix, posix2os 

46from ._butlerUri import ButlerURI 

47 

48 

if TYPE_CHECKING:
    # Only needed for the ``transaction`` annotation of ``transfer_from``.
    # ``TYPE_CHECKING`` is false at runtime so this import is skipped then.
    from ..datastore import DatastoreTransaction


# Module-level logger named after this module.
log = logging.getLogger(__name__)

54 

55 

class ButlerFileURI(ButlerURI):
    """URI for explicit ``file`` scheme."""

    # Transfer modes accepted by `transfer_from`.
    transferModes = ("copy", "link", "symlink", "hardlink", "relsymlink", "auto", "move")
    # Mode substituted for "auto" when the source is not a temporary file.
    transferDefault: str = "link"

    # By definition refers to a local file
    isLocal = True

    @property
    def ospath(self) -> str:
        """Path component of the URI localized to current OS.

        Will unquote URI path since a formal URI must include the quoting.
        """
        return urllib.parse.unquote(posix2os(self._uri.path))

    def exists(self) -> bool:
        """Indicate that the file exists."""
        # Uses os.path.exists so if there is a soft link that points
        # to a file that no longer exists this will return False
        return os.path.exists(self.ospath)

    def size(self) -> int:
        """Return the size of the file in bytes.

        Returns
        -------
        sz : `int`
            Size reported by `os.stat`, or 0 if the path is a directory.
        """
        if os.path.isdir(self.ospath):
            return 0
        return os.stat(self.ospath).st_size

    def remove(self) -> None:
        """Remove the resource."""
        os.remove(self.ospath)

    def _as_local(self) -> Tuple[str, bool]:
        """Return the local path of the file.

        This is an internal helper for ``as_local()``.

        Returns
        -------
        path : `str`
            The local path to this file.
        temporary : `bool`
            Always returns `False` (this is not a temporary file).
        """
        return self.ospath, False

    def read(self, size: int = -1) -> bytes:
        """Return the content of the file as bytes.

        Parameters
        ----------
        size : `int`, optional
            Maximum number of bytes to read. The default of -1 reads the
            entire file.

        Returns
        -------
        data : `bytes`
            The bytes read from the file.
        """
        with open(self.ospath, "rb") as fh:
            return fh.read(size)

    def write(self, data: bytes, overwrite: bool = True) -> None:
        """Write the supplied data to the file.

        Parameters
        ----------
        data : `bytes`
            The bytes to write.
        overwrite : `bool`, optional
            If `True` (the default) an existing file is replaced. If `False`
            the file is opened in exclusive-creation mode, so
            `FileExistsError` is raised if it already exists.
        """
        parent = os.path.dirname(self.ospath)
        if not os.path.exists(parent):
            safeMakeDir(parent)
        mode = "wb" if overwrite else "xb"
        with open(self.ospath, mode) as f:
            f.write(data)

    def mkdir(self) -> None:
        """Make the directory associated with this URI.

        Raises
        ------
        FileExistsError
            Raised if the path exists but is not a directory.
        """
        if not os.path.exists(self.ospath):
            safeMakeDir(self.ospath)
        elif not os.path.isdir(self.ospath):
            raise FileExistsError(f"URI {self} exists but is not a directory!")

    def isdir(self) -> bool:
        """Return whether this URI is a directory.

        Returns
        -------
        isdir : `bool`
            `True` if this URI is a directory or looks like a directory,
            else `False`.
        """
        return self.dirLike or os.path.isdir(self.ospath)

    def transfer_from(self, src: ButlerURI, transfer: str,
                      overwrite: bool = False,
                      transaction: Optional[Union[DatastoreTransaction, NoTransaction]] = None) -> None:
        """Transfer the supplied source resource to this local file.

        Parameters
        ----------
        src : `ButlerURI`
            Source URI.
        transfer : `str`
            Mode to use for transferring the resource. Supports the following
            options: copy, link, symlink, hardlink, relsymlink, move, auto.
            ``auto`` resolves to ``transferDefault`` when the source is
            already a local file and to ``copy`` when it had to be
            downloaded to a temporary file.
        overwrite : `bool`, optional
            Allow an existing file to be overwritten. Defaults to `False`.
        transaction : `DatastoreTransaction`, optional
            If a transaction is provided, undo actions will be registered.

        Raises
        ------
        ValueError
            Raised if ``transfer`` is not one of ``transferModes``.
        FileNotFoundError
            Raised if the local version of the source does not exist.
        RuntimeError
            Raised if a link-based mode is requested for a source that is
            only available as a temporary local file.
        FileExistsError
            Raised if the destination exists and ``overwrite`` is `False`.
        NotImplementedError
            Raised if the mode passed validation but has no implementation
            here.
        """
        # Fail early to prevent delays if remote resources are requested
        if transfer not in self.transferModes:
            raise ValueError(f"Transfer mode '{transfer}' not supported by URI scheme {self.scheme}")

        # Existence checks can take time so only try if the log message
        # will be issued.
        if log.isEnabledFor(logging.DEBUG):
            log.debug("Transferring %s [exists: %s] -> %s [exists: %s] (transfer=%s)",
                      src, src.exists(), self, self.exists(), transfer)

        # We do not have to special case ButlerFileURI here because
        # as_local handles that.
        with src.as_local() as local_uri:
            is_temporary = local_uri.isTemporary
            local_src = local_uri.ospath

            # Short circuit if the URIs are identical immediately.
            if self == local_uri:
                log.debug("Target and destination URIs are identical: %s, returning immediately."
                          " No further action required.", self)
                return

            # Default transfer mode depends on whether we have a temporary
            # file or not.
            if transfer == "auto":
                transfer = self.transferDefault if not is_temporary else "copy"

            if not os.path.exists(local_src):
                if is_temporary:
                    msg = f"Local file {local_uri} downloaded from {src} has gone missing"
                else:
                    msg = f"Source URI {src} does not exist"
                raise FileNotFoundError(msg)

            # Follow soft links
            local_src = os.path.realpath(os.path.normpath(local_src))

            # All the modes involving linking use "link" somewhere
            if "link" in transfer and is_temporary:
                raise RuntimeError("Can not use local file system transfer mode"
                                   f" {transfer} for remote resource ({src})")

            # For temporary files we can own them
            requested_transfer = transfer
            if is_temporary and transfer == "copy":
                transfer = "move"

            # The output location should not exist unless overwrite=True.
            # Rather than use `exists()`, use os.stat since we might need
            # the full answer later.
            dest_stat: Optional[os.stat_result]
            try:
                # Do not read through links of the file itself.
                dest_stat = os.lstat(self.ospath)
            except FileNotFoundError:
                dest_stat = None

            # It is possible that the source URI and target URI refer
            # to the same file. This can happen for a number of reasons
            # (such as soft links in the path, or they really are the same).
            # In that case log a message and return as if the transfer
            # completed (it technically did). A temporary file download
            # can't be the same so the test can be skipped.
            if dest_stat and not is_temporary:
                # Be consistent and use lstat here (even though realpath
                # has been called). It does not harm.
                local_src_stat = os.lstat(local_src)
                if (dest_stat.st_ino == local_src_stat.st_ino
                        and dest_stat.st_dev == local_src_stat.st_dev):
                    log.debug("Destination URI %s is the same file as source URI %s, returning immediately."
                              " No further action required.", self, local_uri)
                    return

            if not overwrite and dest_stat:
                raise FileExistsError(f"Destination path '{self}' already exists. Transfer "
                                      f"from {src} cannot be completed.")

            # Make the path absolute (but don't follow links since that
            # would possibly cause us to end up in the wrong place if the
            # file existed already as a soft link)
            newFullPath = os.path.abspath(self.ospath)
            outputDir = os.path.dirname(newFullPath)
            if not os.path.isdir(outputDir):
                # Must create the directory -- this can not be rolled back
                # since another transfer running concurrently may
                # be relying on this existing.
                safeMakeDir(outputDir)

            if transaction is None:
                # Use a no-op transaction to reduce code duplication
                transaction = NoTransaction()

            # For links the OS doesn't let us overwrite so if something does
            # exist we have to remove it before we do the actual "transfer"
            # below
            if "link" in transfer and overwrite and dest_stat:
                try:
                    self.remove()
                except Exception:
                    # If this fails we ignore it since it's a problem
                    # that will manifest immediately below with a more
                    # relevant error message. Leave a trace for debugging.
                    log.debug("Could not remove existing destination %s before linking",
                              self, exc_info=True)

            if transfer == "move":
                with transaction.undoWith(f"move from {local_src}", shutil.move, newFullPath, local_src):
                    shutil.move(local_src, newFullPath)
            elif transfer == "copy":
                with transaction.undoWith(f"copy from {local_src}", os.remove, newFullPath):
                    shutil.copy(local_src, newFullPath)
            elif transfer == "link":
                # Try hard link and if that fails use a symlink
                with transaction.undoWith(f"link to {local_src}", os.remove, newFullPath):
                    try:
                        os.link(local_src, newFullPath)
                    except OSError:
                        # Read through existing symlinks
                        os.symlink(local_src, newFullPath)
            elif transfer == "hardlink":
                with transaction.undoWith(f"hardlink to {local_src}", os.remove, newFullPath):
                    os.link(local_src, newFullPath)
            elif transfer == "symlink":
                # Read through existing symlinks
                with transaction.undoWith(f"symlink to {local_src}", os.remove, newFullPath):
                    os.symlink(local_src, newFullPath)
            elif transfer == "relsymlink":
                # This is a standard symlink but using a relative path
                # Need the directory name to give to relative root
                # A full file path confuses it into an extra ../
                newFullPathRoot = os.path.dirname(newFullPath)
                relPath = os.path.relpath(local_src, newFullPathRoot)
                with transaction.undoWith(f"relsymlink to {local_src}", os.remove, newFullPath):
                    os.symlink(relPath, newFullPath)
            else:
                raise NotImplementedError(f"Transfer type '{transfer}' not supported.")

        # This was an explicit move requested from a remote resource
        # try to remove that remote resource. We check is_temporary because
        # the local file would have been moved by shutil.move already.
        if requested_transfer == "move" and is_temporary:
            # Transactions do not work here
            src.remove()

    def walk(self, file_filter: Optional[Union[str, re.Pattern]] = None) -> Iterator[Union[List,
                                                                                           Tuple[ButlerURI,
                                                                                                 List[str],
                                                                                                 List[str]]]]:
        """Walk the directory tree returning matching files and directories.

        Parameters
        ----------
        file_filter : `str` or `re.Pattern`, optional
            Regex used to select files; only file names for which a search
            with this pattern succeeds are returned. Directory names are
            not filtered.

        Yields
        ------
        dirpath : `ButlerURI`
            Current directory being examined.
        dirnames : `list` of `str`
            Names of subdirectories within dirpath.
        filenames : `list` of `str`
            Names of all the files within dirpath.

        Raises
        ------
        ValueError
            Raised if this URI is not a directory.
        """
        if not self.isdir():
            raise ValueError("Can not walk a non-directory URI")

        if isinstance(file_filter, str):
            file_filter = re.compile(file_filter)

        for root, dirs, files in os.walk(self.ospath):
            # Filter by the regex
            if file_filter is not None:
                files = [f for f in files if file_filter.search(f)]
            yield type(self)(root, forceAbsolute=False, forceDirectory=True), dirs, files

    @classmethod
    def _fixupPathUri(cls, parsed: urllib.parse.ParseResult, root: Optional[Union[str, ButlerURI]] = None,
                      forceAbsolute: bool = False,
                      forceDirectory: bool = False) -> Tuple[urllib.parse.ParseResult, bool]:
        """Fix up relative paths in URI instances.

        Parameters
        ----------
        parsed : `~urllib.parse.ParseResult`
            The result from parsing a URI using `urllib.parse`.
        root : `str` or `ButlerURI`, optional
            Path to use as root when converting relative to absolute.
            If `None`, it will be the current working directory. This
            is a local file system path, or a file URI. It is only used if
            a file-scheme is used incorrectly with a relative path.
        forceAbsolute : `bool`, ignored
            Has no effect for this subclass. ``file`` URIs are always
            absolute.
        forceDirectory : `bool`, optional
            If `True` forces the URI to end with a separator, otherwise given
            URI is interpreted as is.

        Returns
        -------
        modified : `~urllib.parse.ParseResult`
            Update result if a URI is being handled.
        dirLike : `bool`
            `True` if given parsed URI has a trailing separator or
            forceDirectory is True. Otherwise `False`.

        Raises
        ------
        RuntimeError
            Raised if ``root`` is a `ButlerURI` with a scheme other than
            ``file``.

        Notes
        -----
        Relative paths are explicitly not supported by RFC8089 but `urllib`
        does accept URIs of the form ``file:relative/path.ext``. They need
        to be turned into absolute paths before they can be used. This is
        always done regardless of the ``forceAbsolute`` parameter.
        """
        # assume we are not dealing with a directory like URI
        dirLike = False

        # file URI implies POSIX path separators so split as POSIX,
        # then join as os, and convert to abspath. Do not handle
        # home directories since "file" scheme is explicitly documented
        # to not do tilde expansion.
        sep = posixpath.sep

        # For local file system we can explicitly check to see if this
        # really is a directory. The URI might point to a location that
        # does not exists yet but all that matters is if it is a directory
        # then we make sure use that fact. No need to do the check if
        # we are already being told.
        # The URI path is quoted and uses POSIX separators so it has to be
        # converted in the same way as ``ospath`` before asking the file
        # system about it.
        if not forceDirectory and os.path.isdir(urllib.parse.unquote(posix2os(parsed.path))):
            forceDirectory = True

        # For an absolute path all we need to do is check if we need
        # to force the directory separator
        if posixpath.isabs(parsed.path):
            if forceDirectory:
                if not parsed.path.endswith(sep):
                    parsed = parsed._replace(path=parsed.path + sep)
                dirLike = True
            elif parsed.path.endswith(sep):
                # A trailing separator marks the URI as directory-like even
                # if nothing exists at that location yet.
                dirLike = True
            return copy.copy(parsed), dirLike

        # Relative path so must fix it to be compliant with the standard

        # Replacement values for the URI
        replacements = {}

        if root is None:
            root = os.path.abspath(os.path.curdir)
        elif isinstance(root, ButlerURI):
            if root.scheme and root.scheme != "file":
                raise RuntimeError(f"The override root must be a file URI not {root.scheme}")
            root = os.path.abspath(root.ospath)

        replacements["path"] = posixpath.normpath(posixpath.join(os2posix(root), parsed.path))

        # normpath strips trailing "/" so put it back if necessary
        # Acknowledge that trailing separator exists.
        if forceDirectory or (parsed.path.endswith(sep) and not replacements["path"].endswith(sep)):
            replacements["path"] += sep
            dirLike = True

        # ParseResult is a NamedTuple so _replace is standard API
        parsed = parsed._replace(**replacements)

        if parsed.params or parsed.query:
            log.warning("Additional items unexpectedly encountered in file URI: %s", parsed.geturl())

        return parsed, dirLike