Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24import os 

25import os.path 

26import shutil 

27import urllib.parse 

28import posixpath 

29import copy 

30import logging 

31import re 

32 

33__all__ = ('ButlerFileURI',) 

34 

35from typing import ( 

36 TYPE_CHECKING, 

37 cast, 

38 Iterator, 

39 List, 

40 Optional, 

41 Tuple, 

42 Union, 

43) 

44 

45from ..utils import safeMakeDir 

46from .utils import NoTransaction, os2posix, posix2os 

47from ._butlerUri import ButlerURI 

48 

49 

if TYPE_CHECKING:
    # Only needed for type annotations; importing it at runtime is avoided.
    from ..datastore import DatastoreTransaction


# Module-level logger, named after this module.
log = logging.getLogger(__name__)

55 

56 

class ButlerFileURI(ButlerURI):
    """URI for explicit ``file`` scheme.

    All file system operations act on the local path given by `ospath`.
    """

    # Transfer modes accepted by `transfer_from`.
    transferModes = ("copy", "link", "symlink", "hardlink", "relsymlink", "auto", "move")

    # Mode substituted for "auto" when the source is not a temporary file.
    transferDefault: str = "link"

    # By definition refers to a local file
    isLocal = True

    @property
    def ospath(self) -> str:
        """Path component of the URI localized to current OS.

        Will unquote URI path since a formal URI must include the quoting.
        """
        return urllib.parse.unquote(posix2os(self._uri.path))

    def exists(self) -> bool:
        """Indicate that the file exists.

        Returns
        -------
        exists : `bool`
            `True` if the path exists. A soft link whose target is missing
            is reported as not existing.
        """
        # Uses os.path.exists so if there is a soft link that points
        # to a file that no longer exists this will return False
        return os.path.exists(self.ospath)

    def size(self) -> int:
        """Return the size of the file in bytes.

        Returns
        -------
        sz : `int`
            Size reported by `os.stat`, or 0 if the path is a directory.
        """
        if not os.path.isdir(self.ospath):
            stat = os.stat(self.ospath)
            sz = stat.st_size
        else:
            sz = 0
        return sz

    def remove(self) -> None:
        """Remove the resource using `os.remove`."""
        os.remove(self.ospath)

    def _as_local(self) -> Tuple[str, bool]:
        """Return the local path of the file.

        This is an internal helper for ``as_local()``.

        Returns
        -------
        path : `str`
            The local path to this file.
        temporary : `bool`
            Always returns `False` (this is not a temporary file).
        """
        return self.ospath, False

    def relative_to(self, other: ButlerURI) -> Optional[str]:
        """Return the relative path from this URI to the other URI.

        Parameters
        ----------
        other : `ButlerURI`
            URI to use to calculate the relative path.  Must be a parent
            of this URI.

        Returns
        -------
        subpath : `str`
            The sub path of this URI relative to the supplied other URI.
            Returns `None` if there is no parent child relationship.
            Scheme and netloc must match but for file URIs schemeless
            is also used. If this URI is a relative URI but the other is
            absolute, it is assumed to be in the parent completely unless it
            starts with ".." (in which case the path is combined and tested).
            If both URIs are relative, the relative paths are compared
            for commonality.

        Notes
        -----
        By definition a relative path will be relative to the enclosing
        absolute parent URI. It will be returned unchanged if it does not
        use a parent directory specification.
        """
        # We know self is a file so check the other. Anything other than
        # file or schemeless means by definition these have no paths in common
        if other.scheme and other.scheme != "file":
            return None

        # for case where both URIs are relative use the normal logic
        # where a/b/c.txt and a/b/ returns c.txt.
        if not self.isabs() and not other.isabs():
            return super().relative_to(other)

        # if we have a relative path convert it to absolute
        # relative to the supplied parent.  This is solely to handle
        # the case where the relative path includes ".." but somehow
        # then goes back inside the directory of the parent
        if not self.isabs():
            childUri = other.join(self.path)
            return childUri.relative_to(other)

        # By this point if the schemes are identical we can use the
        # base class implementation.
        if self.scheme == other.scheme:
            return super().relative_to(other)

        # if one is schemeless and the other is not the base implementation
        # will fail so we need to fix that -- they are both absolute so
        # forcing to file is fine.
        # Use a cast to convince mypy that other has to be a ButlerFileURI
        # in order to get to this part of the code.
        return self.abspath().relative_to(cast(ButlerFileURI, other).abspath())

    def read(self, size: int = -1) -> bytes:
        """Return the content of the file as bytes.

        Parameters
        ----------
        size : `int`, optional
            Maximum number of bytes to read. A negative value (the default)
            reads the entire file.

        Returns
        -------
        data : `bytes`
            The bytes read from the file.
        """
        with open(self.ospath, "rb") as fh:
            return fh.read(size)

    def write(self, data: bytes, overwrite: bool = True) -> None:
        """Write the supplied data to the file.

        The parent directory is created if it does not already exist.

        Parameters
        ----------
        data : `bytes`
            The bytes to write.
        overwrite : `bool`, optional
            If `True` (the default) an existing file is replaced. If `False`
            the file is opened in exclusive-creation mode, so an existing
            file results in `FileExistsError`.
        """
        parent = os.path.dirname(self.ospath)
        if not os.path.exists(parent):
            safeMakeDir(parent)
        mode = "wb" if overwrite else "xb"
        with open(self.ospath, mode) as fh:
            fh.write(data)

    def mkdir(self) -> None:
        """Make the directory associated with this URI.

        Raises
        ------
        FileExistsError
            Raised if the path exists but is not a directory.
        """
        if not os.path.exists(self.ospath):
            safeMakeDir(self.ospath)
        elif not os.path.isdir(self.ospath):
            raise FileExistsError(f"URI {self} exists but is not a directory!")

    def isdir(self) -> bool:
        """Return whether this URI is a directory.

        Returns
        -------
        isdir : `bool`
            `True` if this URI is a directory or looks like a directory,
            else `False`.
        """
        return self.dirLike or os.path.isdir(self.ospath)

    def transfer_from(self, src: ButlerURI, transfer: str,
                      overwrite: bool = False,
                      transaction: Optional[Union[DatastoreTransaction, NoTransaction]] = None) -> None:
        """Transfer the resource at ``src`` to this local file.

        Parameters
        ----------
        src : `ButlerURI`
            Source URI.
        transfer : `str`
            Mode to use for transferring the resource. Supports the following
            options: copy, link, symlink, hardlink, relsymlink, move, auto.
            "auto" resolves to ``transferDefault`` for a source that is
            already local and to "copy" for a source that had to be
            materialized as a temporary local file.
        overwrite : `bool`, optional
            Allow an existing file to be overwritten. Defaults to `False`.
        transaction : `DatastoreTransaction`, optional
            If a transaction is provided, undo actions will be registered.

        Raises
        ------
        ValueError
            Raised if ``transfer`` is not one of ``transferModes``.
        FileNotFoundError
            Raised if the local copy of the source does not exist.
        RuntimeError
            Raised if a link-based mode is requested for a source that is
            only available as a temporary local file.
        FileExistsError
            Raised if the destination exists and ``overwrite`` is `False`.
        NotImplementedError
            Raised if the resolved transfer mode has no implementation.
        """
        # Fail early to prevent delays if remote resources are requested
        if transfer not in self.transferModes:
            raise ValueError(f"Transfer mode '{transfer}' not supported by URI scheme {self.scheme}")

        # Existence checks can take time so only try if the log message
        # will be issued.
        if log.isEnabledFor(logging.DEBUG):
            log.debug("Transferring %s [exists: %s] -> %s [exists: %s] (transfer=%s)",
                      src, src.exists(), self, self.exists(), transfer)

        # We do not have to special case ButlerFileURI here because
        # as_local handles that.
        with src.as_local() as local_uri:
            is_temporary = local_uri.isTemporary
            local_src = local_uri.ospath

            # Default transfer mode depends on whether we have a temporary
            # file or not.
            if transfer == "auto":
                transfer = self.transferDefault if not is_temporary else "copy"

            if not os.path.exists(local_src):
                if is_temporary:
                    msg = f"Local file {local_uri} downloaded from {src} has gone missing"
                else:
                    msg = f"Source URI {src} does not exist"
                raise FileNotFoundError(msg)

            # Follow soft links
            local_src = os.path.realpath(os.path.normpath(local_src))

            # All the modes involving linking use "link" somewhere
            if "link" in transfer and is_temporary:
                raise RuntimeError("Can not use local file system transfer mode"
                                   f" {transfer} for remote resource ({src})")

            # For temporary files we can own them
            requested_transfer = transfer
            if is_temporary and transfer == "copy":
                transfer = "move"

            # The output location should not exist. Use lexists so that a
            # soft link at the destination counts as existing even when its
            # target is missing; otherwise a dangling link would be written
            # through (copy) or would block the new link despite
            # overwrite=True.
            dest_exists = os.path.lexists(self.ospath)
            if not overwrite and dest_exists:
                raise FileExistsError(f"Destination path '{self}' already exists. Transfer "
                                      f"from {src} cannot be completed.")

            # Make the path absolute (but don't follow links since that
            # would possibly cause us to end up in the wrong place if the
            # file existed already as a soft link)
            newFullPath = os.path.abspath(self.ospath)
            outputDir = os.path.dirname(newFullPath)
            if not os.path.isdir(outputDir):
                # Must create the directory -- this can not be rolled back
                # since another transfer running concurrently may
                # be relying on this existing.
                safeMakeDir(outputDir)

            if transaction is None:
                # Use a no-op transaction to reduce code duplication
                transaction = NoTransaction()

            # For links the OS doesn't let us overwrite so if something does
            # exist we have to remove it before we do the actual "transfer"
            # below
            if "link" in transfer and overwrite and dest_exists:
                try:
                    self.remove()
                except Exception as exc:
                    # Deliberately best-effort: if this fails the problem
                    # will manifest immediately below with a more relevant
                    # error message. Record it for debugging only.
                    log.debug("Could not remove existing destination %s before linking: %s",
                              self, exc)

            if transfer == "move":
                with transaction.undoWith(f"move from {local_src}", shutil.move, newFullPath, local_src):
                    shutil.move(local_src, newFullPath)
            elif transfer == "copy":
                with transaction.undoWith(f"copy from {local_src}", os.remove, newFullPath):
                    shutil.copy(local_src, newFullPath)
            elif transfer == "link":
                # Try hard link and if that fails use a symlink
                with transaction.undoWith(f"link to {local_src}", os.remove, newFullPath):
                    try:
                        os.link(local_src, newFullPath)
                    except OSError:
                        # local_src has already been resolved through any
                        # existing symlinks so the new link points at the
                        # real file.
                        os.symlink(local_src, newFullPath)
            elif transfer == "hardlink":
                with transaction.undoWith(f"hardlink to {local_src}", os.remove, newFullPath):
                    os.link(local_src, newFullPath)
            elif transfer == "symlink":
                # local_src has already been resolved through any existing
                # symlinks
                with transaction.undoWith(f"symlink to {local_src}", os.remove, newFullPath):
                    os.symlink(local_src, newFullPath)
            elif transfer == "relsymlink":
                # This is a standard symlink but using a relative path
                # Need the directory name to give to relative root
                # A full file path confuses it into an extra ../
                newFullPathRoot = os.path.dirname(newFullPath)
                relPath = os.path.relpath(local_src, newFullPathRoot)
                with transaction.undoWith(f"relsymlink to {local_src}", os.remove, newFullPath):
                    os.symlink(relPath, newFullPath)
            else:
                raise NotImplementedError(f"Transfer type '{transfer}' not supported.")

        # This was an explicit move requested from a remote resource
        # try to remove that remote resource. We check is_temporary because
        # the local file would have been moved by shutil.move already.
        if requested_transfer == "move" and is_temporary:
            # Transactions do not work here
            src.remove()

    def walk(self, file_filter: Optional[Union[str, re.Pattern]] = None) -> Iterator[Union[List,
                                                                                           Tuple[ButlerURI,
                                                                                                 List[str],
                                                                                                 List[str]]]]:
        """Walk the directory tree returning matching files and directories.

        Parameters
        ----------
        file_filter : `str` or `re.Pattern`, optional
            Regex used to select files; only file names for which
            ``search`` finds a match are kept in the returned list.

        Yields
        ------
        dirpath : `ButlerURI`
            Current directory being examined.
        dirnames : `list` of `str`
            Names of subdirectories within dirpath.
        filenames : `list` of `str`
            Names of all the files within dirpath.

        Raises
        ------
        ValueError
            Raised if this URI is not a directory.
        """
        if not self.isdir():
            raise ValueError("Can not walk a non-directory URI")

        if isinstance(file_filter, str):
            file_filter = re.compile(file_filter)

        for root, dirs, files in os.walk(self.ospath):
            # Filter by the regex
            if file_filter is not None:
                files = [f for f in files if file_filter.search(f)]
            yield type(self)(root, forceAbsolute=False, forceDirectory=True), dirs, files

    @classmethod
    def _fixupPathUri(cls, parsed: urllib.parse.ParseResult, root: Optional[Union[str, ButlerURI]] = None,
                      forceAbsolute: bool = False,
                      forceDirectory: bool = False) -> Tuple[urllib.parse.ParseResult, bool]:
        """Fix up relative paths in URI instances.

        Parameters
        ----------
        parsed : `~urllib.parse.ParseResult`
            The result from parsing a URI using `urllib.parse`.
        root : `str` or `ButlerURI`, optional
            Path to use as root when converting relative to absolute.
            If `None`, it will be the current working directory. This
            is a local file system path, or a file URI.  It is only used if
            a file-scheme is used incorrectly with a relative path.
        forceAbsolute : `bool`, ignored
            Has no effect for this subclass. ``file`` URIs are always
            absolute.
        forceDirectory : `bool`, optional
            If `True` forces the URI to end with a separator, otherwise given
            URI is interpreted as is.

        Returns
        -------
        modified : `~urllib.parse.ParseResult`
            Update result if a URI is being handled.
        dirLike : `bool`
            `True` if given parsed URI has a trailing separator or
            forceDirectory is True. Otherwise `False`.

        Raises
        ------
        RuntimeError
            Raised if ``root`` is a `ButlerURI` with a scheme other than
            ``file``.

        Notes
        -----
        Relative paths are explicitly not supported by RFC8089 but `urllib`
        does accept URIs of the form ``file:relative/path.ext``. They need
        to be turned into absolute paths before they can be used.  This is
        always done regardless of the ``forceAbsolute`` parameter.
        """
        # assume we are not dealing with a directory like URI
        dirLike = False

        # file URI implies POSIX path separators so split as POSIX,
        # then join as os, and convert to abspath. Do not handle
        # home directories since "file" scheme is explicitly documented
        # to not do tilde expansion.
        sep = posixpath.sep

        # For local file system we can explicitly check to see if this
        # really is a directory. The URI might point to a location that
        # does not exists yet but all that matters is if it is a directory
        # then we make sure use that fact. No need to do the check if
        # we are already being told.
        # The path stored in the URI is quoted, so unquote it before asking
        # the file system; otherwise directories whose names need quoting
        # (e.g. containing spaces) are never recognised.
        if not forceDirectory and posixpath.isdir(urllib.parse.unquote(parsed.path)):
            forceDirectory = True

        # For an absolute path all we need to do is check if we need
        # to force the directory separator
        if posixpath.isabs(parsed.path):
            if forceDirectory:
                if not parsed.path.endswith(sep):
                    parsed = parsed._replace(path=parsed.path+sep)
                dirLike = True
            return copy.copy(parsed), dirLike

        # Relative path so must fix it to be compliant with the standard

        # Replacement values for the URI
        replacements = {}

        if root is None:
            root = os.path.abspath(os.path.curdir)
        elif isinstance(root, ButlerURI):
            if root.scheme and root.scheme != "file":
                raise RuntimeError(f"The override root must be a file URI not {root.scheme}")
            root = os.path.abspath(root.ospath)

        replacements["path"] = posixpath.normpath(posixpath.join(os2posix(root), parsed.path))

        # normpath strips trailing "/" so put it back if necessary
        # Acknowledge that trailing separator exists.
        if forceDirectory or (parsed.path.endswith(sep) and not replacements["path"].endswith(sep)):
            replacements["path"] += sep
            dirLike = True

        # ParseResult is a NamedTuple so _replace is standard API
        parsed = parsed._replace(**replacements)

        if parsed.params or parsed.query:
            log.warning("Additional items unexpectedly encountered in file URI: %s", parsed.geturl())

        return parsed, dirLike