Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24import os 

25import os.path 

26import shutil 

27import urllib 

28import posixpath 

29import copy 

30import logging 

31 

32__all__ = ('ButlerFileURI',) 

33 

34from typing import ( 

35 TYPE_CHECKING, 

36 cast, 

37 Optional, 

38 Tuple, 

39 Union, 

40) 

41 

42from ..utils import safeMakeDir 

43from .utils import NoTransaction, os2posix, posix2os 

44from ._butlerUri import ButlerURI 

45 

46 

if TYPE_CHECKING:
    # Only needed for type annotations, so it is never imported at runtime.
    from ..datastore import DatastoreTransaction


# Module-level logger named after this module.
log = logging.getLogger(__name__)

52 

53 

class ButlerFileURI(ButlerURI):
    """URI for explicit ``file`` scheme."""

    # Transfer modes accepted by ``transfer_from``.
    transferModes = ("copy", "link", "symlink", "hardlink", "relsymlink", "auto", "move")

    # Mode substituted for "auto" when the source is not a temporary file.
    transferDefault: str = "link"

    # By definition refers to a local file
    isLocal = True

    @property
    def ospath(self) -> str:
        """Path component of the URI localized to current OS.

        Will unquote URI path since a formal URI must include the quoting.
        """
        return urllib.parse.unquote(posix2os(self._uri.path))

    def exists(self) -> bool:
        """Indicate whether the path exists on the local file system.

        Returns
        -------
        exists : `bool`
            Result of `os.path.exists` on the OS path of this URI.
        """
        # Uses os.path.exists so if there is a soft link that points
        # to a file that no longer exists this will return False
        return os.path.exists(self.ospath)

    def size(self) -> int:
        """Return the size of the resource in bytes.

        Returns
        -------
        sz : `int`
            The size reported by `os.stat`. Always 0 if the path is a
            directory.
        """
        if os.path.isdir(self.ospath):
            return 0
        return os.stat(self.ospath).st_size

    def remove(self) -> None:
        """Remove the resource."""
        os.remove(self.ospath)

    def _as_local(self) -> Tuple[str, bool]:
        """Return the local path of the file.

        This is an internal helper for ``as_local()``.

        Returns
        -------
        path : `str`
            The local path to this file.
        temporary : `bool`
            Always returns `False` (this is not a temporary file).
        """
        return self.ospath, False

    def _force_to_file(self) -> ButlerFileURI:
        """Force a schemeless URI to a file URI and returns a new URI.

        Returns
        -------
        file : `ButlerFileURI`
            A copy of the URI. This class always uses the file scheme so
            the copy is identical and this implementation never raises.
        """
        # This is always a file scheme so always return copy
        return copy.copy(self)

    def relative_to(self, other: ButlerURI) -> Optional[str]:
        """Return the relative path from this URI to the other URI.

        Parameters
        ----------
        other : `ButlerURI`
            URI to use to calculate the relative path.  Must be a parent
            of this URI.

        Returns
        -------
        subpath : `str`
            The sub path of this URI relative to the supplied other URI.
            Returns `None` if there is no parent child relationship.
            Scheme and netloc must match but for file URIs schemeless
            is also used. If this URI is a relative URI but the other is
            absolute, it is assumed to be in the parent completely unless it
            starts with ".." (in which case the path is combined and tested).
            If both URIs are relative, the relative paths are compared
            for commonality.

        Notes
        -----
        By definition a relative path will be relative to the enclosing
        absolute parent URI. It will be returned unchanged if it does not
        use a parent directory specification.
        """
        # We know self is a file so check the other. Anything other than
        # file or schemeless means by definition these have no paths in common
        if other.scheme and other.scheme != "file":
            return None

        # for case where both URIs are relative use the normal logic
        # where a/b/c.txt and a/b/ returns c.txt.
        if not self.isabs() and not other.isabs():
            return super().relative_to(other)

        # if we have a relative path convert it to absolute
        # relative to the supplied parent.  This is solely to handle
        # the case where the relative path includes ".." but somehow
        # then goes back inside the directory of the parent
        if not self.isabs():
            childUri = other.join(self.path)
            return childUri.relative_to(other)

        # By this point if the schemes are identical we can use the
        # base class implementation.
        if self.scheme == other.scheme:
            return super().relative_to(other)

        # if one is schemeless and the other is not the base implementation
        # will fail so we need to fix that -- they are both absolute so
        # forcing to file is fine.
        # Use a cast to convince mypy that other has to be a ButlerFileURI
        # in order to get to this part of the code.
        return self._force_to_file().relative_to(cast(ButlerFileURI, other)._force_to_file())

    def read(self, size: int = -1) -> bytes:
        """Read bytes from the local file.

        Parameters
        ----------
        size : `int`, optional
            Passed directly to the ``read`` method of the binary file
            handle. Defaults to -1.

        Returns
        -------
        data : `bytes`
            The bytes read from the file.
        """
        with open(self.ospath, "rb") as fh:
            return fh.read(size)

    def write(self, data: bytes, overwrite: bool = True) -> None:
        """Write the supplied bytes to the local file.

        The parent directory is created if it does not already exist.

        Parameters
        ----------
        data : `bytes`
            Bytes to write to the file.
        overwrite : `bool`, optional
            If `True` (the default) the file is opened with mode ``wb``.
            If `False` it is opened with mode ``xb``, which fails with
            `FileExistsError` if the file is already present.
        """
        parentDir = os.path.dirname(self.ospath)
        if not os.path.exists(parentDir):
            safeMakeDir(parentDir)
        mode = "wb" if overwrite else "xb"
        with open(self.ospath, mode) as fh:
            fh.write(data)

    def mkdir(self) -> None:
        """Create the directory this URI refers to if it does not exist.

        Raises
        ------
        FileExistsError
            Raised if the path exists but is not a directory.
        """
        if not os.path.exists(self.ospath):
            safeMakeDir(self.ospath)
        elif not os.path.isdir(self.ospath):
            raise FileExistsError(f"URI {self} exists but is not a directory!")

    def transfer_from(self, src: ButlerURI, transfer: str,
                      overwrite: bool = False,
                      transaction: Optional[Union[DatastoreTransaction, NoTransaction]] = None) -> None:
        """Transfer the supplied source resource to this local file.

        Parameters
        ----------
        src : `ButlerURI`
            Source URI.
        transfer : `str`
            Mode to use for transferring the resource. Supports the following
            options: copy, link, symlink, hardlink, relsymlink, auto, move.
            ``auto`` resolves to ``transferDefault`` unless the source had
            to be made available as a temporary local file, in which case
            it resolves to ``copy``.
        overwrite : `bool`, optional
            Allow an existing file to be overwritten. Defaults to `False`.
        transaction : `DatastoreTransaction`, optional
            If a transaction is provided, undo actions will be registered.

        Raises
        ------
        ValueError
            Raised if the transfer mode is not in ``transferModes``.
        FileNotFoundError
            Raised if the local version of the source does not exist.
        RuntimeError
            Raised if a link-based mode is requested for a source that is
            only available as a temporary local file.
        FileExistsError
            Raised if the destination exists and ``overwrite`` is `False`.
        NotImplementedError
            Raised if the mode passes validation but has no implementation.
        """
        # Fail early to prevent delays if remote resources are requested
        if transfer not in self.transferModes:
            raise ValueError(f"Transfer mode '{transfer}' not supported by URI scheme {self.scheme}")

        # Existence checks can be slow (the source may be remote) so only
        # do them if the message is really going to be issued.
        if log.isEnabledFor(logging.DEBUG):
            log.debug("Transferring %s [exists: %s] -> %s [exists: %s] (transfer=%s)",
                      src, src.exists(), self, self.exists(), transfer)

        # We do not have to special case ButlerFileURI here because
        # as_local handles that.
        with src.as_local() as local_uri:
            is_temporary = local_uri.isTemporary
            local_src = local_uri.ospath

            # Default transfer mode depends on whether we have a temporary
            # file or not.
            if transfer == "auto":
                transfer = "copy" if is_temporary else self.transferDefault

            if not os.path.exists(local_src):
                if is_temporary:
                    msg = f"Local file {local_uri} downloaded from {src} has gone missing"
                else:
                    msg = f"Source URI {src} does not exist"
                raise FileNotFoundError(msg)

            # Follow soft links
            local_src = os.path.realpath(os.path.normpath(local_src))

            # All the modes involving linking use "link" somewhere
            if "link" in transfer and is_temporary:
                raise RuntimeError("Can not use local file system transfer mode"
                                   f" {transfer} for remote resource ({src})")

            # For temporary files we can own them
            requested_transfer = transfer
            if is_temporary and transfer == "copy":
                transfer = "move"

            # The output location should not exist
            dest_exists = self.exists()
            if not overwrite and dest_exists:
                raise FileExistsError(f"Destination path '{self}' already exists. Transfer "
                                      f"from {src} cannot be completed.")

            # Make the path absolute (but don't follow links since that
            # would possibly cause us to end up in the wrong place if the
            # file existed already as a soft link)
            newFullPath = os.path.abspath(self.ospath)
            outputDir = os.path.dirname(newFullPath)
            if not os.path.isdir(outputDir):
                # Must create the directory -- this can not be rolled back
                # since another transfer running concurrently may
                # be relying on this existing.
                safeMakeDir(outputDir)

            if transaction is None:
                # Use a no-op transaction to reduce code duplication
                transaction = NoTransaction()

            # For links the OS doesn't let us overwrite so if something does
            # exist we have to remove it before we do the actual "transfer"
            # below. The existence check above follows links, so also look
            # for a dangling symlink, which would block the new link too.
            if "link" in transfer and overwrite and (dest_exists or os.path.islink(newFullPath)):
                try:
                    self.remove()
                except Exception:
                    # If this fails we ignore it since it's a problem
                    # that will manifest immediately below with a more
                    # relevant error message
                    log.debug("Ignoring failure to remove %s before creating link", self, exc_info=True)

            if transfer == "move":
                with transaction.undoWith(f"move from {local_src}", shutil.move, newFullPath, local_src):
                    shutil.move(local_src, newFullPath)
            elif transfer == "copy":
                with transaction.undoWith(f"copy from {local_src}", os.remove, newFullPath):
                    shutil.copy(local_src, newFullPath)
            elif transfer == "link":
                # Try hard link and if that fails use a symlink
                with transaction.undoWith(f"link to {local_src}", os.remove, newFullPath):
                    try:
                        os.link(local_src, newFullPath)
                    except OSError:
                        # Read through existing symlinks
                        os.symlink(local_src, newFullPath)
            elif transfer == "hardlink":
                with transaction.undoWith(f"hardlink to {local_src}", os.remove, newFullPath):
                    os.link(local_src, newFullPath)
            elif transfer == "symlink":
                # Read through existing symlinks
                with transaction.undoWith(f"symlink to {local_src}", os.remove, newFullPath):
                    os.symlink(local_src, newFullPath)
            elif transfer == "relsymlink":
                # This is a standard symlink but using a relative path
                # Need the directory name to give to relative root
                # A full file path confuses it into an extra ../
                newFullPathRoot = os.path.dirname(newFullPath)
                relPath = os.path.relpath(local_src, newFullPathRoot)
                with transaction.undoWith(f"relsymlink to {local_src}", os.remove, newFullPath):
                    os.symlink(relPath, newFullPath)
            else:
                raise NotImplementedError(f"Transfer type '{transfer}' not supported.")

        # This was an explicit move requested from a remote resource
        # try to remove that remote resource. We check is_temporary because
        # the local file would have been moved by shutil.move already.
        if requested_transfer == "move" and is_temporary:
            # Transactions do not work here
            src.remove()

    @staticmethod
    def _fixupPathUri(parsed: urllib.parse.ParseResult, root: Optional[Union[str, ButlerURI]] = None,
                      forceAbsolute: bool = False,
                      forceDirectory: bool = False) -> Tuple[urllib.parse.ParseResult, bool]:
        """Fix up relative paths in URI instances.

        Parameters
        ----------
        parsed : `~urllib.parse.ParseResult`
            The result from parsing a URI using `urllib.parse`.
        root : `str` or `ButlerURI`, optional
            Path to use as root when converting relative to absolute.
            If `None`, it will be the current working directory. This
            is a local file system path, or a file URI.  It is only used if
            a file-scheme is used incorrectly with a relative path.
        forceAbsolute : `bool`, ignored
            Has no effect for this subclass. ``file`` URIs are always
            absolute.
        forceDirectory : `bool`, optional
            If `True` forces the URI to end with a separator, otherwise given
            URI is interpreted as is.

        Returns
        -------
        modified : `~urllib.parse.ParseResult`
            Update result if a URI is being handled.
        dirLike : `bool`
            `True` if given parsed URI has a trailing separator or
            forceDirectory is True. Otherwise `False`.

        Raises
        ------
        RuntimeError
            Raised if ``root`` is a `ButlerURI` with a scheme other than
            ``file``.

        Notes
        -----
        Relative paths are explicitly not supported by RFC8089 but `urllib`
        does accept URIs of the form ``file:relative/path.ext``. They need
        to be turned into absolute paths before they can be used.  This is
        always done regardless of the ``forceAbsolute`` parameter.
        """
        # assume we are not dealing with a directory like URI
        dirLike = False

        # file URI implies POSIX path separators so split as POSIX,
        # then join as os, and convert to abspath. Do not handle
        # home directories since "file" scheme is explicitly documented
        # to not do tilde expansion.
        sep = posixpath.sep

        # For local file system we can explicitly check to see if this
        # really is a directory. The URI might point to a location that
        # does not exists yet but all that matters is if it is a directory
        # then we make sure use that fact. No need to do the check if
        # we are already being told.
        # The URI path is quoted so it has to be unquoted before it can be
        # compared with what is on disk.
        if not forceDirectory and posixpath.isdir(urllib.parse.unquote(parsed.path)):
            forceDirectory = True

        # For an absolute path all we need to do is check if we need
        # to force the directory separator
        if posixpath.isabs(parsed.path):
            if forceDirectory:
                if not parsed.path.endswith(sep):
                    parsed = parsed._replace(path=parsed.path+sep)
                dirLike = True
            return copy.copy(parsed), dirLike

        # Relative path so must fix it to be compliant with the standard

        if root is None:
            root = os.path.abspath(os.path.curdir)
        elif isinstance(root, ButlerURI):
            if root.scheme and root.scheme != "file":
                raise RuntimeError(f"The override root must be a file URI not {root.scheme}")
            root = os.path.abspath(root.ospath)

        newPath = posixpath.normpath(posixpath.join(os2posix(root), parsed.path))

        # normpath strips trailing "/" so put it back if necessary
        # Acknowledge that trailing separator exists.
        if forceDirectory or (parsed.path.endswith(sep) and not newPath.endswith(sep)):
            newPath += sep
            dirLike = True

        # ParseResult is a NamedTuple so _replace is standard API
        parsed = parsed._replace(path=newPath)

        if parsed.params or parsed.query:
            log.warning("Additional items unexpectedly encountered in file URI: %s", parsed.geturl())

        return parsed, dirLike