Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24import os 

25import os.path 

26import shutil 

27import urllib 

28import posixpath 

29import copy 

30import logging 

31 

# Public API of this module: only the ButlerFileURI class is exported by
# ``from <module> import *``.
__all__ = ('ButlerFileURI',)

33 

34from typing import ( 

35 TYPE_CHECKING, 

36 cast, 

37 Optional, 

38 Tuple, 

39 Union, 

40) 

41 

42from ..utils import safeMakeDir 

43from .utils import NoTransaction, os2posix, posix2os 

44from ._butlerUri import ButlerURI 

45 

46 

if TYPE_CHECKING:
    # Imported for static type checking only. DatastoreTransaction is
    # referenced solely in annotations (see transfer_from), which are not
    # evaluated at runtime because of ``from __future__ import annotations``.
    from ..datastore import DatastoreTransaction

49 

50 

# Module-level logger, named after this module so that log output can be
# filtered per module.
log = logging.getLogger(__name__)

52 

53 

class ButlerFileURI(ButlerURI):
    """URI for explicit ``file`` scheme."""

    # Transfer modes accepted by transfer_from(); "auto" is resolved to
    # transferDefault (or "copy" for temporary sources) at transfer time.
    transferModes = ("copy", "link", "symlink", "hardlink", "relsymlink", "auto", "move")
    transferDefault: str = "link"

    @property
    def ospath(self) -> str:
        """Path component of the URI localized to current OS.

        Will unquote URI path since a formal URI must include the quoting.
        """
        return urllib.parse.unquote(posix2os(self._uri.path))

    def exists(self) -> bool:
        """Indicate whether the local path exists.

        Returns
        -------
        exists : `bool`
            `True` if the path exists on the local file system.
        """
        # Uses os.path.exists so if there is a soft link that points
        # to a file that no longer exists this will return False
        return os.path.exists(self.ospath)

    def size(self) -> int:
        """Return the size of the file in bytes.

        Returns
        -------
        sz : `int`
            Size in bytes as reported by `os.stat`. Directories always
            report a size of 0.
        """
        localPath = self.ospath
        if os.path.isdir(localPath):
            return 0
        return os.stat(localPath).st_size

    def remove(self) -> None:
        """Remove the resource."""
        os.remove(self.ospath)

    def as_local(self) -> Tuple[str, bool]:
        """Return the local path of the file.

        Returns
        -------
        path : `str`
            The local path to this file.
        temporary : `bool`
            Always returns `False` (this is not a temporary file).
        """
        return self.ospath, False

    def _force_to_file(self) -> ButlerFileURI:
        """Return a copy of this URI using the ``file`` scheme.

        Returns
        -------
        file : `ButlerFileURI`
            A shallow copy of this URI. This class always uses the ``file``
            scheme so no conversion is required and nothing is raised.
        """
        # This is always a file scheme so always return copy
        return copy.copy(self)

    def relative_to(self, other: ButlerURI) -> Optional[str]:
        """Return the relative path from this URI to the other URI.

        Parameters
        ----------
        other : `ButlerURI`
            URI to use to calculate the relative path.  Must be a parent
            of this URI.

        Returns
        -------
        subpath : `str`
            The sub path of this URI relative to the supplied other URI.
            Returns `None` if there is no parent child relationship.
            Scheme and netloc must match but for file URIs schemeless
            is also used. If this URI is a relative URI but the other is
            absolute, it is assumed to be in the parent completely unless it
            starts with ".." (in which case the path is combined and tested).
            If both URIs are relative, the relative paths are compared
            for commonality.

        Notes
        -----
        By definition a relative path will be relative to the enclosing
        absolute parent URI. It will be returned unchanged if it does not
        use a parent directory specification.
        """
        # We know self is a file so check the other. Anything other than
        # file or schemeless means by definition these have no paths in common
        if other.scheme and other.scheme != "file":
            return None

        # for case where both URIs are relative use the normal logic
        # where a/b/c.txt and a/b/ returns c.txt.
        if not self.isabs() and not other.isabs():
            return super().relative_to(other)

        # if we have a relative path convert it to absolute
        # relative to the supplied parent.  This is solely to handle
        # the case where the relative path includes ".." but somehow
        # then goes back inside the directory of the parent
        if not self.isabs():
            childUri = other.join(self.path)
            return childUri.relative_to(other)

        # By this point if the schemes are identical we can use the
        # base class implementation.
        if self.scheme == other.scheme:
            return super().relative_to(other)

        # if one is schemeless and the other is not the base implementation
        # will fail so we need to fix that -- they are both absolute so
        # forcing to file is fine.
        # Use a cast to convince mypy that other has to be a ButlerFileURI
        # in order to get to this part of the code.
        return self._force_to_file().relative_to(cast(ButlerFileURI, other)._force_to_file())

    def read(self, size: int = -1) -> bytes:
        # Docstring inherits
        with open(self.ospath, "rb") as fh:
            return fh.read(size)

    def write(self, data: bytes, overwrite: bool = True) -> None:
        """Write the supplied bytes to the local file.

        Parameters
        ----------
        data : `bytes`
            The bytes to write.
        overwrite : `bool`, optional
            If `True` (the default) an existing file is replaced. If `False`
            the file is opened in exclusive-creation mode, so
            `FileExistsError` is raised if it already exists.
        """
        localPath = self.ospath
        # The parent directory is created on demand.
        parentDir = os.path.dirname(localPath)
        if not os.path.exists(parentDir):
            safeMakeDir(parentDir)
        mode = "wb" if overwrite else "xb"
        with open(localPath, mode) as f:
            f.write(data)

    def mkdir(self) -> None:
        """Create the directory for this URI if it does not already exist.

        Raises
        ------
        FileExistsError
            Raised if the path exists but is not a directory.
        """
        if not os.path.exists(self.ospath):
            safeMakeDir(self.ospath)
        elif not os.path.isdir(self.ospath):
            raise FileExistsError(f"URI {self} exists but is not a directory!")

    def transfer_from(self, src: ButlerURI, transfer: str,
                      overwrite: bool = False,
                      transaction: Optional[Union[DatastoreTransaction, NoTransaction]] = None) -> None:
        """Transfer the ``src`` resource to this local file.

        Parameters
        ----------
        src : `ButlerURI`
            Source URI.
        transfer : `str`
            Mode to use for transferring the resource. Must be one of
            ``transferModes``: copy, link, symlink, hardlink, relsymlink,
            auto, move. ``auto`` becomes ``transferDefault`` for a
            non-temporary source and ``copy`` for a temporary one.
        overwrite : `bool`, optional
            Allow an existing file to be overwritten. Defaults to `False`.
        transaction : `DatastoreTransaction`, optional
            If a transaction is provided, undo actions will be registered.

        Raises
        ------
        ValueError
            Raised if the transfer mode is not in ``transferModes``.
        FileNotFoundError
            Raised if the local version of the source does not exist.
        RuntimeError
            Raised if a link-based mode is requested for a source that
            is only available as a temporary local file.
        FileExistsError
            Raised if the destination exists and ``overwrite`` is `False`.
        """
        # Fail early to prevent delays if remote resources are requested
        if transfer not in self.transferModes:
            raise ValueError(f"Transfer mode '{transfer}' not supported by URI scheme {self.scheme}")

        # Existence checks can be slow (src may be a remote resource) so
        # only perform them if the debug message will actually be emitted.
        if log.isEnabledFor(logging.DEBUG):
            log.debug("Transferring %s [exists: %s] -> %s [exists: %s] (transfer=%s)",
                      src, src.exists(), self, self.exists(), transfer)

        # We do not have to special case ButlerFileURI here because
        # as_local handles that.
        local_src, is_temporary = src.as_local()

        # Default transfer mode depends on whether we have a temporary
        # file or not.
        if transfer == "auto":
            transfer = self.transferDefault if not is_temporary else "copy"

        # Follow soft links
        local_src = os.path.realpath(os.path.normpath(local_src))

        if not os.path.exists(local_src):
            raise FileNotFoundError(f"Source URI {src} does not exist")

        # All the modes involving linking use "link" somewhere
        if "link" in transfer and is_temporary:
            raise RuntimeError("Can not use local file system transfer mode"
                               f" {transfer} for remote resource ({src})")

        # For temporary files we can own them
        requested_transfer = transfer
        if is_temporary and transfer == "copy":
            transfer = "move"

        # The output location should not exist
        dest_exists = self.exists()
        if not overwrite and dest_exists:
            raise FileExistsError(f"Destination path '{self}' already exists. Transfer "
                                  f"from {src} cannot be completed.")

        # Make the path absolute (but don't follow links since that
        # would possibly cause us to end up in the wrong place if the
        # file existed already as a soft link)
        newFullPath = os.path.abspath(self.ospath)
        outputDir = os.path.dirname(newFullPath)
        if not os.path.isdir(outputDir):
            # Must create the directory -- this can not be rolled back
            # since another transfer running concurrently may
            # be relying on this existing.
            safeMakeDir(outputDir)

        if transaction is None:
            # Use a no-op transaction to reduce code duplication
            transaction = NoTransaction()

        # For links the OS doesn't let us overwrite so if something does
        # exist we have to remove it before we do the actual "transfer" below
        if "link" in transfer and overwrite and dest_exists:
            try:
                self.remove()
            except Exception:
                # If this fails we ignore it since it's a problem
                # that will manifest immediately below with a more relevant
                # error message
                pass

        if transfer == "move":
            with transaction.undoWith(f"move from {local_src}", shutil.move, newFullPath, local_src):
                shutil.move(local_src, newFullPath)
        elif transfer == "copy":
            with transaction.undoWith(f"copy from {local_src}", os.remove, newFullPath):
                shutil.copy(local_src, newFullPath)
        elif transfer == "link":
            # Try hard link and if that fails use a symlink
            with transaction.undoWith(f"link to {local_src}", os.remove, newFullPath):
                try:
                    os.link(local_src, newFullPath)
                except OSError:
                    # Read through existing symlinks
                    os.symlink(local_src, newFullPath)
        elif transfer == "hardlink":
            with transaction.undoWith(f"hardlink to {local_src}", os.remove, newFullPath):
                os.link(local_src, newFullPath)
        elif transfer == "symlink":
            # Read through existing symlinks
            with transaction.undoWith(f"symlink to {local_src}", os.remove, newFullPath):
                os.symlink(local_src, newFullPath)
        elif transfer == "relsymlink":
            # This is a standard symlink but using a relative path
            # Need the directory name to give to relative root
            # A full file path confuses it into an extra ../
            newFullPathRoot = os.path.dirname(newFullPath)
            relPath = os.path.relpath(local_src, newFullPathRoot)
            with transaction.undoWith(f"relsymlink to {local_src}", os.remove, newFullPath):
                os.symlink(relPath, newFullPath)
        else:
            raise NotImplementedError(f"Transfer type '{transfer}' not supported.")

        # This was an explicit move requested from a remote resource
        # try to remove that resource. We check is_temporary because
        # the local file would have been moved by shutil.move already.
        if requested_transfer == "move" and is_temporary:
            # Transactions do not work here
            src.remove()

        if is_temporary and os.path.exists(local_src):
            # This should never happen since we have moved it above
            os.remove(local_src)

    @staticmethod
    def _fixupPathUri(parsed: urllib.parse.ParseResult, root: Optional[Union[str, ButlerURI]] = None,
                      forceAbsolute: bool = False,
                      forceDirectory: bool = False) -> Tuple[urllib.parse.ParseResult, bool]:
        """Fix up relative paths in URI instances.

        Parameters
        ----------
        parsed : `~urllib.parse.ParseResult`
            The result from parsing a URI using `urllib.parse`.
        root : `str` or `ButlerURI`, optional
            Path to use as root when converting relative to absolute.
            If `None`, it will be the current working directory. This
            is a local file system path, or a file URI.  It is only used if
            a file-scheme is used incorrectly with a relative path.
        forceAbsolute : `bool`, ignored
            Has no effect for this subclass. ``file`` URIs are always
            absolute.
        forceDirectory : `bool`, optional
            If `True` forces the URI to end with a separator, otherwise given
            URI is interpreted as is.

        Returns
        -------
        modified : `~urllib.parse.ParseResult`
            Update result if a URI is being handled.
        dirLike : `bool`
            `True` if given parsed URI has a trailing separator or
            forceDirectory is True. Otherwise `False`.

        Raises
        ------
        RuntimeError
            Raised if ``root`` is a `ButlerURI` with a scheme other than
            ``file``.

        Notes
        -----
        Relative paths are explicitly not supported by RFC8089 but `urllib`
        does accept URIs of the form ``file:relative/path.ext``. They need
        to be turned into absolute paths before they can be used.  This is
        always done regardless of the ``forceAbsolute`` parameter.
        """
        # assume we are not dealing with a directory like URI
        dirLike = False

        # file URI implies POSIX path separators so split as POSIX,
        # then join as os, and convert to abspath. Do not handle
        # home directories since "file" scheme is explicitly documented
        # to not do tilde expansion.
        sep = posixpath.sep

        # For local file system we can explicitly check to see if this
        # really is a directory. The URI might point to a location that
        # does not exists yet but all that matters is if it is a directory
        # then we make sure use that fact. No need to do the check if
        # we are already being told.
        # The URI path is quoted, so it has to be unquoted (as ospath does)
        # before it can be compared with what is on the file system.
        if not forceDirectory and posixpath.isdir(urllib.parse.unquote(parsed.path)):
            forceDirectory = True

        # For an absolute path all we need to do is check if we need
        # to force the directory separator
        if posixpath.isabs(parsed.path):
            if forceDirectory:
                if not parsed.path.endswith(sep):
                    parsed = parsed._replace(path=parsed.path+sep)
                dirLike = True
            return copy.copy(parsed), dirLike

        # Relative path so must fix it to be compliant with the standard

        if root is None:
            root = os.path.abspath(os.path.curdir)
        elif isinstance(root, ButlerURI):
            if root.scheme and root.scheme != "file":
                raise RuntimeError(f"The override root must be a file URI not {root.scheme}")
            root = os.path.abspath(root.ospath)

        newPath = posixpath.normpath(posixpath.join(os2posix(root), parsed.path))

        # normpath strips trailing "/" so put it back if necessary.
        # Acknowledge that trailing separator exists, but never double it
        # up when the normalized path already ends with one (e.g. "/").
        if forceDirectory or parsed.path.endswith(sep):
            if not newPath.endswith(sep):
                newPath += sep
            dirLike = True

        # ParseResult is a NamedTuple so _replace is standard API
        parsed = parsed._replace(path=newPath)

        if parsed.params or parsed.query:
            log.warning("Additional items unexpectedly encountered in file URI: %s", parsed.geturl())

        return parsed, dirLike