Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24import os 

25import os.path 

26import shutil 

27import urllib.parse 

28import posixpath 

29import copy 

30import logging 

31import re 

32 

# Public API of this module: only the file-scheme URI class is exported.
__all__ = ('ButlerFileURI',)

34 

35from typing import ( 

36 TYPE_CHECKING, 

37 cast, 

38 Iterator, 

39 List, 

40 Optional, 

41 Tuple, 

42 Union, 

43) 

44 

45from ..utils import safeMakeDir 

46from .utils import NoTransaction, os2posix, posix2os 

47from ._butlerUri import ButlerURI 

48 

49 

if TYPE_CHECKING:
    # Needed only for type annotations; never imported at runtime.
    from ..datastore import DatastoreTransaction


# Module-level logger, named after this module.
log = logging.getLogger(__name__)

55 

56 

class ButlerFileURI(ButlerURI):
    """URI for explicit ``file`` scheme.

    All operations act directly on the local file system through the
    OS-specific path returned by `ospath`.
    """

    # Transfer modes accepted by `transfer_from`.
    transferModes = ("copy", "link", "symlink", "hardlink", "relsymlink", "auto", "move")

    # Mode substituted for "auto" when the source is not a temporary file.
    transferDefault: str = "link"

    # By definition refers to a local file
    isLocal = True

    @property
    def ospath(self) -> str:
        """Path component of the URI localized to current OS.

        Will unquote URI path since a formal URI must include the quoting.
        """
        return urllib.parse.unquote(posix2os(self._uri.path))

    def exists(self) -> bool:
        """Indicate that the file exists.

        Returns
        -------
        exists : `bool`
            `True` if the path exists on the local file system.
        """
        # Uses os.path.exists so if there is a soft link that points
        # to a file that no longer exists this will return False
        return os.path.exists(self.ospath)

    def size(self) -> int:
        """Return the size of the file in bytes.

        Returns
        -------
        sz : `int`
            Size of the file in bytes. Directories always report 0.
        """
        path = self.ospath
        if os.path.isdir(path):
            return 0
        return os.stat(path).st_size

    def remove(self) -> None:
        """Remove the resource."""
        os.remove(self.ospath)

    def _as_local(self) -> Tuple[str, bool]:
        """Return the local path of the file.

        This is an internal helper for ``as_local()``.

        Returns
        -------
        path : `str`
            The local path to this file.
        temporary : `bool`
            Always returns `False` (this is not a temporary file).
        """
        return self.ospath, False

    def _force_to_file(self) -> ButlerFileURI:
        """Force a schemeless URI to a file URI and returns a new URI.

        Returns
        -------
        file : `ButlerFileURI`
            A copy of the URI using file scheme. If already a file scheme
            the copy will be identical.

        Raises
        ------
        ValueError
            Raised if this URI is schemeless and relative path and so can
            not be forced to file absolute path without context.
        """
        # This is always a file scheme so always return copy
        return copy.copy(self)

    def relative_to(self, other: ButlerURI) -> Optional[str]:
        """Return the relative path from this URI to the other URI.

        Parameters
        ----------
        other : `ButlerURI`
            URI to use to calculate the relative path.  Must be a parent
            of this URI.

        Returns
        -------
        subpath : `str`
            The sub path of this URI relative to the supplied other URI.
            Returns `None` if there is no parent child relationship.
            Scheme and netloc must match but for file URIs schemeless
            is also used. If this URI is a relative URI but the other is
            absolute, it is assumed to be in the parent completely unless it
            starts with ".." (in which case the path is combined and tested).
            If both URIs are relative, the relative paths are compared
            for commonality.

        Notes
        -----
        By definition a relative path will be relative to the enclosing
        absolute parent URI. It will be returned unchanged if it does not
        use a parent directory specification.
        """
        # We know self is a file so check the other. Anything other than
        # file or schemeless means by definition these have no paths in common
        if other.scheme and other.scheme != "file":
            return None

        if not self.isabs():
            # For the case where both URIs are relative use the normal logic
            # where a/b/c.txt and a/b/ returns c.txt.
            if not other.isabs():
                return super().relative_to(other)

            # We have a relative path so convert it to absolute relative to
            # the supplied parent. This is solely to handle the case where
            # the relative path includes ".." but somehow then goes back
            # inside the directory of the parent.
            childUri = other.join(self.path)
            return childUri.relative_to(other)

        # By this point if the schemes are identical we can use the
        # base class implementation.
        if self.scheme == other.scheme:
            return super().relative_to(other)

        # If one is schemeless and the other is not the base implementation
        # will fail so we need to fix that -- they are both absolute so
        # forcing to file is fine.
        # Use a cast to convince mypy that other has to be a ButlerFileURI
        # in order to get to this part of the code.
        return self._force_to_file().relative_to(cast(ButlerFileURI, other)._force_to_file())

    def read(self, size: int = -1) -> bytes:
        """Return the contents of the file as bytes.

        Parameters
        ----------
        size : `int`, optional
            Maximum number of bytes to read. The default of -1 reads the
            entire file.

        Returns
        -------
        data : `bytes`
            The bytes read from the file.
        """
        with open(self.ospath, "rb") as fh:
            return fh.read(size)

    def write(self, data: bytes, overwrite: bool = True) -> None:
        """Write the supplied data to the file.

        Parameters
        ----------
        data : `bytes`
            Bytes to write to the file.
        overwrite : `bool`, optional
            If `True` (default) any existing file is replaced. If `False`
            the file is opened in exclusive-creation mode, so
            `FileExistsError` is raised when the file already exists.
        """
        path = self.ospath
        parent = os.path.dirname(path)
        # An empty parent means a bare file name in the current working
        # directory, for which there is nothing to create.
        if parent and not os.path.exists(parent):
            safeMakeDir(parent)
        mode = "wb" if overwrite else "xb"
        with open(path, mode) as fh:
            fh.write(data)

    def mkdir(self) -> None:
        """Make the directory associated with this URI.

        Raises
        ------
        FileExistsError
            Raised if the path exists but is not a directory.
        """
        path = self.ospath
        if not os.path.exists(path):
            safeMakeDir(path)
        elif not os.path.isdir(path):
            raise FileExistsError(f"URI {self} exists but is not a directory!")

    def isdir(self) -> bool:
        """Return whether this URI is a directory.

        Returns
        -------
        isdir : `bool`
            `True` if this URI is a directory or looks like a directory,
            else `False`.
        """
        return self.dirLike or os.path.isdir(self.ospath)

    def transfer_from(self, src: ButlerURI, transfer: str,
                      overwrite: bool = False,
                      transaction: Optional[Union[DatastoreTransaction, NoTransaction]] = None) -> None:
        """Transfer the current resource to a local file.

        Parameters
        ----------
        src : `ButlerURI`
            Source URI.
        transfer : `str`
            Mode to use for transferring the resource. Supports the following
            options: copy, link, symlink, hardlink, relsymlink, move, auto.
            ``auto`` resolves to ``transferDefault`` for a source that is
            already local and to ``copy`` when the source had to be
            downloaded to a temporary file.
        overwrite : `bool`, optional
            Allow an existing file to be overwritten. Defaults to `False`.
        transaction : `DatastoreTransaction`, optional
            If a transaction is provided, undo actions will be registered.

        Raises
        ------
        ValueError
            Raised if the transfer mode is not one of ``transferModes``.
        FileNotFoundError
            Raised if the local version of the source does not exist.
        FileExistsError
            Raised if the destination exists and ``overwrite`` is `False`.
        RuntimeError
            Raised if a link-based mode is requested for a source that is
            only available as a temporary local copy.
        """
        # Fail early to prevent delays if remote resources are requested
        if transfer not in self.transferModes:
            raise ValueError(f"Transfer mode '{transfer}' not supported by URI scheme {self.scheme}")

        # Existence checks can be slow (the source may be remote) so only
        # perform them if the message is actually going to be issued.
        if log.isEnabledFor(logging.DEBUG):
            log.debug("Transferring %s [exists: %s] -> %s [exists: %s] (transfer=%s)",
                      src, src.exists(), self, self.exists(), transfer)

        # We do not have to special case ButlerFileURI here because
        # as_local handles that.
        with src.as_local() as local_uri:
            is_temporary = local_uri.isTemporary
            local_src = local_uri.ospath

            # Default transfer mode depends on whether we have a temporary
            # file or not.
            if transfer == "auto":
                transfer = self.transferDefault if not is_temporary else "copy"

            if not os.path.exists(local_src):
                if is_temporary:
                    msg = f"Local file {local_uri} downloaded from {src} has gone missing"
                else:
                    msg = f"Source URI {src} does not exist"
                raise FileNotFoundError(msg)

            # Follow soft links
            local_src = os.path.realpath(os.path.normpath(local_src))

            # All the modes involving linking use "link" somewhere
            if "link" in transfer and is_temporary:
                raise RuntimeError("Can not use local file system transfer mode"
                                   f" {transfer} for remote resource ({src})")

            # For temporary files we can own them
            requested_transfer = transfer
            if is_temporary and transfer == "copy":
                transfer = "move"

            # The output location should not exist
            dest_exists = self.exists()
            if not overwrite and dest_exists:
                raise FileExistsError(f"Destination path '{self}' already exists. Transfer "
                                      f"from {src} cannot be completed.")

            # Make the path absolute (but don't follow links since that
            # would possibly cause us to end up in the wrong place if the
            # file existed already as a soft link)
            newFullPath = os.path.abspath(self.ospath)
            outputDir = os.path.dirname(newFullPath)
            if not os.path.isdir(outputDir):
                # Must create the directory -- this can not be rolled back
                # since another transfer running concurrently may
                # be relying on this existing.
                safeMakeDir(outputDir)

            if transaction is None:
                # Use a no-op transaction to reduce code duplication
                transaction = NoTransaction()

            # For links the OS doesn't let us overwrite so if something does
            # exist we have to remove it before we do the actual "transfer"
            # below
            if "link" in transfer and overwrite and dest_exists:
                try:
                    self.remove()
                except Exception as exc:
                    # Deliberately best-effort: if this fails the problem
                    # will manifest immediately below with a more relevant
                    # error message. Record it for debugging only.
                    log.debug("Could not remove existing destination %s before linking: %s",
                              self, exc)

            if transfer == "move":
                with transaction.undoWith(f"move from {local_src}", shutil.move, newFullPath, local_src):
                    shutil.move(local_src, newFullPath)
            elif transfer == "copy":
                with transaction.undoWith(f"copy from {local_src}", os.remove, newFullPath):
                    shutil.copy(local_src, newFullPath)
            elif transfer == "link":
                # Try hard link and if that fails use a symlink
                with transaction.undoWith(f"link to {local_src}", os.remove, newFullPath):
                    try:
                        os.link(local_src, newFullPath)
                    except OSError:
                        # Read through existing symlinks
                        os.symlink(local_src, newFullPath)
            elif transfer == "hardlink":
                with transaction.undoWith(f"hardlink to {local_src}", os.remove, newFullPath):
                    os.link(local_src, newFullPath)
            elif transfer == "symlink":
                # Read through existing symlinks
                with transaction.undoWith(f"symlink to {local_src}", os.remove, newFullPath):
                    os.symlink(local_src, newFullPath)
            elif transfer == "relsymlink":
                # This is a standard symlink but using a relative path
                # Need the directory name to give to relative root
                # A full file path confuses it into an extra ../
                newFullPathRoot = os.path.dirname(newFullPath)
                relPath = os.path.relpath(local_src, newFullPathRoot)
                with transaction.undoWith(f"relsymlink to {local_src}", os.remove, newFullPath):
                    os.symlink(relPath, newFullPath)
            else:
                raise NotImplementedError(f"Transfer type '{transfer}' not supported.")

        # This was an explicit move requested from a remote resource
        # try to remove that remote resource. We check is_temporary because
        # the local file would have been moved by shutil.move already.
        if requested_transfer == "move" and is_temporary:
            # Transactions do not work here
            src.remove()

    def walk(self, file_filter: Optional[Union[str, re.Pattern]] = None) -> Iterator[Union[List,
                                                                                           Tuple[ButlerURI,
                                                                                                 List[str],
                                                                                                 List[str]]]]:
        """Walk the directory tree returning matching files and directories.

        Parameters
        ----------
        file_filter : `str` or `re.Pattern`, optional
            Regex used to select files. Only file names for which
            ``search`` finds a match are kept; directory names are never
            filtered.

        Yields
        ------
        dirpath : `ButlerURI`
            Current directory being examined.
        dirnames : `list` of `str`
            Names of subdirectories within dirpath.
        filenames : `list` of `str`
            Names of all the files within dirpath.

        Raises
        ------
        ValueError
            Raised if this URI is not a directory.
        """
        if not self.isdir():
            raise ValueError("Can not walk a non-directory URI")

        if isinstance(file_filter, str):
            file_filter = re.compile(file_filter)

        for root, dirs, files in os.walk(self.ospath):
            # Filter by the regex
            if file_filter is not None:
                files = [f for f in files if file_filter.search(f)]
            yield type(self)(root, forceAbsolute=False, forceDirectory=True), dirs, files

    @classmethod
    def _fixupPathUri(cls, parsed: urllib.parse.ParseResult, root: Optional[Union[str, ButlerURI]] = None,
                      forceAbsolute: bool = False,
                      forceDirectory: bool = False) -> Tuple[urllib.parse.ParseResult, bool]:
        """Fix up relative paths in URI instances.

        Parameters
        ----------
        parsed : `~urllib.parse.ParseResult`
            The result from parsing a URI using `urllib.parse`.
        root : `str` or `ButlerURI`, optional
            Path to use as root when converting relative to absolute.
            If `None`, it will be the current working directory. This
            is a local file system path, or a file URI.  It is only used if
            a file-scheme is used incorrectly with a relative path.
        forceAbsolute : `bool`, ignored
            Has no effect for this subclass. ``file`` URIs are always
            absolute.
        forceDirectory : `bool`, optional
            If `True` forces the URI to end with a separator, otherwise given
            URI is interpreted as is.

        Returns
        -------
        modified : `~urllib.parse.ParseResult`
            Update result if a URI is being handled.
        dirLike : `bool`
            `True` if given parsed URI has a trailing separator,
            forceDirectory is True, or the path refers to an existing
            local directory. Otherwise `False`.

        Raises
        ------
        RuntimeError
            Raised if ``root`` is a `ButlerURI` with a scheme other than
            ``file``.

        Notes
        -----
        Relative paths are explicitly not supported by RFC8089 but `urllib`
        does accept URIs of the form ``file:relative/path.ext``. They need
        to be turned into absolute paths before they can be used.  This is
        always done regardless of the ``forceAbsolute`` parameter.
        """
        # file URI implies POSIX path separators so split as POSIX,
        # then join as os, and convert to abspath. Do not handle
        # home directories since "file" scheme is explicitly documented
        # to not do tilde expansion.
        sep = posixpath.sep

        # For local file system we can explicitly check to see if this
        # really is a directory. The URI might point to a location that
        # does not exists yet but all that matters is if it is a directory
        # then we make sure use that fact. No need to do the check if
        # we are already being told.
        # The URI path is quoted and uses POSIX separators, so convert it
        # the same way ``ospath`` does before asking the file system.
        if not forceDirectory and os.path.isdir(urllib.parse.unquote(posix2os(parsed.path))):
            forceDirectory = True

        has_trailing_sep = parsed.path.endswith(sep)

        # For an absolute path all we need to do is check if we need
        # to force the directory separator
        if posixpath.isabs(parsed.path):
            if forceDirectory and not has_trailing_sep:
                parsed = parsed._replace(path=parsed.path + sep)
            # A trailing separator marks the URI as directory-like even when
            # nothing exists at that location yet.
            dirLike = forceDirectory or has_trailing_sep
            return copy.copy(parsed), dirLike

        # Relative path so must fix it to be compliant with the standard

        if root is None:
            root = os.path.abspath(os.path.curdir)
        elif isinstance(root, ButlerURI):
            if root.scheme and root.scheme != "file":
                raise RuntimeError(f"The override root must be a file URI not {root.scheme}")
            root = os.path.abspath(root.ospath)

        new_path = posixpath.normpath(posixpath.join(os2posix(root), parsed.path))

        # normpath strips trailing "/" so put it back if necessary, taking
        # care not to double it up when the result is the file system root.
        # Acknowledge that trailing separator exists.
        dirLike = forceDirectory or has_trailing_sep
        if dirLike and not new_path.endswith(sep):
            new_path += sep

        # ParseResult is a NamedTuple so _replace is standard API
        parsed = parsed._replace(path=new_path)

        if parsed.params or parsed.query:
            log.warning("Additional items unexpectedly encountered in file URI: %s", parsed.geturl())

        return parsed, dirLike