Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ("ButlerURI",) 

25 

26import contextlib 

27import os 

28import os.path 

29import shutil 

30import urllib 

31import pkg_resources 

32import posixpath 

33from pathlib import Path, PurePath, PurePosixPath 

34import requests 

35import tempfile 

36import copy 

37import logging 

38import re 

39 

40from typing import ( 

41 TYPE_CHECKING, 

42 Any, 

43 Callable, 

44 cast, 

45 Iterator, 

46 Optional, 

47 Tuple, 

48 Type, 

49 Union, 

50) 

51 

52from .utils import safeMakeDir 

53 

54if TYPE_CHECKING: 54 ↛ 55line 54 didn't jump to line 55, because the condition on line 54 was never true

55 try: 

56 import boto3 

57 except ImportError: 

58 pass 

59 from .datastore import DatastoreTransaction 

60 

61 

# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# True when the host OS uses the POSIX path separator.
IS_POSIX = posixpath.sep == os.sep

# Root of the local file system, taken from the resolved current directory.
OS_ROOT_PATH = Path(os.curdir).resolve().root

# Matches a percent-escape as emitted by URI quoting (upper-case hex digits).
ESCAPES_RE = re.compile(r"%[A-F0-9]{2}")

72 

73 

def os2posix(ospath: str) -> str:
    """Translate a path written with the local separator into POSIX form.

    Parameters
    ----------
    ospath : `str`
        Path using the local path separator.

    Returns
    -------
    posix : `str`
        The same path written with the POSIX path separator. On a POSIX
        host the input is returned unchanged.
    """
    if not IS_POSIX:
        converted = PurePath(ospath).as_posix()

        # PurePath discards a trailing separator, which would lose the hint
        # that the path refers to a directory, so put it back if needed.
        lost_trailing_sep = ospath.endswith(os.sep) and not converted.endswith(posixpath.sep)
        return converted + posixpath.sep if lost_trailing_sep else converted

    return ospath

99 

100 

def posix2os(posix: Union[PurePath, str]) -> str:
    """Convert a POSIX path description to a local path description.

    Parameters
    ----------
    posix : `str`, `PurePath`
        Path using the POSIX path separator.

    Returns
    -------
    ospath : `str`
        Path using OS path separator. On a POSIX host this is simply the
        string form of the input.

    Notes
    -----
    A path with no components (an empty string, ``.`` or ``./``) is returned
    in string form unchanged since there is nothing to translate.
    """
    if IS_POSIX:
        return str(posix)

    posixPath = PurePosixPath(posix)
    paths = list(posixPath.parts)

    # An empty or "." path has no parts at all; indexing it or passing no
    # arguments to os.path.join would raise, so hand it back as is.
    if not paths:
        return str(posix)

    # Have to convert the root directory after splitting
    if paths[0] == posixPath.root:
        paths[0] = OS_ROOT_PATH

    # Trailing "/" is stripped so we need to add back an empty path
    # for consistency
    if str(posix).endswith(posixpath.sep):
        paths.append("")

    return os.path.join(*paths)

130 

131 

class NoTransaction:
    """Stand-in for `DatastoreTransaction` that records nothing.

    Lets calling code use the transaction context-manager API without
    special-casing the situation where no transaction was supplied.
    """

    def __init__(self) -> None:
        # There is no state to set up.
        pass

    @contextlib.contextmanager
    def undoWith(self, name: str, undoFunc: Callable, *args: Any, **kwargs: Any) -> Iterator[None]:
        """Context manager that ignores the undo request entirely.

        All arguments are accepted for signature compatibility with
        `DatastoreTransaction` and are never used or called.
        """
        yield None

146 

147 

class ButlerURI:
    """Convenience wrapper around URI parsers.

    Provides access to URI components and can convert file
    paths into absolute path URIs. Scheme-less URIs are treated as if
    they are local file system paths and are converted to absolute URIs.

    A specialist subclass is created for each supported URI scheme.

    Parameters
    ----------
    uri : `str`, `urllib.parse.ParseResult` or `ButlerURI`
        URI in string form. Can be scheme-less if referring to a local
        filesystem path. An existing `ButlerURI` is copied without
        further parsing.
    root : `str` or `ButlerURI`, optional
        When fixing up a relative path in a ``file`` scheme or if scheme-less,
        use this as the root. Must be absolute. If `None` the current
        working directory will be used. Can be a file URI.
    forceAbsolute : `bool`, optional
        If `True`, scheme-less relative URI will be converted to an absolute
        path using a ``file`` scheme. If `False` scheme-less URI will remain
        scheme-less and will not be updated to ``file`` or absolute path.
    forceDirectory: `bool`, optional
        If `True` forces the URI to end with a separator, otherwise given URI
        is interpreted as is.

    Raises
    ------
    ValueError
        Raised if ``uri`` is not one of the supported types.
    NotImplementedError
        Raised if the URI scheme is not supported.
    """

    _pathLib: Type[PurePath] = PurePosixPath
    """Path library to use for this scheme."""

    _pathModule = posixpath
    """Path module to use for this scheme."""

    transferModes: Tuple[str, ...] = ("copy", "auto", "move")
    """Transfer modes supported by this implementation.

    Move is special in that it is generally a copy followed by an unlink.
    Whether that unlink works depends critically on whether the source URI
    implements unlink. If it does not the move will be reported as a failure.
    """

    transferDefault: str = "copy"
    """Default mode to use for transferring if ``auto`` is specified."""

    quotePaths = True
    """True if path-like elements modifying a URI should be quoted.

    All non-schemeless URIs have to internally use quoted paths. Therefore
    if a new file name is given (e.g. to updateFile or join) a decision must
    be made whether to quote it to be consistent.
    """

    # This is not an ABC with abstract methods because the __new__ being
    # a factory confuses mypy such that it assumes that every constructor
    # returns a ButlerURI and then determines that all the abstract methods
    # are still abstract. If they are not marked abstract but just raise
    # mypy is fine with it.

    # mypy is confused without this
    _uri: urllib.parse.ParseResult

    def __new__(cls, uri: Union[str, urllib.parse.ParseResult, ButlerURI],
                root: Optional[Union[str, ButlerURI]] = None, forceAbsolute: bool = True,
                forceDirectory: bool = False) -> ButlerURI:
        # Factory: parse the input, pick the scheme-specific subclass and
        # return an instance of that subclass.
        parsed: urllib.parse.ParseResult
        dirLike: bool
        subclass: Optional[Type] = None

        # Record if we need to post process the URI components
        # or if the instance is already fully configured
        if isinstance(uri, str):
            # Since local file names can have special characters in them
            # we need to quote them for the parser but we can unquote
            # later. Assume that all other URI schemes are quoted.
            # Since sometimes people write file:/a/b and not file:///a/b
            # we should not quote in the explicit case of file:
            if "://" not in uri and not uri.startswith("file:"):
                if ESCAPES_RE.search(uri):
                    log.warning("Possible double encoding of %s", uri)
                else:
                    uri = urllib.parse.quote(uri)
            parsed = urllib.parse.urlparse(uri)
        elif isinstance(uri, urllib.parse.ParseResult):
            parsed = copy.copy(uri)
        elif isinstance(uri, ButlerURI):
            parsed = copy.copy(uri._uri)
            dirLike = uri.dirLike
            # No further parsing required and we know the subclass
            subclass = type(uri)
        else:
            raise ValueError(f"Supplied URI must be string, ButlerURI, or ParseResult but got '{uri!r}'")

        if subclass is None:
            # Work out the subclass from the URI scheme
            if not parsed.scheme:
                subclass = ButlerSchemelessURI
            elif parsed.scheme == "file":
                subclass = ButlerFileURI
            elif parsed.scheme == "s3":
                subclass = ButlerS3URI
            elif parsed.scheme.startswith("http"):
                subclass = ButlerHttpURI
            elif parsed.scheme == "resource":
                # Rules for scheme names disallow pkg_resource
                subclass = ButlerPackageResourceURI
            elif parsed.scheme == "mem":
                # in-memory datastore object
                subclass = ButlerInMemoryURI
            else:
                # Both literals must be f-strings so that the offending URI
                # is actually interpolated into the message.
                raise NotImplementedError(f"No URI support for scheme: '{parsed.scheme}'"
                                          f" in {parsed.geturl()}")

            parsed, dirLike = subclass._fixupPathUri(parsed, root=root,
                                                     forceAbsolute=forceAbsolute,
                                                     forceDirectory=forceDirectory)

            # It is possible for the class to change from schemeless
            # to file so handle that
            if parsed.scheme == "file":
                subclass = ButlerFileURI

        # Now create an instance of the correct subclass and set the
        # attributes directly
        self = object.__new__(subclass)
        self._uri = parsed
        self.dirLike = dirLike
        return self

    @property
    def scheme(self) -> str:
        """The URI scheme (``://`` is not part of the scheme)."""
        return self._uri.scheme

    @property
    def netloc(self) -> str:
        """The URI network location."""
        return self._uri.netloc

    @property
    def path(self) -> str:
        """The path component of the URI."""
        return self._uri.path

    @property
    def unquoted_path(self) -> str:
        """The path component of the URI with any URI quoting reversed."""
        return urllib.parse.unquote(self._uri.path)

    @property
    def ospath(self) -> str:
        """Path component of the URI localized to current OS."""
        raise AttributeError(f"Non-file URI ({self}) has no local OS path.")

    @property
    def relativeToPathRoot(self) -> str:
        """Returns path relative to network location.

        Effectively, this is the path property with posix separator stripped
        from the left hand side of the path.

        Always unquotes.
        """
        p = self._pathLib(self.path)
        relToRoot = str(p.relative_to(p.root))
        # The path library strips the trailing separator; restore it for
        # directory-like URIs.
        if self.dirLike and not relToRoot.endswith("/"):
            relToRoot += "/"
        return urllib.parse.unquote(relToRoot)

    @property
    def fragment(self) -> str:
        """The fragment component of the URI."""
        return self._uri.fragment

    @property
    def params(self) -> str:
        """Any parameters included in the URI."""
        return self._uri.params

    @property
    def query(self) -> str:
        """Any query strings included in the URI."""
        return self._uri.query

    def geturl(self) -> str:
        """Return the URI in string form.

        Returns
        -------
        url : `str`
            String form of URI.
        """
        return self._uri.geturl()

    def split(self) -> Tuple[ButlerURI, str]:
        """Splits URI into head and tail. Equivalent to os.path.split where
        head preserves the URI components.

        Returns
        -------
        head: `ButlerURI`
            Everything leading up to tail, expanded and normalized as per
            ButlerURI rules.
        tail : `str`
            Last `self.path` component. Tail will be empty if path ends on a
            separator. Tail will never contain separators. It will be
            unquoted.
        """
        head, tail = self._pathModule.split(self.path)
        headuri = self._uri._replace(path=head)

        # The file part should never include quoted metacharacters
        tail = urllib.parse.unquote(tail)

        # Schemeless is special in that it can be a relative path
        # We need to ensure that it stays that way. All other URIs will
        # be absolute already.
        forceAbsolute = self._pathModule.isabs(self.path)
        return ButlerURI(headuri, forceDirectory=True, forceAbsolute=forceAbsolute), tail

    def basename(self) -> str:
        """Returns the base name, last element of path, of the URI. If URI ends
        on a slash returns an empty string. This is the second element returned
        by split().

        Equivalent of os.path.basename().

        Returns
        -------
        tail : `str`
            Last part of the path attribute. Tail will be empty if path ends
            on a separator.
        """
        return self.split()[1]

    def dirname(self) -> ButlerURI:
        """Returns a ButlerURI containing all the directories of the path
        attribute.

        Equivalent of os.path.dirname()

        Returns
        -------
        head : `ButlerURI`
            Everything except the tail of path attribute, expanded and
            normalized as per ButlerURI rules.
        """
        return self.split()[0]

    def replace(self, **kwargs: Any) -> ButlerURI:
        """Replace components in a URI with new values and return a new
        instance.

        Parameters
        ----------
        **kwargs
            Components to replace, forwarded to the ``_replace`` method of
            the underlying parse result.

        Returns
        -------
        new : `ButlerURI`
            New `ButlerURI` object with updated values.
        """
        return self.__class__(self._uri._replace(**kwargs))

    def updateFile(self, newfile: str) -> None:
        """Update in place the final component of the path with the supplied
        file name.

        Parameters
        ----------
        newfile : `str`
            File name with no path component.

        Notes
        -----
        Updates the URI in place.
        Updates the ButlerURI.dirLike attribute. The new file path will
        be quoted if necessary.
        """
        if self.quotePaths:
            newfile = urllib.parse.quote(newfile)
        # Keep the directory part and swap in the new final component.
        dirname, _ = self._pathModule.split(self.path)
        newpath = self._pathModule.join(dirname, newfile)

        self.dirLike = False
        self._uri = self._uri._replace(path=newpath)

    def getExtension(self) -> str:
        """Return the file extension(s) associated with this URI path.

        Returns
        -------
        ext : `str`
            The file extension (including the ``.``). Can be empty string
            if there is no file extension. Usually returns only the last
            file extension unless there is a special extension modifier
            indicating file compression, in which case the combined
            extension (e.g. ``.fits.gz``) will be returned.
        """
        special = {".gz", ".bz2", ".xz", ".fz"}

        extensions = self._pathLib(self.path).suffixes

        if not extensions:
            return ""

        ext = extensions.pop()

        # Multiple extensions, decide whether to include the final two
        if extensions and ext in special:
            ext = f"{extensions[-1]}{ext}"

        return ext

    def join(self, path: str) -> ButlerURI:
        """Create a new `ButlerURI` with additional path components including
        a file.

        Parameters
        ----------
        path : `str`
            Additional file components to append to the current URI. Assumed
            to include a file at the end. Will be quoted depending on the
            associated URI scheme.

        Returns
        -------
        new : `ButlerURI`
            New URI with any file at the end replaced with the new path
            components.

        Notes
        -----
        Schemeless URIs assume local path separator but all other URIs assume
        POSIX separator if the supplied path has directory structure. It
        may be this never becomes a problem but datastore templates assume
        POSIX separator is being used.
        """
        new = self.dirname()  # By definition a directory URI

        # new should be asked about quoting, not self, since dirname can
        # change the URI scheme for schemeless -> file
        if new.quotePaths:
            path = urllib.parse.quote(path)

        newpath = self._pathModule.normpath(self._pathModule.join(new.path, path))
        new._uri = new._uri._replace(path=newpath)
        # Declare the new URI not be dirLike unless path ended in /
        if not path.endswith(self._pathModule.sep):
            new.dirLike = False
        return new

    def relative_to(self, other: ButlerURI) -> Optional[str]:
        """Return the relative path from this URI to the other URI.

        Parameters
        ----------
        other : `ButlerURI`
            URI to use to calculate the relative path. Must be a parent
            of this URI.

        Returns
        -------
        subpath : `str`
            The sub path of this URI relative to the supplied other URI.
            Returns `None` if there is no parent child relationship.
            Scheme and netloc must match.
        """
        if self.scheme != other.scheme or self.netloc != other.netloc:
            return None

        enclosed_path = self._pathLib(self.relativeToPathRoot)
        parent_path = other.relativeToPathRoot
        subpath: Optional[str]
        try:
            subpath = str(enclosed_path.relative_to(parent_path))
        except ValueError:
            # Not a child of the other URI.
            subpath = None
        else:
            subpath = urllib.parse.unquote(subpath)
        return subpath

    def exists(self) -> bool:
        """Indicate that the resource is available.

        Returns
        -------
        exists : `bool`
            `True` if the resource exists.
        """
        raise NotImplementedError()

    def remove(self) -> None:
        """Remove the resource."""
        raise NotImplementedError()

    def isabs(self) -> bool:
        """Indicate that the resource is fully specified.

        For non-schemeless URIs this is always true.

        Returns
        -------
        isabs : `bool`
            `True` in all cases except schemeless URI.
        """
        return True

    def as_local(self) -> Tuple[str, bool]:
        """Return the location of the (possibly remote) resource in the
        local file system.

        Returns
        -------
        path : `str`
            If this is a remote resource, it will be a copy of the resource
            on the local file system, probably in a temporary directory.
            For a local resource this should be the actual path to the
            resource.
        is_temporary : `bool`
            Indicates if the local path is a temporary file or not.
        """
        raise NotImplementedError()

    def read(self, size: int = -1) -> bytes:
        """Open the resource and return the contents in bytes.

        Parameters
        ----------
        size : `int`, optional
            The number of bytes to read. Negative or omitted indicates
            that all data should be read.
        """
        raise NotImplementedError()

    def write(self, data: bytes, overwrite: bool = True) -> None:
        """Write the supplied bytes to the new resource.

        Parameters
        ----------
        data : `bytes`
            The bytes to write to the resource. The entire contents of the
            resource will be replaced.
        overwrite : `bool`, optional
            If `True` the resource will be overwritten if it exists. Otherwise
            the write will fail.
        """
        raise NotImplementedError()

    def mkdir(self) -> None:
        """For a dir-like URI, create the directory resource if it does not
        already exist.
        """
        raise NotImplementedError()

    def size(self) -> int:
        """For non-dir-like URI, return the size of the resource.

        Returns
        -------
        sz : `int`
            The size in bytes of the resource associated with this URI.
            Returns 0 if dir-like.
        """
        raise NotImplementedError()

    def __str__(self) -> str:
        return self.geturl()

    def __repr__(self) -> str:
        return f'ButlerURI("{self.geturl()}")'

    def __eq__(self, other: Any) -> bool:
        # Two URIs are equal when their string forms are identical.
        if not isinstance(other, ButlerURI):
            return False
        return self.geturl() == other.geturl()

    def __copy__(self) -> ButlerURI:
        # Implement here because the __new__ method confuses things
        return type(self)(str(self))

    def __deepcopy__(self, memo: Any) -> ButlerURI:
        # Implement here because the __new__ method confuses things
        return self.__copy__()

    def __getnewargs__(self) -> Tuple:
        # Supplies the positional argument that __new__ needs on unpickling.
        return (str(self),)

    @staticmethod
    def _fixupPathUri(parsed: urllib.parse.ParseResult, root: Optional[Union[str, ButlerURI]] = None,
                      forceAbsolute: bool = False,
                      forceDirectory: bool = False) -> Tuple[urllib.parse.ParseResult, bool]:
        """Correct any issues with the supplied URI.

        Parameters
        ----------
        parsed : `~urllib.parse.ParseResult`
            The result from parsing a URI using `urllib.parse`.
        root : `str` or `ButlerURI`, ignored
            Not used by this implementation since all URIs are
            absolute except for those representing the local file system.
        forceAbsolute : `bool`, ignored.
            Not used by this implementation. URIs are generally always
            absolute.
        forceDirectory : `bool`, optional
            If `True` forces the URI to end with a separator, otherwise given
            URI is interpreted as is. Specifying that the URI is conceptually
            equivalent to a directory can break some ambiguities when
            interpreting the last element of a path.

        Returns
        -------
        modified : `~urllib.parse.ParseResult`
            Update result if a URI is being handled.
        dirLike : `bool`
            `True` if given parsed URI has a trailing separator or
            forceDirectory is True. Otherwise `False`.

        Notes
        -----
        Relative paths are explicitly not supported by RFC8089 but `urllib`
        does accept URIs of the form ``file:relative/path.ext``. They need
        to be turned into absolute paths before they can be used. This is
        always done regardless of the ``forceAbsolute`` parameter.

        AWS S3 differentiates between keys with trailing POSIX separators (i.e
        `/dir` and `/dir/`) whereas POSIX does not necessarily.

        Scheme-less paths are normalized.
        """
        # assume we are not dealing with a directory like URI
        dirLike = False

        # URI is dir-like if explicitly stated or if it ends on a separator
        endsOnSep = parsed.path.endswith(posixpath.sep)
        if forceDirectory or endsOnSep:
            dirLike = True
            # only add the separator if it's not already there
            if not endsOnSep:
                parsed = parsed._replace(path=parsed.path+posixpath.sep)

        return parsed, dirLike

    def transfer_from(self, src: ButlerURI, transfer: str,
                      overwrite: bool = False,
                      transaction: Optional[Union[DatastoreTransaction, NoTransaction]] = None) -> None:
        """Transfer the current resource to a new location.

        Parameters
        ----------
        src : `ButlerURI`
            Source URI.
        transfer : `str`
            Mode to use for transferring the resource. Generically there are
            many standard options: copy, link, symlink, hardlink, relsymlink.
            Not all URIs support all modes.
        overwrite : `bool`, optional
            Allow an existing file to be overwritten. Defaults to `False`.
        transaction : `DatastoreTransaction`, optional
            A transaction object that can (depending on implementation)
            rollback transfers on error. Not guaranteed to be implemented.

        Notes
        -----
        Conceptually this is hard to scale as the number of URI schemes
        grow. The destination URI is more important than the source URI
        since that is where all the transfer modes are relevant (with the
        complication that "move" deletes the source).

        Local file to local file is the fundamental use case but every
        other scheme has to support "copy" to local file (with implicit
        support for "move") and copy from local file.
        All the "link" options tend to be specific to local file systems.

        "move" is a "copy" where the remote resource is deleted at the end.
        Whether this works depends on the source URI rather than the
        destination URI. Reverting a move on transaction rollback is
        expected to be problematic if a remote resource was involved.
        """
        raise NotImplementedError(f"No transfer modes supported by URI scheme {self.scheme}")

723 

724 

725class ButlerFileURI(ButlerURI): 

726 """URI for explicit ``file`` scheme.""" 

727 

728 transferModes = ("copy", "link", "symlink", "hardlink", "relsymlink", "auto", "move") 

729 transferDefault: str = "link" 

730 

@property
def ospath(self) -> str:
    """Local-OS form of the URI path with URI quoting removed.

    The stored path is always quoted because a formal URI requires it, so
    it is unquoted after being converted to the local separator.
    """
    localized = posix2os(self._uri.path)
    return urllib.parse.unquote(localized)

738 

def exists(self) -> bool:
    """Report whether the local path of this URI exists.

    Relies on `os.path.exists`, so a soft link whose target has gone
    away is reported as not existing.
    """
    local_path = self.ospath
    return os.path.exists(local_path)

743 

def size(self) -> int:
    """Return the size in bytes of the local file, or 0 for a directory."""
    if os.path.isdir(self.ospath):
        return 0
    return os.stat(self.ospath).st_size

751 

def remove(self) -> None:
    """Delete the local file that this URI refers to."""
    target = self.ospath
    os.remove(target)

755 

def as_local(self) -> Tuple[str, bool]:
    """Return the local path of the file.

    Returns
    -------
    path : `str`
        The local path to this file.
    temporary : `bool`
        Always `False` since the returned path is not a temporary copy.
    """
    is_temporary = False
    return self.ospath, is_temporary

767 

def _force_to_file(self) -> ButlerFileURI:
    """Return a copy of this URI that uses the ``file`` scheme.

    Returns
    -------
    file : `ButlerFileURI`
        A copy of the URI using file scheme. If already a file scheme
        the copy will be identical.

    Raises
    ------
    ValueError
        Raised if this URI is schemeless and relative path and so can
        not be forced to file absolute path without context.
    """
    # A file-scheme URI needs no conversion; hand back a plain copy.
    duplicate = copy.copy(self)
    return duplicate

785 

def relative_to(self, other: ButlerURI) -> Optional[str]:
    """Return the relative path from this URI to the other URI.

    Parameters
    ----------
    other : `ButlerURI`
        URI to use to calculate the relative path. Must be a parent
        of this URI.

    Returns
    -------
    subpath : `str`
        The sub path of this URI relative to the supplied other URI.
        Returns `None` if there is no parent child relationship.
        Scheme and netloc must match but for file URIs schemeless
        is also used. If this URI is a relative URI but the other is
        absolute, it is assumed to be in the parent completely unless it
        starts with ".." (in which case the path is combined and tested).
        If both URIs are relative, the relative paths are compared
        for commonality.

    Notes
    -----
    By definition a relative path will be relative to the enclosing
    absolute parent URI. It will be returned unchanged if it does not
    use a parent directory specification.
    """
    # Only a file or schemeless URI can share a path with this one.
    if other.scheme not in ("", "file"):
        return None

    if self.isabs():
        # Matching schemes can be compared directly by the base class.
        if self.scheme == other.scheme:
            return super().relative_to(other)

        # One side is schemeless and the other is file. Both are absolute
        # so forcing them to file scheme is safe. The cast tells mypy that
        # other must be a ButlerFileURI to reach this point.
        return self._force_to_file().relative_to(cast(ButlerFileURI, other)._force_to_file())

    if other.isabs():
        # This URI is relative and the parent is absolute: combine them so
        # that a path using ".." that ends up back inside the parent is
        # still recognised as a child.
        childUri = other.join(self.path)
        return childUri.relative_to(other)

    # Both relative: the normal logic applies, where a/b/c.txt and a/b/
    # returns c.txt.
    return super().relative_to(other)

842 

def read(self, size: int = -1) -> bytes:
    """Read bytes from the local file.

    Parameters
    ----------
    size : `int`, optional
        The number of bytes to read. Negative or omitted indicates
        that all data should be read.
    """
    with open(self.ospath, "rb") as stream:
        payload = stream.read(size)
    return payload

847 

def write(self, data: bytes, overwrite: bool = True) -> None:
    """Write the supplied bytes to the local file.

    Parameters
    ----------
    data : `bytes`
        The bytes to write. The entire contents of the file are replaced.
    overwrite : `bool`, optional
        If `True` an existing file is overwritten. Otherwise the file is
        opened in exclusive-creation mode, which fails if it exists.
    """
    parent = os.path.dirname(self.ospath)
    if not os.path.exists(parent):
        safeMakeDir(parent)
    open_mode = "wb" if overwrite else "xb"
    with open(self.ospath, open_mode) as stream:
        stream.write(data)

858 

def mkdir(self) -> None:
    """Create the directory for this URI if nothing exists at its path.

    Raises
    ------
    FileExistsError
        Raised if the path exists but is not a directory.
    """
    if os.path.exists(self.ospath):
        # Something is already there; that is only acceptable when it is
        # a directory.
        if not os.path.isdir(self.ospath):
            raise FileExistsError(f"URI {self} exists but is not a directory!")
        return
    safeMakeDir(self.ospath)

864 

865 def transfer_from(self, src: ButlerURI, transfer: str, 

866 overwrite: bool = False, 

867 transaction: Optional[Union[DatastoreTransaction, NoTransaction]] = None) -> None: 

868 """Transfer the current resource to a local file. 

869 

870 Parameters 

871 ---------- 

872 src : `ButlerURI` 

873 Source URI. 

874 transfer : `str` 

875 Mode to use for transferring the resource. Supports the following 

876 options: copy, link, symlink, hardlink, relsymlink. 

877 overwrite : `bool`, optional 

878 Allow an existing file to be overwritten. Defaults to `False`. 

879 transaction : `DatastoreTransaction`, optional 

880 If a transaction is provided, undo actions will be registered. 

881 """ 

882 # Fail early to prevent delays if remote resources are requested 

883 if transfer not in self.transferModes: 

884 raise ValueError(f"Transfer mode '{transfer}' not supported by URI scheme {self.scheme}") 

885 

886 log.debug(f"Transferring {src} [exists: {src.exists()}] -> " 

887 f"{self} [exists: {self.exists()}] (transfer={transfer})") 

888 

889 # We do not have to special case ButlerFileURI here because 

890 # as_local handles that. 

891 local_src, is_temporary = src.as_local() 

892 

893 # Default transfer mode depends on whether we have a temporary 

894 # file or not. 

895 if transfer == "auto": 

896 transfer = self.transferDefault if not is_temporary else "copy" 

897 

898 # Follow soft links 

899 local_src = os.path.realpath(os.path.normpath(local_src)) 

900 

901 if not os.path.exists(local_src): 

902 raise FileNotFoundError(f"Source URI {src} does not exist") 

903 

904 # All the modes involving linking use "link" somewhere 

905 if "link" in transfer and is_temporary: 

906 raise RuntimeError("Can not use local file system transfer mode" 

907 f" {transfer} for remote resource ({src})") 

908 

909 # For temporary files we can own them 

910 requested_transfer = transfer 

911 if is_temporary and transfer == "copy": 

912 transfer = "move" 

913 

914 # The output location should not exist 

915 dest_exists = self.exists() 

916 if not overwrite and dest_exists: 

917 raise FileExistsError(f"Destination path '{self}' already exists. Transfer " 

918 f"from {src} cannot be completed.") 

919 

920 # Make the path absolute (but don't follow links since that 

921 # would possibly cause us to end up in the wrong place if the 

922 # file existed already as a soft link) 

923 newFullPath = os.path.abspath(self.ospath) 

924 outputDir = os.path.dirname(newFullPath) 

925 if not os.path.isdir(outputDir): 

926 # Must create the directory -- this can not be rolled back 

927 # since another transfer running concurrently may 

928 # be relying on this existing. 

929 safeMakeDir(outputDir) 

930 

931 if transaction is None: 

932 # Use a no-op transaction to reduce code duplication 

933 transaction = NoTransaction() 

934 

935 # For links the OS doesn't let us overwrite so if something does 

936 # exist we have to remove it before we do the actual "transfer" below 

937 if "link" in transfer and overwrite and dest_exists: 

938 try: 

939 self.remove() 

940 except Exception: 

941 # If this fails we ignore it since it's a problem 

942 # that will manifest immediately below with a more relevant 

943 # error message 

944 pass 

945 

946 if transfer == "move": 

947 with transaction.undoWith(f"move from {local_src}", shutil.move, newFullPath, local_src): 

948 shutil.move(local_src, newFullPath) 

949 elif transfer == "copy": 

950 with transaction.undoWith(f"copy from {local_src}", os.remove, newFullPath): 

951 shutil.copy(local_src, newFullPath) 

952 elif transfer == "link": 

953 # Try hard link and if that fails use a symlink 

954 with transaction.undoWith(f"link to {local_src}", os.remove, newFullPath): 

955 try: 

956 os.link(local_src, newFullPath) 

957 except OSError: 

958 # Read through existing symlinks 

959 os.symlink(local_src, newFullPath) 

960 elif transfer == "hardlink": 

961 with transaction.undoWith(f"hardlink to {local_src}", os.remove, newFullPath): 

962 os.link(local_src, newFullPath) 

963 elif transfer == "symlink": 

964 # Read through existing symlinks 

965 with transaction.undoWith(f"symlink to {local_src}", os.remove, newFullPath): 

966 os.symlink(local_src, newFullPath) 

967 elif transfer == "relsymlink": 

968 # This is a standard symlink but using a relative path 

969 # Need the directory name to give to relative root 

970 # A full file path confuses it into an extra ../ 

971 newFullPathRoot = os.path.dirname(newFullPath) 

972 relPath = os.path.relpath(local_src, newFullPathRoot) 

973 with transaction.undoWith(f"relsymlink to {local_src}", os.remove, newFullPath): 

974 os.symlink(relPath, newFullPath) 

975 else: 

976 raise NotImplementedError("Transfer type '{}' not supported.".format(transfer)) 

977 

978 # This was an explicit move requested from a remote resource 

979 # try to remove that resource. We check is_temporary because 

980 # the local file would have been moved by shutil.move already. 

981 if requested_transfer == "move" and is_temporary: 

982 # Transactions do not work here 

983 src.remove() 

984 

985 if is_temporary and os.path.exists(local_src): 

986 # This should never happen since we have moved it above 

987 os.remove(local_src) 

988 

@staticmethod
def _fixupPathUri(parsed: urllib.parse.ParseResult, root: Optional[Union[str, ButlerURI]] = None,
                  forceAbsolute: bool = False,
                  forceDirectory: bool = False) -> Tuple[urllib.parse.ParseResult, bool]:
    """Convert the path of a ``file`` URI to an absolute POSIX path.

    Parameters
    ----------
    parsed : `~urllib.parse.ParseResult`
        The result from parsing a URI using `urllib.parse`.
    root : `str` or `ButlerURI`, optional
        Location that a relative path is resolved against. If `None`,
        the current working directory is used. May be a local file
        system path or a ``file`` URI. Only consulted when the URI
        path is relative.
    forceAbsolute : `bool`, ignored
        Not used by this implementation; the returned path is always
        absolute.
    forceDirectory : `bool`, optional
        If `True` the returned path is made to end with a separator.
        A path that names an existing local directory is treated as if
        this were `True`.

    Returns
    -------
    modified : `~urllib.parse.ParseResult`
        Parse result with an absolute path.
    dirLike : `bool`
        `True` if the result is to be treated as a directory,
        otherwise `False`.

    Raises
    ------
    RuntimeError
        Raised if ``root`` is a `ButlerURI` with a scheme other than
        ``file``.

    Notes
    -----
    Relative paths are explicitly not supported by RFC8089 but `urllib`
    does accept URIs of the form ``file:relative/path.ext``. They are
    always made absolute here, whatever the value of ``forceAbsolute``.
    No tilde expansion is performed for ``file`` URIs.
    """
    # A file URI always uses POSIX separators.
    slash = posixpath.sep
    uriPath = parsed.path

    # The location need not exist, but if it does exist and is a
    # directory that settles the question. Skip the file system check
    # when the caller has already told us.
    wantDirectory = forceDirectory or posixpath.isdir(uriPath)

    # Absolute paths only need the optional trailing separator.
    if posixpath.isabs(uriPath):
        if not wantDirectory:
            return copy.copy(parsed), False
        if not uriPath.endswith(slash):
            parsed = parsed._replace(path=uriPath + slash)
        return copy.copy(parsed), True

    # Relative path: it has to be anchored to a root to be compliant
    # with the standard.
    if root is None:
        rootPath = os.path.abspath(os.path.curdir)
    elif isinstance(root, ButlerURI):
        if root.scheme and root.scheme != "file":
            raise RuntimeError(f"The override root must be a file URI not {root.scheme}")
        rootPath = os.path.abspath(root.ospath)
    else:
        # A plain string root is used as given.
        rootPath = root

    absPath = posixpath.normpath(posixpath.join(os2posix(rootPath), uriPath))

    # normpath drops a trailing "/" so work out whether one has to be
    # put back, and record that this makes the URI directory-like.
    separatorStripped = uriPath.endswith(slash) and not absPath.endswith(slash)
    dirLike = bool(wantDirectory or separatorStripped)
    if dirLike:
        absPath += slash

    # ParseResult is a NamedTuple so _replace is standard API
    parsed = parsed._replace(path=absPath)

    if parsed.params or parsed.query:
        log.warning("Additional items unexpectedly encountered in file URI: %s", parsed.geturl())

    return parsed, dirLike

1079 

1080 

class ButlerS3URI(ButlerURI):
    """URI referring to an object in an S3 bucket.

    The network location of the URI is used as the bucket name and the
    path relative to the root is used as the object key.
    """

    @property
    def client(self) -> boto3.client:
        """Client object to address remote resource."""
        # Defer import for circular dependencies
        from .s3utils import getS3Client
        return getS3Client()

    def exists(self) -> bool:
        """Check whether the remote object exists.

        Returns
        -------
        exists : `bool`
            Existence flag reported by ``s3CheckFileExists``.
        """
        # s3utils itself imports ButlerURI so defer this import
        from .s3utils import s3CheckFileExists
        exists, _ = s3CheckFileExists(self, client=self.client)
        return exists

    def size(self) -> int:
        """Return the size of the remote object.

        Returns
        -------
        sz : `int`
            Always 0 for a directory-like URI, otherwise the size
            reported by ``s3CheckFileExists``.
        """
        # s3utils itself imports ButlerURI so defer this import
        from .s3utils import s3CheckFileExists
        if self.dirLike:
            return 0
        _, sz = s3CheckFileExists(self, client=self.client)
        return sz

    def remove(self) -> None:
        """Remove the resource."""

        # https://github.com/boto/boto3/issues/507 - there is no
        # way of knowing if the file was actually deleted except
        # for checking all the keys again, response is HTTP 204 OK
        # response all the time
        self.client.delete_object(Bucket=self.netloc, Key=self.relativeToPathRoot)

    def read(self, size: int = -1) -> bytes:
        """Read the contents of the remote object.

        Parameters
        ----------
        size : `int`, optional
            Number of bytes to read from the start of the object. Zero
            or a negative value reads the entire object.

        Returns
        -------
        data : `bytes`
            The bytes read.

        Raises
        ------
        FileNotFoundError
            Raised if the key or the bucket does not exist.
        """
        args = {}
        if size > 0:
            args["Range"] = f"bytes=0-{size-1}"

        # Use one client for both the request and the exception classes
        # that the request is matched against.
        client = self.client
        try:
            response = client.get_object(Bucket=self.netloc,
                                         Key=self.relativeToPathRoot,
                                         **args)
        except (client.exceptions.NoSuchKey, client.exceptions.NoSuchBucket) as err:
            raise FileNotFoundError(f"No such resource: {self}") from err

        # Release the streaming body even if the read itself fails.
        stream = response["Body"]
        try:
            return stream.read()
        finally:
            stream.close()

    def write(self, data: bytes, overwrite: bool = True) -> None:
        """Write the supplied bytes to the remote object.

        Parameters
        ----------
        data : `bytes`
            The bytes to store; they become the full object content.
        overwrite : `bool`, optional
            If `False` an existing object is not replaced.

        Raises
        ------
        FileExistsError
            Raised if the object exists and ``overwrite`` is `False`.
        """
        if not overwrite:
            if self.exists():
                raise FileExistsError(f"Remote resource {self} exists and overwrite has been disabled")
        self.client.put_object(Bucket=self.netloc, Key=self.relativeToPathRoot,
                               Body=data)

    def mkdir(self) -> None:
        """Create the key representing a directory-like URI.

        Raises
        ------
        ValueError
            Raised if the bucket does not exist or if this URI is not
            directory-like.
        """
        # Defer import for circular dependencies
        from .s3utils import bucketExists
        if not bucketExists(self.netloc):
            raise ValueError(f"Bucket {self.netloc} does not exist for {self}!")

        if not self.dirLike:
            raise ValueError(f"Can not create a 'directory' for file-like URI {self}")

        # don't create S3 key when root is at the top-level of a Bucket
        if not self.path == "/":
            self.client.put_object(Bucket=self.netloc, Key=self.relativeToPathRoot)

    def as_local(self) -> Tuple[str, bool]:
        """Download object from S3 and place in temporary directory.

        Returns
        -------
        path : `str`
            Path to local temporary file.
        temporary : `bool`
            Always returns `True`. This is always a temporary file.
        """
        # The file has to outlive this method so it is created with
        # delete=False; that means we must clean it up ourselves if the
        # download does not complete.
        tmpFile = tempfile.NamedTemporaryFile(suffix=self.getExtension(), delete=False)
        try:
            with tmpFile:
                self.client.download_fileobj(self.netloc, self.relativeToPathRoot, tmpFile)
        except BaseException:
            with contextlib.suppress(OSError):
                os.remove(tmpFile.name)
            raise
        return tmpFile.name, True

    def transfer_from(self, src: ButlerURI, transfer: str = "copy",
                      overwrite: bool = False,
                      transaction: Optional[Union[DatastoreTransaction, NoTransaction]] = None) -> None:
        """Transfer the current resource to an S3 bucket.

        Parameters
        ----------
        src : `ButlerURI`
            Source URI.
        transfer : `str`
            Mode to use for transferring the resource. Must be one of
            the modes listed in ``transferModes``. ``auto`` is replaced
            by ``transferDefault``; ``move`` removes the source after
            the transfer.
        overwrite : `bool`, optional
            Allow an existing file to be overwritten. Defaults to `False`.
        transaction : `DatastoreTransaction`, optional
            Currently unused.

        Raises
        ------
        ValueError
            Raised if the transfer mode is not supported.
        FileExistsError
            Raised if the destination exists and ``overwrite`` is
            `False`.
        """
        # Fail early to prevent delays if remote resources are requested
        if transfer not in self.transferModes:
            raise ValueError(f"Transfer mode '{transfer}' not supported by URI scheme {self.scheme}")

        # The existence checks are remote calls so only make them when
        # the message will actually be emitted.
        if log.isEnabledFor(logging.DEBUG):
            log.debug("Transferring %s [exists: %s] -> %s [exists: %s] (transfer=%s)",
                      src, src.exists(), self, self.exists(), transfer)

        if not overwrite and self.exists():
            raise FileExistsError(f"Destination path '{self}' already exists.")

        if transfer == "auto":
            transfer = self.transferDefault

        if isinstance(src, type(self)):
            # Looks like an S3 remote uri so we can use direct copy
            # note that boto3.resource.meta.copy is cleverer than the low
            # level copy_object
            copy_source = {
                "Bucket": src.netloc,
                "Key": src.relativeToPathRoot,
            }
            self.client.copy_object(CopySource=copy_source, Bucket=self.netloc, Key=self.relativeToPathRoot)
        else:
            # Use local file and upload it
            local_src, is_temporary = src.as_local()

            # resource.meta.upload_file seems like the right thing
            # but we have a low level client
            try:
                with open(local_src, "rb") as fh:
                    self.client.put_object(Bucket=self.netloc,
                                           Key=self.relativeToPathRoot, Body=fh)
            finally:
                # Never leave the temporary copy behind, even when the
                # upload fails.
                if is_temporary:
                    os.remove(local_src)

        # This was an explicit move requested from a remote resource
        # try to remove that resource
        if transfer == "move":
            # Transactions do not work here
            src.remove()

1218 

1219 

class ButlerPackageResourceURI(ButlerURI):
    """URI referring to a Python package resource.

    These URIs look like: ``resource://lsst.daf.butler/configs/file.yaml``
    where the network location is the Python package and the path is the
    resource name.
    """

    def exists(self) -> bool:
        """Check that the python resource exists."""
        package = self.netloc
        resource = self.relativeToPathRoot
        return pkg_resources.resource_exists(package, resource)

    def read(self, size: int = -1) -> bytes:
        """Read bytes from the package resource.

        Parameters
        ----------
        size : `int`, optional
            Value handed to the ``read`` method of the resource stream.
            Defaults to -1.

        Returns
        -------
        data : `bytes`
            The bytes read from the resource.
        """
        package = self.netloc
        resource = self.relativeToPathRoot
        with pkg_resources.resource_stream(package, resource) as stream:
            return stream.read(size)

1235 

1236 

class ButlerHttpURI(ButlerURI):
    """General HTTP(S) resource."""

    @property
    def session(self) -> requests.Session:
        """Client object to address remote resource."""
        from .webdavutils import getHttpSession, isWebdavEndpoint
        if isWebdavEndpoint(self):
            log.debug("%s looks like a Webdav endpoint.", self.geturl())
            return getHttpSession()

        log.debug("%s looks like a standard HTTP endpoint.", self.geturl())
        return requests.Session()

    def exists(self) -> bool:
        """Check that a remote HTTP resource exists.

        Returns
        -------
        exists : `bool`
            `True` if a HEAD request returns status 200.
        """
        log.debug("Checking if resource exists: %s", self.geturl())
        r = self.session.head(self.geturl())

        return r.status_code == 200

    def size(self) -> int:
        """Return the size of the remote resource in bytes.

        Returns
        -------
        sz : `int`
            Always 0 for a directory-like URI, otherwise the value of
            the ``Content-Length`` header of a HEAD request.

        Raises
        ------
        FileNotFoundError
            Raised if the HEAD request does not return status 200.
        """
        if self.dirLike:
            return 0
        r = self.session.head(self.geturl())
        if r.status_code == 200:
            return int(r.headers['Content-Length'])
        else:
            raise FileNotFoundError(f"Resource {self} does not exist")

    def mkdir(self) -> None:
        """For a dir-like URI, create the directory resource if it does not
        already exist.

        Raises
        ------
        ValueError
            Raised if this URI is not directory-like or if the MKCOL
            request does not return status 201.
        """
        if not self.dirLike:
            raise ValueError(f"Can not create a 'directory' for file-like URI {self}")

        if not self.exists():
            log.debug("Creating new directory: %s", self.geturl())
            r = self.session.request("MKCOL", self.geturl())
            if r.status_code != 201:
                raise ValueError(f"Can not create directory {self}, status code: {r.status_code}")

    def remove(self) -> None:
        """Remove the resource.

        Raises
        ------
        FileNotFoundError
            Raised if the DELETE request does not return status 200,
            202 or 204.
        """
        log.debug("Removing resource: %s", self.geturl())
        r = self.session.delete(self.geturl())
        if r.status_code not in [200, 202, 204]:
            raise FileNotFoundError(f"Unable to delete resource {self}; status code: {r.status_code}")

    def as_local(self) -> Tuple[str, bool]:
        """Download object over HTTP and place in temporary directory.

        Returns
        -------
        path : `str`
            Path to local temporary file.
        temporary : `bool`
            Always returns `True`. This is always a temporary file.

        Raises
        ------
        FileNotFoundError
            Raised if the GET request does not return status 200.
        """
        log.debug("Downloading remote resource as local file: %s", self.geturl())
        r = self.session.get(self.geturl(), stream=True)
        if r.status_code != 200:
            raise FileNotFoundError(f"Unable to download resource {self}; status code: {r.status_code}")

        # The file has to outlive this method so it is created with
        # delete=False; that means we must clean it up ourselves if the
        # download does not complete.
        tmpFile = tempfile.NamedTemporaryFile(suffix=self.getExtension(), delete=False)
        try:
            with tmpFile:
                # iter_content defaults to a chunk size of one byte, so
                # ask for sensibly sized chunks explicitly.
                for chunk in r.iter_content(chunk_size=64 * 1024):
                    tmpFile.write(chunk)
        except BaseException:
            with contextlib.suppress(OSError):
                os.remove(tmpFile.name)
            raise
        return tmpFile.name, True

    def read(self, size: int = -1) -> bytes:
        """Open the resource and return the contents in bytes.

        Parameters
        ----------
        size : `int`, optional
            The number of bytes to read. Zero, negative or omitted
            indicates that all data should be read.

        Returns
        -------
        data : `bytes`
            The bytes read. Empty if a limited read is requested from a
            resource with no content.

        Raises
        ------
        FileNotFoundError
            Raised if the GET request does not return status 200.
        """
        log.debug("Reading from remote resource: %s", self.geturl())
        stream = size > 0
        r = self.session.get(self.geturl(), stream=stream)
        if r.status_code != 200:
            raise FileNotFoundError(f"Unable to read resource {self}; status code: {r.status_code}")
        if not stream:
            return r.content

        # Only the first chunk is wanted. Supply a default so that an
        # empty body yields empty bytes rather than StopIteration, and
        # close the response since the rest of the stream is abandoned.
        try:
            return next(r.iter_content(chunk_size=size), b"")
        finally:
            r.close()

    def write(self, data: bytes, overwrite: bool = True) -> None:
        """Write the supplied bytes to the new resource.

        Parameters
        ----------
        data : `bytes`
            The bytes to write to the resource. The entire contents of the
            resource will be replaced.
        overwrite : `bool`, optional
            If `True` the resource will be overwritten if it exists. Otherwise
            the write will fail.

        Raises
        ------
        FileExistsError
            Raised if the resource exists and ``overwrite`` is `False`.
        """
        log.debug("Writing to remote resource: %s", self.geturl())
        if not overwrite:
            if self.exists():
                raise FileExistsError(f"Remote resource {self} exists and overwrite has been disabled")
        # NOTE(review): the status of the PUT response is not inspected
        # so a rejected write is not reported to the caller.
        self.session.put(self.geturl(), data=data)

    def transfer_from(self, src: ButlerURI, transfer: str = "copy",
                      overwrite: bool = False,
                      transaction: Optional[Union[DatastoreTransaction, NoTransaction]] = None) -> None:
        """Transfer the current resource to a Webdav repository.

        Parameters
        ----------
        src : `ButlerURI`
            Source URI.
        transfer : `str`
            Mode to use for transferring the resource. Must be one of
            the modes listed in ``transferModes``. ``auto`` is replaced
            by ``transferDefault``.
        overwrite : `bool`, optional
            Allow an existing file to be overwritten. Defaults to `False`.
        transaction : `DatastoreTransaction`, optional
            Currently unused.

        Raises
        ------
        ValueError
            Raised if the transfer mode is not supported.
        FileExistsError
            Raised if the destination exists and ``overwrite`` is
            `False`.
        """
        # Fail early to prevent delays if remote resources are requested
        if transfer not in self.transferModes:
            raise ValueError(f"Transfer mode {transfer} not supported by URI scheme {self.scheme}")

        # The existence checks are remote calls so only make them when
        # the message will actually be emitted.
        if log.isEnabledFor(logging.DEBUG):
            log.debug("Transferring %s [exists: %s] -> %s [exists: %s] (transfer=%s)",
                      src, src.exists(), self, self.exists(), transfer)

        # Only an existing destination that we are not allowed to
        # replace is an error.
        if not overwrite and self.exists():
            raise FileExistsError(f"Destination path {self} already exists.")

        if transfer == "auto":
            transfer = self.transferDefault

        if isinstance(src, type(self)):
            # Both ends are HTTP so ask the server to do the work.
            if transfer == "move":
                self.session.request("MOVE", src.geturl(), headers={"Destination": self.geturl()})
            else:
                self.session.request("COPY", src.geturl(), headers={"Destination": self.geturl()})
        else:
            # Use local file and upload it
            # NOTE(review): unlike ButlerS3URI.transfer_from the source
            # is not removed here when transfer is "move" -- confirm
            # whether that is intended.
            local_src, is_temporary = src.as_local()
            try:
                with open(local_src, "rb") as f:
                    self.session.post(self.geturl(), files={"file": f})
            finally:
                # Never leave the temporary copy behind, even when the
                # upload fails.
                if is_temporary:
                    os.remove(local_src)

1385 

1386 

class ButlerInMemoryURI(ButlerURI):
    """Internal in-memory datastore URI (`mem://`).

    Not used for any real purpose other than indicating that the dataset
    is in memory.
    """

    def exists(self) -> bool:
        """Test for existence and always return `True`."""
        return True

    def as_local(self) -> Tuple[str, bool]:
        """Always fail; no local file is produced for an in-memory URI.

        Raises
        ------
        RuntimeError
            Always raised.
        """
        raise RuntimeError(f"Do not know how to retrieve data for URI '{self}'")

1400 

1401 

1402class ButlerSchemelessURI(ButlerFileURI): 

1403 """Scheme-less URI referring to the local file system""" 

1404 

1405 _pathLib = PurePath 

1406 _pathModule = os.path 

1407 quotePaths = False 

1408 

1409 @property 

1410 def ospath(self) -> str: 

1411 """Path component of the URI localized to current OS.""" 

1412 return self.path 

1413 

1414 def isabs(self) -> bool: 

1415 """Indicate that the resource is fully specified. 

1416 

1417 For non-schemeless URIs this is always true. 

1418 

1419 Returns 

1420 ------- 

1421 isabs : `bool` 

1422 `True` if the file is absolute, `False` otherwise. 

1423 """ 

1424 return os.path.isabs(self.ospath) 

1425 

1426 def _force_to_file(self) -> ButlerFileURI: 

1427 """Force a schemeless URI to a file URI and returns a new URI. 

1428 

1429 This will include URI quoting of the path. 

1430 

1431 Returns 

1432 ------- 

1433 file : `ButlerFileURI` 

1434 A copy of the URI using file scheme. If already a file scheme 

1435 the copy will be identical. 

1436 

1437 Raises 

1438 ------ 

1439 ValueError 

1440 Raised if this URI is schemeless and relative path and so can 

1441 not be forced to file absolute path without context. 

1442 """ 

1443 if not self.isabs(): 

1444 raise RuntimeError(f"Internal error: Can not force {self} to absolute file URI") 

1445 uri = self._uri._replace(scheme="file", path=urllib.parse.quote(os2posix(self.path))) 

1446 # mypy really wants a ButlerFileURI to be returned here 

1447 return ButlerURI(uri, forceDirectory=self.dirLike) # type: ignore 

1448 

1449 @staticmethod 

1450 def _fixupPathUri(parsed: urllib.parse.ParseResult, root: Optional[Union[str, ButlerURI]] = None, 

1451 forceAbsolute: bool = False, 

1452 forceDirectory: bool = False) -> Tuple[urllib.parse.ParseResult, bool]: 

1453 """Fix up relative paths for local file system. 

1454 

1455 Parameters 

1456 ---------- 

1457 parsed : `~urllib.parse.ParseResult` 

1458 The result from parsing a URI using `urllib.parse`. 

1459 root : `str` or `ButlerURI`, optional 

1460 Path to use as root when converting relative to absolute. 

1461 If `None`, it will be the current working directory. This 

1462 is a local file system path, or a file URI. 

1463 forceAbsolute : `bool`, optional 

1464 If `True`, scheme-less relative URI will be converted to an 

1465 absolute path using a ``file`` scheme. If `False` scheme-less URI 

1466 will remain scheme-less and will not be updated to ``file`` or 

1467 absolute path. 

1468 forceDirectory : `bool`, optional 

1469 If `True` forces the URI to end with a separator, otherwise given 

1470 URI is interpreted as is. 

1471 

1472 Returns 

1473 ------- 

1474 modified : `~urllib.parse.ParseResult` 

1475 Update result if a URI is being handled. 

1476 dirLike : `bool` 

1477 `True` if given parsed URI has a trailing separator or 

1478 forceDirectory is True. Otherwise `False`. 

1479 

1480 Notes 

1481 ----- 

1482 Relative paths are explicitly not supported by RFC8089 but `urllib` 

1483 does accept URIs of the form ``file:relative/path.ext``. They need 

1484 to be turned into absolute paths before they can be used. This is 

1485 always done regardless of the ``forceAbsolute`` parameter. 

1486 

1487 Scheme-less paths are normalized. 

1488 """ 

1489 # assume we are not dealing with a directory URI 

1490 dirLike = False 

1491 

1492 # Replacement values for the URI 

1493 replacements = {} 

1494 

1495 if root is None: 

1496 root = os.path.abspath(os.path.curdir) 

1497 elif isinstance(root, ButlerURI): 

1498 if root.scheme and root.scheme != "file": 

1499 raise RuntimeError(f"The override root must be a file URI not {root.scheme}") 

1500 root = os.path.abspath(root.ospath) 

1501 

1502 # this is a local OS file path which can support tilde expansion. 

1503 # we quoted it in the constructor so unquote here 

1504 expandedPath = os.path.expanduser(urllib.parse.unquote(parsed.path)) 

1505 

1506 # Ensure that this becomes a file URI if it is already absolute 

1507 if os.path.isabs(expandedPath): 

1508 replacements["scheme"] = "file" 

1509 # Keep in OS form for now to simplify later logic 

1510 replacements["path"] = os.path.normpath(expandedPath) 

1511 elif forceAbsolute: 

1512 # This can stay in OS path form, do not change to file 

1513 # scheme. 

1514 replacements["path"] = os.path.normpath(os.path.join(root, expandedPath)) 

1515 else: 

1516 # No change needed for relative local path staying relative 

1517 # except normalization 

1518 replacements["path"] = os.path.normpath(expandedPath) 

1519 # normalization of empty path returns "." so we are dirLike 

1520 if expandedPath == "": 

1521 dirLike = True 

1522 

1523 # normpath strips trailing "/" which makes it hard to keep 

1524 # track of directory vs file when calling replaceFile 

1525 

1526 # For local file system we can explicitly check to see if this 

1527 # really is a directory. The URI might point to a location that 

1528 # does not exists yet but all that matters is if it is a directory 

1529 # then we make sure use that fact. No need to do the check if 

1530 # we are already being told. 

1531 if not forceDirectory and os.path.isdir(replacements["path"]): 

1532 forceDirectory = True 

1533 

1534 # add the trailing separator only if explicitly required or 

1535 # if it was stripped by normpath. Acknowledge that trailing 

1536 # separator exists. 

1537 endsOnSep = expandedPath.endswith(os.sep) and not replacements["path"].endswith(os.sep) 

1538 if (forceDirectory or endsOnSep or dirLike): 

1539 dirLike = True 

1540 if not replacements["path"].endswith(os.sep): 

1541 replacements["path"] += os.sep 

1542 

1543 if "scheme" in replacements: 

1544 # This is now meant to be a URI path so force to posix 

1545 # and quote 

1546 replacements["path"] = urllib.parse.quote(os2posix(replacements["path"])) 

1547 

1548 # ParseResult is a NamedTuple so _replace is standard API 

1549 parsed = parsed._replace(**replacements) 

1550 

1551 if parsed.params or parsed.fragment or parsed.query: 

1552 log.warning("Additional items unexpectedly encountered in schemeless URI: %s", parsed.geturl()) 

1553 

1554 return parsed, dirLike