Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ("ButlerURI",) 

25 

26import contextlib 

27import os 

28import os.path 

29import shutil 

30import urllib 

31import pkg_resources 

32import posixpath 

33from pathlib import Path, PurePath, PurePosixPath 

34import requests 

35import tempfile 

36import copy 

37import logging 

38import re 

39 

40from typing import ( 

41 TYPE_CHECKING, 

42 Any, 

43 Callable, 

44 cast, 

45 Iterator, 

46 Optional, 

47 Tuple, 

48 Type, 

49 Union, 

50) 

51 

52from .utils import safeMakeDir 

53 

54if TYPE_CHECKING: 54 ↛ 55line 54 didn't jump to line 55, because the condition on line 54 was never true

55 try: 

56 import boto3 

57 except ImportError: 

58 pass 

59 from .datastore import DatastoreTransaction 

60 

61 

# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Determine if the path separator for the OS looks like POSIX.
# Evaluated once at import time by comparing ``os.sep`` with ``posixpath.sep``.
IS_POSIX = os.sep == posixpath.sep

# Root path for this operating system.
# Taken from the root of the resolved current working directory at import time.
OS_ROOT_PATH = Path().resolve().root

# Regex for looking for URI escapes.
# Matches "%" followed by two hex digits; only upper-case A-F are recognised,
# so lower-case escapes such as "%2f" do not match.
ESCAPES_RE = re.compile(r"%[A-F0-9]{2}")

72 

73 

def os2posix(ospath: str) -> str:
    """Translate a path written with the local separator into POSIX form.

    Parameters
    ----------
    ospath : `str`
        Path using the local path separator.

    Returns
    -------
    posix : `str`
        The same path expressed with the POSIX path separator.
    """
    if not IS_POSIX:
        converted = PurePath(ospath).as_posix()

        # PurePath drops a trailing separator, which loses the hint that
        # the path refers to a directory. Put it back if it was there.
        hadTrailingSep = ospath.endswith(os.sep)
        if hadTrailingSep and not converted.endswith(posixpath.sep):
            converted = converted + posixpath.sep

        return converted

    # Already POSIX so there is nothing to translate.
    return ospath

99 

100 

def posix2os(posix: Union[PurePath, str]) -> str:
    """Convert a POSIX path description to a local path description.

    Parameters
    ----------
    posix : `str`, `PurePath`
        Path using the POSIX path separator.

    Returns
    -------
    ospath : `str`
        Path using OS path separator.

    Notes
    -----
    On a POSIX system the input is returned unchanged (as a string).
    Paths that have no components (for example ``""`` or ``"."``) are
    handled by translating the separators directly rather than by
    rebuilding the path from its parts.
    """
    if IS_POSIX:
        return str(posix)

    posixPath = PurePosixPath(posix)
    paths = list(posixPath.parts)

    # "" and "." (with or without a trailing separator) yield no parts at
    # all. Indexing into the empty list would raise IndexError, and
    # os.path.join() can not be called without arguments, so translate the
    # separators in the original string instead.
    if not paths:
        return str(posix).replace(posixpath.sep, os.sep)

    # Have to convert the root directory after splitting
    if paths[0] == posixPath.root:
        paths[0] = OS_ROOT_PATH

    # Trailing "/" is stripped so we need to add back an empty path
    # for consistency
    if str(posix).endswith(posixpath.sep):
        paths.append("")

    return os.path.join(*paths)

130 

131 

class NoTransaction:
    """Stand-in for `DatastoreTransaction` that records nothing.

    Lets calling code use the transaction interface unconditionally even
    when no real transaction is active.
    """

    def __init__(self) -> None:
        # There is no state to set up.
        pass

    @contextlib.contextmanager
    def undoWith(self, name: str, undoFunc: Callable, *args: Any, **kwargs: Any) -> Iterator[None]:
        """Context manager that ignores its arguments and registers no
        undo action.
        """
        yield

146 

147 

class ButlerURI:
    """Convenience wrapper around URI parsers.

    Provides access to URI components and can convert file
    paths into absolute path URIs. Scheme-less URIs are treated as if
    they are local file system paths and are converted to absolute URIs.

    A specialist subclass is created for each supported URI scheme.

    Parameters
    ----------
    uri : `str`, `urllib.parse.ParseResult` or `ButlerURI`
        URI in string form. Can be scheme-less if referring to a local
        filesystem path. If a `ButlerURI` is given a copy is made and the
        other parameters are not used.
    root : `str` or `ButlerURI`, optional
        When fixing up a relative path in a ``file`` scheme or if scheme-less,
        use this as the root. Must be absolute. If `None` the current
        working directory will be used. Can be a file URI.
    forceAbsolute : `bool`, optional
        If `True`, scheme-less relative URI will be converted to an absolute
        path using a ``file`` scheme. If `False` scheme-less URI will remain
        scheme-less and will not be updated to ``file`` or absolute path.
    forceDirectory: `bool`, optional
        If `True` forces the URI to end with a separator, otherwise given URI
        is interpreted as is.

    Raises
    ------
    ValueError
        Raised if ``uri`` is not a string, `ButlerURI` or
        `~urllib.parse.ParseResult`.
    NotImplementedError
        Raised if the URI scheme is not supported.
    """

    _pathLib: Type[PurePath] = PurePosixPath
    """Path library to use for this scheme."""

    _pathModule = posixpath
    """Path module to use for this scheme."""

    transferModes: Tuple[str, ...] = ("copy", "auto", "move")
    """Transfer modes supported by this implementation.

    Move is special in that it is generally a copy followed by an unlink.
    Whether that unlink works depends critically on whether the source URI
    implements unlink. If it does not the move will be reported as a failure.
    """

    transferDefault: str = "copy"
    """Default mode to use for transferring if ``auto`` is specified."""

    quotePaths = True
    """True if path-like elements modifying a URI should be quoted.

    All non-schemeless URIs have to internally use quoted paths. Therefore
    if a new file name is given (e.g. to updateFile or join) a decision must
    be made whether to quote it to be consistent.
    """

    # This is not an ABC with abstract methods because the __new__ being
    # a factory confuses mypy such that it assumes that every constructor
    # returns a ButlerURI and then determines that all the abstract methods
    # are still abstract. If they are not marked abstract but just raise
    # mypy is fine with it.

    # mypy is confused without this
    _uri: urllib.parse.ParseResult

    def __new__(cls, uri: Union[str, urllib.parse.ParseResult, ButlerURI],
                root: Optional[Union[str, ButlerURI]] = None, forceAbsolute: bool = True,
                forceDirectory: bool = False) -> ButlerURI:
        parsed: urllib.parse.ParseResult
        dirLike: bool
        subclass: Optional[Type] = None

        # Record if we need to post process the URI components
        # or if the instance is already fully configured
        if isinstance(uri, str):
            # Since local file names can have special characters in them
            # we need to quote them for the parser but we can unquote
            # later. Assume that all other URI schemes are quoted.
            # Since sometimes people write file:/a/b and not file:///a/b
            # we should not quote in the explicit case of file:
            if "://" not in uri and not uri.startswith("file:"):
                if ESCAPES_RE.search(uri):
                    log.warning("Possible double encoding of %s", uri)
                else:
                    uri = urllib.parse.quote(uri)
            parsed = urllib.parse.urlparse(uri)
        elif isinstance(uri, urllib.parse.ParseResult):
            parsed = copy.copy(uri)
        elif isinstance(uri, ButlerURI):
            parsed = copy.copy(uri._uri)
            dirLike = uri.dirLike
            # No further parsing required and we know the subclass
            subclass = type(uri)
        else:
            raise ValueError(f"Supplied URI must be string, ButlerURI, or ParseResult but got '{uri!r}'")

        if subclass is None:
            # Work out the subclass from the URI scheme
            if not parsed.scheme:
                subclass = ButlerSchemelessURI
            elif parsed.scheme == "file":
                subclass = ButlerFileURI
            elif parsed.scheme == "s3":
                subclass = ButlerS3URI
            elif parsed.scheme.startswith("http"):
                subclass = ButlerHttpURI
            elif parsed.scheme == "resource":
                # Rules for scheme names disallow pkg_resource
                subclass = ButlerPackageResourceURI
            elif parsed.scheme == "mem":
                # in-memory datastore object
                subclass = ButlerInMemoryURI
            else:
                # Both halves must be f-strings so that the full URI is
                # interpolated into the message.
                raise NotImplementedError(f"No URI support for scheme: '{parsed.scheme}'"
                                          f" in {parsed.geturl()}")

            parsed, dirLike = subclass._fixupPathUri(parsed, root=root,
                                                     forceAbsolute=forceAbsolute,
                                                     forceDirectory=forceDirectory)

            # It is possible for the class to change from schemeless
            # to file so handle that
            if parsed.scheme == "file":
                subclass = ButlerFileURI

        # Now create an instance of the correct subclass and set the
        # attributes directly
        self = object.__new__(subclass)
        self._uri = parsed
        self.dirLike = dirLike
        return self

    @property
    def scheme(self) -> str:
        """The URI scheme (``://`` is not part of the scheme)."""
        return self._uri.scheme

    @property
    def netloc(self) -> str:
        """The URI network location."""
        return self._uri.netloc

    @property
    def path(self) -> str:
        """The path component of the URI."""
        return self._uri.path

    @property
    def unquoted_path(self) -> str:
        """The path component of the URI with any URI quoting reversed."""
        return urllib.parse.unquote(self._uri.path)

    @property
    def ospath(self) -> str:
        """Path component of the URI localized to current OS."""
        raise AttributeError(f"Non-file URI ({self}) has no local OS path.")

    @property
    def relativeToPathRoot(self) -> str:
        """Returns path relative to network location.

        Effectively, this is the path property with posix separator stripped
        from the left hand side of the path.

        Always unquotes.
        """
        p = self._pathLib(self.path)
        relToRoot = str(p.relative_to(p.root))
        if self.dirLike and not relToRoot.endswith("/"):
            relToRoot += "/"
        return urllib.parse.unquote(relToRoot)

    @property
    def is_root(self) -> bool:
        """`True` if this URI points to the root of the network location.

        This means that the path components refers to the top level.
        """
        # A dir-like URI at the top level has a relative path of "./"
        return self.relativeToPathRoot == "./"

    @property
    def fragment(self) -> str:
        """The fragment component of the URI."""
        return self._uri.fragment

    @property
    def params(self) -> str:
        """Any parameters included in the URI."""
        return self._uri.params

    @property
    def query(self) -> str:
        """Any query strings included in the URI."""
        return self._uri.query

    def geturl(self) -> str:
        """Return the URI in string form.

        Returns
        -------
        url : `str`
            String form of URI.
        """
        return self._uri.geturl()

    def split(self) -> Tuple[ButlerURI, str]:
        """Splits URI into head and tail. Equivalent to os.path.split where
        head preserves the URI components.

        Returns
        -------
        head: `ButlerURI`
            Everything leading up to tail, expanded and normalized as per
            ButlerURI rules.
        tail : `str`
            Last `self.path` component. Tail will be empty if path ends on a
            separator. Tail will never contain separators. It will be
            unquoted.
        """
        head, tail = self._pathModule.split(self.path)
        headuri = self._uri._replace(path=head)

        # The file part should never include quoted metacharacters
        tail = urllib.parse.unquote(tail)

        # Schemeless is special in that it can be a relative path
        # We need to ensure that it stays that way. All other URIs will
        # be absolute already.
        forceAbsolute = self._pathModule.isabs(self.path)
        return ButlerURI(headuri, forceDirectory=True, forceAbsolute=forceAbsolute), tail

    def basename(self) -> str:
        """Returns the base name, last element of path, of the URI. If URI ends
        on a slash returns an empty string. This is the second element returned
        by split().

        Equivalent of os.path.basename().

        Returns
        -------
        tail : `str`
            Last part of the path attribute. Tail will be empty if path ends
            on a separator.
        """
        return self.split()[1]

    def dirname(self) -> ButlerURI:
        """Returns a ButlerURI containing all the directories of the path
        attribute.

        Equivalent of os.path.dirname()

        Returns
        -------
        head : `ButlerURI`
            Everything except the tail of path attribute, expanded and
            normalized as per ButlerURI rules.
        """
        return self.split()[0]

    def parent(self) -> ButlerURI:
        """Returns a ButlerURI containing all the directories of the path
        attribute, minus the last one.

        Returns
        -------
        head : `ButlerURI`
            Everything except the tail of path attribute, expanded and
            normalized as per ButlerURI rules.
        """
        # When self is file-like, return self.dirname()
        if not self.dirLike:
            return self.dirname()
        # When self is dir-like, return its parent directory,
        # regardless of the presence of a trailing separator
        originalPath = self._pathLib(self.path)
        parentPath = originalPath.parent
        parentURI = self._uri._replace(path=str(parentPath))

        return ButlerURI(parentURI, forceDirectory=True)

    def replace(self, **kwargs: Any) -> ButlerURI:
        """Replace components in a URI with new values and return a new
        instance.

        Parameters
        ----------
        **kwargs
            Components to replace, forwarded to the ``_replace`` method of
            the underlying `~urllib.parse.ParseResult`.

        Returns
        -------
        new : `ButlerURI`
            New `ButlerURI` object with updated values.
        """
        return self.__class__(self._uri._replace(**kwargs))

    def updateFile(self, newfile: str) -> None:
        """Update in place the final component of the path with the supplied
        file name.

        Parameters
        ----------
        newfile : `str`
            File name with no path component.

        Notes
        -----
        Updates the URI in place.
        Updates the ButlerURI.dirLike attribute. The new file path will
        be quoted if necessary.
        """
        if self.quotePaths:
            newfile = urllib.parse.quote(newfile)
        # Only the directory part of the current path is retained
        directory, _ = self._pathModule.split(self.path)
        newpath = self._pathModule.join(directory, newfile)

        self.dirLike = False
        self._uri = self._uri._replace(path=newpath)

    def getExtension(self) -> str:
        """Return the file extension(s) associated with this URI path.

        Returns
        -------
        ext : `str`
            The file extension (including the ``.``). Can be empty string
            if there is no file extension. Usually returns only the last
            file extension unless there is a special extension modifier
            indicating file compression, in which case the combined
            extension (e.g. ``.fits.gz``) will be returned.
        """
        special = {".gz", ".bz2", ".xz", ".fz"}

        extensions = self._pathLib(self.path).suffixes

        if not extensions:
            return ""

        ext = extensions.pop()

        # Multiple extensions, decide whether to include the final two
        if extensions and ext in special:
            ext = f"{extensions[-1]}{ext}"

        return ext

    def join(self, path: str) -> ButlerURI:
        """Create a new `ButlerURI` with additional path components including
        a file.

        Parameters
        ----------
        path : `str`
            Additional file components to append to the current URI. Assumed
            to include a file at the end. Will be quoted depending on the
            associated URI scheme.

        Returns
        -------
        new : `ButlerURI`
            New URI with any file at the end replaced with the new path
            components.

        Notes
        -----
        Schemeless URIs assume local path separator but all other URIs assume
        POSIX separator if the supplied path has directory structure. It
        may be this never becomes a problem but datastore templates assume
        POSIX separator is being used.
        """
        new = self.dirname()  # By definition a directory URI

        # new should be asked about quoting, not self, since dirname can
        # change the URI scheme for schemeless -> file
        if new.quotePaths:
            path = urllib.parse.quote(path)

        newpath = self._pathModule.normpath(self._pathModule.join(new.path, path))
        new._uri = new._uri._replace(path=newpath)
        # Declare the new URI not be dirLike unless path ended in /
        if not path.endswith(self._pathModule.sep):
            new.dirLike = False
        return new

    def relative_to(self, other: ButlerURI) -> Optional[str]:
        """Return the relative path from this URI to the other URI.

        Parameters
        ----------
        other : `ButlerURI`
            URI to use to calculate the relative path. Must be a parent
            of this URI.

        Returns
        -------
        subpath : `str`
            The sub path of this URI relative to the supplied other URI.
            Returns `None` if there is no parent child relationship.
            Scheme and netloc must match.
        """
        if self.scheme != other.scheme or self.netloc != other.netloc:
            return None

        enclosed_path = self._pathLib(self.relativeToPathRoot)
        parent_path = other.relativeToPathRoot
        subpath: Optional[str]
        try:
            subpath = str(enclosed_path.relative_to(parent_path))
        except ValueError:
            # Not a child of the supplied URI
            subpath = None
        else:
            subpath = urllib.parse.unquote(subpath)
        return subpath

    def exists(self) -> bool:
        """Indicate that the resource is available.

        Returns
        -------
        exists : `bool`
            `True` if the resource exists.
        """
        raise NotImplementedError()

    def remove(self) -> None:
        """Remove the resource."""
        raise NotImplementedError()

    def isabs(self) -> bool:
        """Indicate that the resource is fully specified.

        For non-schemeless URIs this is always true.

        Returns
        -------
        isabs : `bool`
            `True` in all cases except schemeless URI.
        """
        return True

    def as_local(self) -> Tuple[str, bool]:
        """Return the location of the (possibly remote) resource in the
        local file system.

        Returns
        -------
        path : `str`
            If this is a remote resource, it will be a copy of the resource
            on the local file system, probably in a temporary directory.
            For a local resource this should be the actual path to the
            resource.
        is_temporary : `bool`
            Indicates if the local path is a temporary file or not.
        """
        raise NotImplementedError()

    def read(self, size: int = -1) -> bytes:
        """Open the resource and return the contents in bytes.

        Parameters
        ----------
        size : `int`, optional
            The number of bytes to read. Negative or omitted indicates
            that all data should be read.
        """
        raise NotImplementedError()

    def write(self, data: bytes, overwrite: bool = True) -> None:
        """Write the supplied bytes to the new resource.

        Parameters
        ----------
        data : `bytes`
            The bytes to write to the resource. The entire contents of the
            resource will be replaced.
        overwrite : `bool`, optional
            If `True` the resource will be overwritten if it exists. Otherwise
            the write will fail.
        """
        raise NotImplementedError()

    def mkdir(self) -> None:
        """For a dir-like URI, create the directory resource if it does not
        already exist.
        """
        raise NotImplementedError()

    def size(self) -> int:
        """For non-dir-like URI, return the size of the resource.

        Returns
        -------
        sz : `int`
            The size in bytes of the resource associated with this URI.
            Returns 0 if dir-like.
        """
        raise NotImplementedError()

    def __str__(self) -> str:
        return self.geturl()

    def __repr__(self) -> str:
        return f'ButlerURI("{self.geturl()}")'

    def __eq__(self, other: Any) -> bool:
        # Defer to the other operand for foreign types. Python falls back
        # to an identity comparison if neither side can handle it, so the
        # comparison with unrelated objects still evaluates to False.
        if not isinstance(other, ButlerURI):
            return NotImplemented
        return self.geturl() == other.geturl()

    def __copy__(self) -> ButlerURI:
        # Implement here because the __new__ method confuses things
        return type(self)(str(self))

    def __deepcopy__(self, memo: Any) -> ButlerURI:
        # Implement here because the __new__ method confuses things
        return self.__copy__()

    def __getnewargs__(self) -> Tuple:
        # Needed for pickling since __new__ requires the URI argument
        return (str(self),)

    @staticmethod
    def _fixupPathUri(parsed: urllib.parse.ParseResult, root: Optional[Union[str, ButlerURI]] = None,
                      forceAbsolute: bool = False,
                      forceDirectory: bool = False) -> Tuple[urllib.parse.ParseResult, bool]:
        """Correct any issues with the supplied URI.

        Parameters
        ----------
        parsed : `~urllib.parse.ParseResult`
            The result from parsing a URI using `urllib.parse`.
        root : `str` or `ButlerURI`, ignored
            Not used by this implementation since all URIs are
            absolute except for those representing the local file system.
        forceAbsolute : `bool`, ignored.
            Not used by this implementation. URIs are generally always
            absolute.
        forceDirectory : `bool`, optional
            If `True` forces the URI to end with a separator, otherwise given
            URI is interpreted as is. Specifying that the URI is conceptually
            equivalent to a directory can break some ambiguities when
            interpreting the last element of a path.

        Returns
        -------
        modified : `~urllib.parse.ParseResult`
            Update result if a URI is being handled.
        dirLike : `bool`
            `True` if given parsed URI has a trailing separator or
            forceDirectory is True. Otherwise `False`.

        Notes
        -----
        Relative paths are explicitly not supported by RFC8089 but `urllib`
        does accept URIs of the form ``file:relative/path.ext``. They need
        to be turned into absolute paths before they can be used. This is
        always done regardless of the ``forceAbsolute`` parameter.

        AWS S3 differentiates between keys with trailing POSIX separators (i.e
        `/dir` and `/dir/`) whereas POSIX does not necessarily.

        Scheme-less paths are normalized.
        """
        # assume we are not dealing with a directory like URI
        dirLike = False

        # URI is dir-like if explicitly stated or if it ends on a separator
        endsOnSep = parsed.path.endswith(posixpath.sep)
        if forceDirectory or endsOnSep:
            dirLike = True
            # only add the separator if it's not already there
            if not endsOnSep:
                parsed = parsed._replace(path=parsed.path+posixpath.sep)

        return parsed, dirLike

    def transfer_from(self, src: ButlerURI, transfer: str,
                      overwrite: bool = False,
                      transaction: Optional[Union[DatastoreTransaction, NoTransaction]] = None) -> None:
        """Transfer the current resource to a new location.

        Parameters
        ----------
        src : `ButlerURI`
            Source URI.
        transfer : `str`
            Mode to use for transferring the resource. Generically there are
            many standard options: copy, link, symlink, hardlink, relsymlink.
            Not all URIs support all modes.
        overwrite : `bool`, optional
            Allow an existing file to be overwritten. Defaults to `False`.
        transaction : `DatastoreTransaction`, optional
            A transaction object that can (depending on implementation)
            rollback transfers on error. Not guaranteed to be implemented.

        Notes
        -----
        Conceptually this is hard to scale as the number of URI schemes
        grow. The destination URI is more important than the source URI
        since that is where all the transfer modes are relevant (with the
        complication that "move" deletes the source).

        Local file to local file is the fundamental use case but every
        other scheme has to support "copy" to local file (with implicit
        support for "move") and copy from local file.
        All the "link" options tend to be specific to local file systems.

        "move" is a "copy" where the remote resource is deleted at the end.
        Whether this works depends on the source URI rather than the
        destination URI. Reverting a move on transaction rollback is
        expected to be problematic if a remote resource was involved.
        """
        raise NotImplementedError(f"No transfer modes supported by URI scheme {self.scheme}")

755 

756 

757class ButlerFileURI(ButlerURI): 

758 """URI for explicit ``file`` scheme.""" 

759 

760 transferModes = ("copy", "link", "symlink", "hardlink", "relsymlink", "auto", "move") 

761 transferDefault: str = "link" 

762 

@property
def ospath(self) -> str:
    """Local-OS form of the URI path, with URI quoting removed.

    The path held internally is quoted, as a formal URI requires, so it
    is unquoted after being converted to the local separator.
    """
    localized = posix2os(self._uri.path)
    return urllib.parse.unquote(localized)

770 

def exists(self) -> bool:
    """Report whether the local path exists.

    Relies on `os.path.exists`, so a soft link whose target is missing
    counts as not existing.
    """
    localPath = self.ospath
    return os.path.exists(localPath)

775 

def size(self) -> int:
    """Return the size in bytes of the local file, or 0 for a directory."""
    if os.path.isdir(self.ospath):
        # Directories are reported as having no size
        return 0
    return os.stat(self.ospath).st_size

783 

def remove(self) -> None:
    """Delete the local file that this URI refers to."""
    target = self.ospath
    os.remove(target)

787 

def as_local(self) -> Tuple[str, bool]:
    """Return the local path of the file.

    Returns
    -------
    path : `str`
        The local path to this file.
    temporary : `bool`
        Always `False` since the file itself is returned, not a
        temporary copy.
    """
    is_temporary = False
    return self.ospath, is_temporary

799 

def _force_to_file(self) -> ButlerFileURI:
    """Return a copy of this URI that uses the ``file`` scheme.

    Returns
    -------
    file : `ButlerFileURI`
        A copy of the URI using file scheme. If already a file scheme
        the copy will be identical.

    Raises
    ------
    ValueError
        Raised if this URI is schemeless and relative path and so can
        not be forced to file absolute path without context.
    """
    # Already a file scheme here, so a plain copy is all that is needed
    duplicate = copy.copy(self)
    return duplicate

817 

def relative_to(self, other: ButlerURI) -> Optional[str]:
    """Return the relative path from this URI to the other URI.

    Parameters
    ----------
    other : `ButlerURI`
        URI to use to calculate the relative path. Must be a parent
        of this URI.

    Returns
    -------
    subpath : `str`
        The sub path of this URI relative to the supplied other URI.
        Returns `None` if there is no parent child relationship.
        Scheme and netloc must match but for file URIs schemeless
        is also used. If this URI is a relative URI but the other is
        absolute, it is assumed to be in the parent completely unless it
        starts with ".." (in which case the path is combined and tested).
        If both URIs are relative, the relative paths are compared
        for commonality.

    Notes
    -----
    By definition a relative path will be relative to the enclosing
    absolute parent URI. It will be returned unchanged if it does not
    use a parent directory specification.
    """
    # Self is known to be a file. Only another file URI or a schemeless
    # URI can possibly share a path with it.
    if other.scheme not in ("", "file"):
        return None

    if not self.isabs():
        if not other.isabs():
            # Both relative: the generic comparison applies, such that
            # a/b/c.txt and a/b/ returns c.txt.
            return super().relative_to(other)

        # Relative child with an absolute parent: anchor the child on
        # the parent first. This is solely to cope with a relative path
        # that includes ".." but still ends up inside the parent.
        anchored = other.join(self.path)
        return anchored.relative_to(other)

    # Identical schemes can go straight to the base class implementation.
    if self.scheme == other.scheme:
        return super().relative_to(other)

    # One is schemeless and the other is not, which the base class can
    # not handle. Both are absolute so forcing to file is fine.
    # The cast convinces mypy that other has to be a ButlerFileURI in
    # order to get to this part of the code.
    otherAsFile = cast(ButlerFileURI, other)._force_to_file()
    return self._force_to_file().relative_to(otherAsFile)

874 

def read(self, size: int = -1) -> bytes:
    """Read bytes from the local file.

    Parameters
    ----------
    size : `int`, optional
        Number of bytes to read, passed straight to the file object's
        ``read``. The default of -1 reads everything.
    """
    with open(self.ospath, "rb") as stream:
        contents = stream.read(size)
    return contents

879 

def write(self, data: bytes, overwrite: bool = True) -> None:
    """Write bytes to the local file, creating its directory if needed.

    Parameters
    ----------
    data : `bytes`
        The bytes to write.
    overwrite : `bool`, optional
        If `True` the file is opened with mode ``wb``. Otherwise it is
        opened with the exclusive-creation mode ``xb``.
    """
    parentDir = os.path.dirname(self.ospath)
    if not os.path.exists(parentDir):
        safeMakeDir(parentDir)
    mode = "wb" if overwrite else "xb"
    with open(self.ospath, mode) as stream:
        stream.write(data)

890 

def mkdir(self) -> None:
    """Create the local directory if it does not already exist.

    Raises
    ------
    FileExistsError
        Raised if the path exists but is not a directory.
    """
    target = self.ospath
    if os.path.exists(target):
        if not os.path.isdir(target):
            raise FileExistsError(f"URI {self} exists but is not a directory!")
    else:
        safeMakeDir(target)

896 

def transfer_from(self, src: ButlerURI, transfer: str,
                  overwrite: bool = False,
                  transaction: Optional[Union[DatastoreTransaction, NoTransaction]] = None) -> None:
    """Transfer the supplied source resource to this local file.

    Parameters
    ----------
    src : `ButlerURI`
        Source URI.
    transfer : `str`
        Mode to use for transferring the resource. Must be one of
        ``self.transferModes``. The modes handled here are: auto, copy,
        move, link, symlink, hardlink, relsymlink.
    overwrite : `bool`, optional
        Allow an existing file to be overwritten. Defaults to `False`.
    transaction : `DatastoreTransaction`, optional
        If a transaction is provided, undo actions will be registered.

    Raises
    ------
    ValueError
        Raised if the transfer mode is not in ``self.transferModes``.
    FileNotFoundError
        Raised if the local form of the source does not exist.
    RuntimeError
        Raised if a link mode is requested for a source that had to be
        retrieved into a temporary file.
    FileExistsError
        Raised if the destination exists and ``overwrite`` is `False`.
    NotImplementedError
        Raised if the mode is accepted but not handled by this method.
    """
    # Fail early to prevent delays if remote resources are requested
    if transfer not in self.transferModes:
        raise ValueError(f"Transfer mode '{transfer}' not supported by URI scheme {self.scheme}")

    # The existence checks can be expensive (e.g. for a remote source)
    # so only evaluate them if the message will really be emitted.
    if log.isEnabledFor(logging.DEBUG):
        log.debug(f"Transferring {src} [exists: {src.exists()}] -> "
                  f"{self} [exists: {self.exists()}] (transfer={transfer})")

    # We do not have to special case ButlerFileURI here because
    # as_local handles that.
    local_src, is_temporary = src.as_local()

    try:
        # Default transfer mode depends on whether we have a temporary
        # file or not.
        if transfer == "auto":
            transfer = self.transferDefault if not is_temporary else "copy"

        # Follow soft links
        local_src = os.path.realpath(os.path.normpath(local_src))

        if not os.path.exists(local_src):
            raise FileNotFoundError(f"Source URI {src} does not exist")

        # All the modes involving linking use "link" somewhere
        if "link" in transfer and is_temporary:
            raise RuntimeError("Can not use local file system transfer mode"
                               f" {transfer} for remote resource ({src})")

        # For temporary files we can own them
        requested_transfer = transfer
        if is_temporary and transfer == "copy":
            transfer = "move"

        # The output location should not exist
        dest_exists = self.exists()
        if not overwrite and dest_exists:
            raise FileExistsError(f"Destination path '{self}' already exists. Transfer "
                                  f"from {src} cannot be completed.")

        # Make the path absolute (but don't follow links since that
        # would possibly cause us to end up in the wrong place if the
        # file existed already as a soft link)
        newFullPath = os.path.abspath(self.ospath)
        outputDir = os.path.dirname(newFullPath)
        if not os.path.isdir(outputDir):
            # Must create the directory -- this can not be rolled back
            # since another transfer running concurrently may
            # be relying on this existing.
            safeMakeDir(outputDir)

        if transaction is None:
            # Use a no-op transaction to reduce code duplication
            transaction = NoTransaction()

        # For links the OS doesn't let us overwrite so if something does
        # exist we have to remove it before we do the actual "transfer"
        # below
        if "link" in transfer and overwrite and dest_exists:
            try:
                self.remove()
            except Exception as err:
                # Deliberately best-effort: if this fails the problem
                # will manifest immediately below with a more relevant
                # error message. Record it to help with debugging.
                log.debug("Unable to remove existing %s before linking: %s", self, err)

        if transfer == "move":
            with transaction.undoWith(f"move from {local_src}", shutil.move, newFullPath, local_src):
                shutil.move(local_src, newFullPath)
        elif transfer == "copy":
            with transaction.undoWith(f"copy from {local_src}", os.remove, newFullPath):
                shutil.copy(local_src, newFullPath)
        elif transfer == "link":
            # Try hard link and if that fails use a symlink
            with transaction.undoWith(f"link to {local_src}", os.remove, newFullPath):
                try:
                    os.link(local_src, newFullPath)
                except OSError:
                    # Read through existing symlinks
                    os.symlink(local_src, newFullPath)
        elif transfer == "hardlink":
            with transaction.undoWith(f"hardlink to {local_src}", os.remove, newFullPath):
                os.link(local_src, newFullPath)
        elif transfer == "symlink":
            # Read through existing symlinks
            with transaction.undoWith(f"symlink to {local_src}", os.remove, newFullPath):
                os.symlink(local_src, newFullPath)
        elif transfer == "relsymlink":
            # This is a standard symlink but using a relative path
            # Need the directory name to give to relative root
            # A full file path confuses it into an extra ../
            newFullPathRoot = os.path.dirname(newFullPath)
            relPath = os.path.relpath(local_src, newFullPathRoot)
            with transaction.undoWith(f"relsymlink to {local_src}", os.remove, newFullPath):
                os.symlink(relPath, newFullPath)
        else:
            raise NotImplementedError("Transfer type '{}' not supported.".format(transfer))

        # This was an explicit move requested from a remote resource
        # try to remove that resource. We check is_temporary because
        # the local file would have been moved by shutil.move already.
        if requested_transfer == "move" and is_temporary:
            # Transactions do not work here
            src.remove()
    finally:
        # A temporary file is normally consumed by the move above, but
        # if anything failed before that point it must not be left
        # behind on disk.
        if is_temporary and os.path.exists(local_src):
            os.remove(local_src)

1020 

@staticmethod
def _fixupPathUri(parsed: urllib.parse.ParseResult, root: Optional[Union[str, ButlerURI]] = None,
                  forceAbsolute: bool = False,
                  forceDirectory: bool = False) -> Tuple[urllib.parse.ParseResult, bool]:
    """Fix up relative paths in URI instances.

    Parameters
    ----------
    parsed : `~urllib.parse.ParseResult`
        The result from parsing a URI using `urllib.parse`.
    root : `str` or `ButlerURI`, optional
        Path to use as root when converting relative to absolute.
        If `None`, it will be the current working directory. This
        is a local file system path, or a file URI. It is only used if
        a file-scheme is used incorrectly with a relative path.
    forceAbsolute : `bool`, ignored
        Has no effect for this subclass. ``file`` URIs are always
        absolute.
    forceDirectory : `bool`, optional
        If `True` forces the URI to end with a separator, otherwise given
        URI is interpreted as is.

    Returns
    -------
    modified : `~urllib.parse.ParseResult`
        Update result if a URI is being handled.
    dirLike : `bool`
        `True` if given parsed URI has a trailing separator, refers to
        an existing directory, or forceDirectory is True. Otherwise
        `False`.

    Raises
    ------
    RuntimeError
        Raised if ``root`` is a `ButlerURI` with a scheme other than
        ``file``.

    Notes
    -----
    Relative paths are explicitly not supported by RFC8089 but `urllib`
    does accept URIs of the form ``file:relative/path.ext``. They need
    to be turned into absolute paths before they can be used. This is
    always done regardless of the ``forceAbsolute`` parameter.
    """
    # assume we are not dealing with a directory like URI
    dirLike = False

    # file URI implies POSIX path separators so split as POSIX,
    # then join as os, and convert to abspath. Do not handle
    # home directories since "file" scheme is explicitly documented
    # to not do tilde expansion.
    sep = posixpath.sep
    endsOnSep = parsed.path.endswith(sep)

    # For local file system we can explicitly check to see if this
    # really is a directory. The URI might point to a location that
    # does not exists yet but all that matters is if it is a directory
    # then we make sure use that fact. No need to do the check if
    # we are already being told.
    # The path of a file URI is percent-quoted so it has to be unquoted
    # before the file system is asked about it.
    if not forceDirectory and posixpath.isdir(urllib.parse.unquote(parsed.path)):
        forceDirectory = True

    # For an absolute path all we need to do is check if we need
    # to force the directory separator. A trailing separator on the
    # given path marks it as directory-like even if nothing exists
    # there yet.
    if posixpath.isabs(parsed.path):
        if forceDirectory or endsOnSep:
            if not endsOnSep:
                parsed = parsed._replace(path=parsed.path + sep)
            dirLike = True
        return copy.copy(parsed), dirLike

    # Relative path so must fix it to be compliant with the standard

    if root is None:
        root = os.path.abspath(os.path.curdir)
    elif isinstance(root, ButlerURI):
        if root.scheme and root.scheme != "file":
            raise RuntimeError(f"The override root must be a file URI not {root.scheme}")
        root = os.path.abspath(root.ospath)

    newPath = posixpath.normpath(posixpath.join(os2posix(root), parsed.path))

    # normpath strips trailing "/" so put it back if necessary,
    # taking care never to end up with a doubled separator.
    if forceDirectory or endsOnSep:
        dirLike = True
        if not newPath.endswith(sep):
            newPath += sep

    # ParseResult is a NamedTuple so _replace is standard API
    parsed = parsed._replace(path=newPath)

    if parsed.params or parsed.query:
        log.warning("Additional items unexpectedly encountered in file URI: %s", parsed.geturl())

    return parsed, dirLike

1111 

1112 

class ButlerS3URI(ButlerURI):
    """S3 URI"""

    @property
    def client(self) -> boto3.client:
        """Client object to address remote resource."""
        # Defer import for circular dependencies
        from .s3utils import getS3Client
        return getS3Client()

    def exists(self) -> bool:
        """Check whether the bucket (for a root URI) or the key exists."""
        # s3utils itself imports ButlerURI so defer this import
        from .s3utils import s3CheckFileExists, bucketExists
        if self.is_root:
            # Only check for the bucket since the path is irrelevant
            return bucketExists(self.netloc)
        exists, _ = s3CheckFileExists(self, client=self.client)
        return exists

    def size(self) -> int:
        """Return the size reported for the key, or 0 for a dir-like URI."""
        # s3utils itself imports ButlerURI so defer this import
        from .s3utils import s3CheckFileExists
        if self.dirLike:
            return 0
        _, sz = s3CheckFileExists(self, client=self.client)
        return sz

    def remove(self) -> None:
        """Remove the resource."""

        # https://github.com/boto/boto3/issues/507 - there is no
        # way of knowing if the file was actually deleted except
        # for checking all the keys again, response is HTTP 204 OK
        # response all the time
        self.client.delete_object(Bucket=self.netloc, Key=self.relativeToPathRoot)

    def read(self, size: int = -1) -> bytes:
        """Read the contents of the resource.

        Parameters
        ----------
        size : `int`, optional
            The number of bytes to read from the start of the resource.
            A value that is not positive reads everything.

        Returns
        -------
        data : `bytes`
            The bytes read.

        Raises
        ------
        FileNotFoundError
            Raised if the key or the bucket does not exist.
        """
        args = {}
        if size > 0:
            args["Range"] = f"bytes=0-{size-1}"
        client = self.client
        try:
            response = client.get_object(Bucket=self.netloc,
                                         Key=self.relativeToPathRoot,
                                         **args)
        except (client.exceptions.NoSuchKey, client.exceptions.NoSuchBucket) as err:
            raise FileNotFoundError(f"No such resource: {self}") from err
        body = response["Body"]
        try:
            return body.read()
        finally:
            # Always release the stream, even if reading from it failed.
            body.close()

    def write(self, data: bytes, overwrite: bool = True) -> None:
        """Write the supplied bytes to the resource.

        Parameters
        ----------
        data : `bytes`
            The bytes to store.
        overwrite : `bool`, optional
            If `False` an existing resource is not replaced.

        Raises
        ------
        FileExistsError
            Raised if the resource exists and ``overwrite`` is `False`.
        """
        if not overwrite and self.exists():
            raise FileExistsError(f"Remote resource {self} exists and overwrite has been disabled")
        self.client.put_object(Bucket=self.netloc, Key=self.relativeToPathRoot,
                               Body=data)

    def mkdir(self) -> None:
        """Create the key standing in for a "directory" in the bucket.

        Raises
        ------
        ValueError
            Raised if the bucket does not exist or if this URI is not
            dir-like.
        """
        # Defer import for circular dependencies
        from .s3utils import bucketExists
        if not bucketExists(self.netloc):
            raise ValueError(f"Bucket {self.netloc} does not exist for {self}!")

        if not self.dirLike:
            raise ValueError(f"Can not create a 'directory' for file-like URI {self}")

        # don't create S3 key when root is at the top-level of an Bucket
        if self.path != "/":
            self.client.put_object(Bucket=self.netloc, Key=self.relativeToPathRoot)

    def as_local(self) -> Tuple[str, bool]:
        """Download object from S3 and place in temporary directory.

        Returns
        -------
        path : `str`
            Path to local temporary file.
        temporary : `bool`
            Always returns `True`. This is always a temporary file.
        """
        tmpFile = tempfile.NamedTemporaryFile(suffix=self.getExtension(), delete=False)
        try:
            with tmpFile:
                self.client.download_fileobj(self.netloc, self.relativeToPathRoot, tmpFile)
        except Exception:
            # Do not leave a partial download behind since the caller
            # never learns the name of the file.
            os.remove(tmpFile.name)
            raise
        return tmpFile.name, True

    def transfer_from(self, src: ButlerURI, transfer: str = "copy",
                      overwrite: bool = False,
                      transaction: Optional[Union[DatastoreTransaction, NoTransaction]] = None) -> None:
        """Transfer the supplied source resource to this S3 location.

        Parameters
        ----------
        src : `ButlerURI`
            Source URI.
        transfer : `str`
            Mode to use for transferring the resource. Must be one of
            ``self.transferModes``. The data are always copied; for
            ``move`` the source is removed afterwards and ``auto`` is
            replaced by ``self.transferDefault``.
        overwrite : `bool`, optional
            Allow an existing file to be overwritten. Defaults to `False`.
        transaction : `DatastoreTransaction`, optional
            Currently unused.

        Raises
        ------
        ValueError
            Raised if the transfer mode is not in ``self.transferModes``.
        FileExistsError
            Raised if the destination exists and ``overwrite`` is `False`.
        """
        # Fail early to prevent delays if remote resources are requested
        if transfer not in self.transferModes:
            raise ValueError(f"Transfer mode '{transfer}' not supported by URI scheme {self.scheme}")

        # Existence checks are remote calls so only make them if the
        # message will really be emitted.
        if log.isEnabledFor(logging.DEBUG):
            log.debug(f"Transferring {src} [exists: {src.exists()}] -> "
                      f"{self} [exists: {self.exists()}] (transfer={transfer})")

        if not overwrite and self.exists():
            raise FileExistsError(f"Destination path '{self}' already exists.")

        if transfer == "auto":
            transfer = self.transferDefault

        if isinstance(src, type(self)):
            # Looks like an S3 remote uri so we can use direct copy
            # note that boto3.resource.meta.copy is cleverer than the low
            # level copy_object
            copy_source = {
                "Bucket": src.netloc,
                "Key": src.relativeToPathRoot,
            }
            self.client.copy_object(CopySource=copy_source, Bucket=self.netloc, Key=self.relativeToPathRoot)
        else:
            # Use local file and upload it
            local_src, is_temporary = src.as_local()

            # resource.meta.upload_file seems like the right thing
            # but we have a low level client
            try:
                with open(local_src, "rb") as fh:
                    self.client.put_object(Bucket=self.netloc,
                                           Key=self.relativeToPathRoot, Body=fh)
            finally:
                # The temporary copy is ours so clean it up even if the
                # upload failed.
                if is_temporary:
                    os.remove(local_src)

        # This was an explicit move requested from a remote resource
        # try to remove that resource
        if transfer == "move":
            # Transactions do not work here
            src.remove()

1253 

1254 

class ButlerPackageResourceURI(ButlerURI):
    """URI referring to a Python package resource.

    These URIs look like: ``resource://lsst.daf.butler/configs/file.yaml``
    where the network location is the Python package and the path is the
    resource name.
    """

    def exists(self) -> bool:
        """Report whether the resource can be found in the package."""
        package = self.netloc
        resource = self.relativeToPathRoot
        return pkg_resources.resource_exists(package, resource)

    def read(self, size: int = -1) -> bytes:
        """Read up to ``size`` bytes from the package resource.

        The value of ``size`` is handed to the ``read`` method of the
        stream that `pkg_resources.resource_stream` returns.
        """
        package = self.netloc
        resource = self.relativeToPathRoot
        stream = pkg_resources.resource_stream(package, resource)
        with stream as fh:
            content = fh.read(size)
        return content

1270 

1271 

class ButlerHttpURI(ButlerURI):
    """General HTTP(S) resource."""

    # Session shared by all instances. It starts out as a plain session
    # and may be replaced by a configured one the first time the
    # ``session`` property is used.
    _session = requests.Session()
    _sessionInitialized = False

    @property
    def session(self) -> requests.Session:
        """Client object to address remote resource."""
        from .webdavutils import refreshToken, isTokenAuth, getHttpSession, isWebdavEndpoint
        if ButlerHttpURI._sessionInitialized:
            if isTokenAuth():
                refreshToken(ButlerHttpURI._session)
            return ButlerHttpURI._session

        baseURL = self.scheme + "://" + self.netloc

        if isWebdavEndpoint(baseURL):
            log.debug("%s looks like a Webdav endpoint.", baseURL)
            s = getHttpSession()
        else:
            # Not a Webdav endpoint: keep using the plain default session
            # rather than failing on an unassigned variable.
            s = ButlerHttpURI._session

        ButlerHttpURI._session = s
        ButlerHttpURI._sessionInitialized = True
        return s

    def exists(self) -> bool:
        """Check that a remote HTTP resource exists."""
        log.debug("Checking if resource exists: %s", self.geturl())
        r = self.session.head(self.geturl())

        return r.status_code == 200

    def size(self) -> int:
        """Return the size of the resource from its ``Content-Length``.

        Returns
        -------
        sz : `int`
            Always 0 for a dir-like URI, otherwise the value of the
            ``Content-Length`` header of a HEAD request.

        Raises
        ------
        FileNotFoundError
            Raised if the HEAD request does not return status 200.
        """
        if self.dirLike:
            return 0
        r = self.session.head(self.geturl())
        if r.status_code == 200:
            return int(r.headers['Content-Length'])
        else:
            raise FileNotFoundError(f"Resource {self} does not exist")

    def mkdir(self) -> None:
        """For a dir-like URI, create the directory resource if it does not
        already exist.

        Raises
        ------
        ValueError
            Raised if this URI is not dir-like or if the server refuses
            to create the directory.
        """
        if not self.dirLike:
            raise ValueError(f"Can not create a 'directory' for file-like URI {self}")

        if not self.exists():
            # We need to test the absence of the parent directory,
            # but also if parent URL is different from self URL,
            # otherwise we could be stuck in a recursive loop
            # where self == parent. Compare the URLs first since that
            # does not need a request to the server.
            parent = self.parent()
            if parent.geturl() != self.geturl() and not parent.exists():
                parent.mkdir()
            log.debug("Creating new directory: %s", self.geturl())
            r = self.session.request("MKCOL", self.geturl())
            if r.status_code != 201:
                if r.status_code == 405:
                    log.debug("Can not create directory: %s may already exist: skipping.", self.geturl())
                else:
                    raise ValueError(f"Can not create directory {self}, status code: {r.status_code}")

    def remove(self) -> None:
        """Remove the resource.

        Raises
        ------
        FileNotFoundError
            Raised if the DELETE request does not report success.
        """
        log.debug("Removing resource: %s", self.geturl())
        r = self.session.delete(self.geturl())
        if r.status_code not in [200, 202, 204]:
            raise FileNotFoundError(f"Unable to delete resource {self}; status code: {r.status_code}")

    def as_local(self) -> Tuple[str, bool]:
        """Download object over HTTP and place in temporary directory.

        Returns
        -------
        path : `str`
            Path to local temporary file.
        temporary : `bool`
            Always returns `True`. This is always a temporary file.

        Raises
        ------
        FileNotFoundError
            Raised if the GET request does not return status 200.
        """
        # The default chunk size of iter_content is a single byte which
        # makes downloads extremely slow.
        chunkSize = 65536

        log.debug("Downloading remote resource as local file: %s", self.geturl())
        r = self.session.get(self.geturl(), stream=True)
        try:
            if r.status_code != 200:
                raise FileNotFoundError(f"Unable to download resource {self}; status code: {r.status_code}")
            tmpFile = tempfile.NamedTemporaryFile(suffix=self.getExtension(), delete=False)
            try:
                with tmpFile:
                    for chunk in r.iter_content(chunk_size=chunkSize):
                        tmpFile.write(chunk)
            except Exception:
                # Do not leave a partial download behind since the caller
                # never learns the name of the file.
                os.remove(tmpFile.name)
                raise
        finally:
            # A streamed response holds on to its connection until closed.
            r.close()
        return tmpFile.name, True

    def read(self, size: int = -1) -> bytes:
        """Open the resource and return the contents in bytes.

        Parameters
        ----------
        size : `int`, optional
            The number of bytes to read. Negative or omitted indicates
            that all data should be read.

        Returns
        -------
        data : `bytes`
            The bytes read. Empty if a positive ``size`` was requested
            but the resource has no content.

        Raises
        ------
        FileNotFoundError
            Raised if the GET request does not return status 200.
        """
        log.debug("Reading from remote resource: %s", self.geturl())
        stream = size > 0
        r = self.session.get(self.geturl(), stream=stream)
        try:
            if r.status_code != 200:
                raise FileNotFoundError(f"Unable to read resource {self}; status code: {r.status_code}")
            if not stream:
                return r.content
            # Only the first chunk is wanted; an empty resource yields no
            # chunks at all so supply a default.
            return next(r.iter_content(chunk_size=size), b"")
        finally:
            # Release the connection, in particular for a streamed
            # response that was only partially consumed.
            r.close()

    def write(self, data: bytes, overwrite: bool = True) -> None:
        """Write the supplied bytes to the new resource.

        Parameters
        ----------
        data : `bytes`
            The bytes to write to the resource. The entire contents of the
            resource will be replaced.
        overwrite : `bool`, optional
            If `True` the resource will be overwritten if it exists. Otherwise
            the write will fail.

        Raises
        ------
        FileExistsError
            Raised if the resource exists and ``overwrite`` is `False`.
        ValueError
            Raised if the PUT request does not report success.
        """
        from .webdavutils import finalurl
        log.debug("Writing to remote resource: %s", self.geturl())
        if not overwrite and self.exists():
            raise FileExistsError(f"Remote resource {self} exists and overwrite has been disabled")
        dest_url = finalurl(self._emptyPut())
        r = self.session.put(dest_url, data=data)
        if r.status_code not in [201, 202, 204]:
            raise ValueError(f"Can not write file {self}, status code: {r.status_code}")

    def transfer_from(self, src: ButlerURI, transfer: str = "copy",
                      overwrite: bool = False,
                      transaction: Optional[Union[DatastoreTransaction, NoTransaction]] = None) -> None:
        """Transfer the supplied source resource to this HTTP location.

        Parameters
        ----------
        src : `ButlerURI`
            Source URI.
        transfer : `str`
            Mode to use for transferring the resource. Must be one of
            ``self.transferModes``. ``auto`` is replaced by
            ``self.transferDefault``. Between two URIs of this class a
            ``move`` is a MOVE request and anything else is a COPY
            request. Any other source is uploaded from a local file and,
            for ``move``, removed afterwards.
        overwrite : `bool`, optional
            Allow an existing file to be overwritten. Defaults to `False`.
        transaction : `DatastoreTransaction`, optional
            Currently unused.

        Raises
        ------
        ValueError
            Raised if the transfer mode is not in ``self.transferModes``
            or if the server does not report success.
        FileExistsError
            Raised if the destination exists and ``overwrite`` is `False`.
        """
        from .webdavutils import finalurl
        # Fail early to prevent delays if remote resources are requested
        if transfer not in self.transferModes:
            raise ValueError(f"Transfer mode {transfer} not supported by URI scheme {self.scheme}")

        # Existence checks are remote calls so only make them if the
        # message will really be emitted.
        if log.isEnabledFor(logging.DEBUG):
            log.debug(f"Transferring {src} [exists: {src.exists()}] -> "
                      f"{self} [exists: {self.exists()}] (transfer={transfer})")

        if not overwrite and self.exists():
            raise FileExistsError(f"Destination path {self} already exists.")

        if transfer == "auto":
            transfer = self.transferDefault

        remoteToRemote = isinstance(src, type(self))
        if remoteToRemote:
            if transfer == "move":
                r = self.session.request("MOVE", src.geturl(), headers={"Destination": self.geturl()})
                log.debug("Running move via MOVE HTTP request.")
            else:
                r = self.session.request("COPY", src.geturl(), headers={"Destination": self.geturl()})
                log.debug("Running copy via COPY HTTP request.")
        else:
            # Use local file and upload it
            local_src, is_temporary = src.as_local()
            try:
                with open(local_src, "rb") as f:
                    dest_url = finalurl(self._emptyPut())
                    r = self.session.put(dest_url, data=f)
            finally:
                # The temporary copy is ours so clean it up even if the
                # upload failed.
                if is_temporary:
                    os.remove(local_src)
            log.debug("Running transfer from a local copy of the file.")

        if r.status_code not in [201, 202, 204]:
            raise ValueError(f"Can not transfer file {self}, status code: {r.status_code}")

        # The MOVE request takes care of the source itself. For any other
        # kind of source an explicit move means the source has to be
        # removed once the upload has succeeded.
        if transfer == "move" and not remoteToRemote:
            # Transactions do not work here
            src.remove()

    def _emptyPut(self) -> requests.Response:
        """Send an empty PUT request to current URL. This is used to detect
        if redirection is enabled before sending actual data.

        Returns
        -------
        response : `requests.Response`
            HTTP Response from the endpoint.
        """
        return self.session.put(self.geturl(), data=None,
                                headers={"Content-Length": "0"}, allow_redirects=False)

1462 

1463 

class ButlerInMemoryURI(ButlerURI):
    """Internal in-memory datastore URI (`mem://`).

    Not used for any real purpose other than indicating that the dataset
    is in memory.
    """

    def exists(self) -> bool:
        """Test for existence and always return `True`."""
        return True

    def as_local(self) -> Tuple[str, bool]:
        """Refuse to provide a local file for an in-memory URI.

        Raises
        ------
        RuntimeError
            Always raised.
        """
        raise RuntimeError(f"Do not know how to retrieve data for URI '{self}'")

1477 

1478 

class ButlerSchemelessURI(ButlerFileURI):
    """Scheme-less URI referring to the local file system"""

    # Paths are in the form used by the local OS and are not URI-quoted.
    _pathLib = PurePath
    _pathModule = os.path
    quotePaths = False

    @property
    def ospath(self) -> str:
        """Path component of the URI localized to current OS."""
        return self.path

    def isabs(self) -> bool:
        """Indicate that the resource is fully specified.

        For non-schemeless URIs this is always true.

        Returns
        -------
        isabs : `bool`
            `True` if the file is absolute, `False` otherwise.
        """
        return os.path.isabs(self.ospath)

    def _force_to_file(self) -> ButlerFileURI:
        """Force a schemeless URI to a file URI and returns a new URI.

        This will include URI quoting of the path.

        Returns
        -------
        file : `ButlerFileURI`
            A copy of the URI using file scheme. If already a file scheme
            the copy will be identical.

        Raises
        ------
        RuntimeError
            Raised if this URI is schemeless and relative path and so can
            not be forced to file absolute path without context.
        """
        if not self.isabs():
            raise RuntimeError(f"Internal error: Can not force {self} to absolute file URI")
        quotedPath = urllib.parse.quote(os2posix(self.path))
        uri = self._uri._replace(scheme="file", path=quotedPath)
        # mypy really wants a ButlerFileURI to be returned here
        return ButlerURI(uri, forceDirectory=self.dirLike)  # type: ignore

    @staticmethod
    def _fixupPathUri(parsed: urllib.parse.ParseResult, root: Optional[Union[str, ButlerURI]] = None,
                      forceAbsolute: bool = False,
                      forceDirectory: bool = False) -> Tuple[urllib.parse.ParseResult, bool]:
        """Fix up relative paths for local file system.

        Parameters
        ----------
        parsed : `~urllib.parse.ParseResult`
            The result from parsing a URI using `urllib.parse`.
        root : `str` or `ButlerURI`, optional
            Path to use as root when converting relative to absolute.
            If `None`, it will be the current working directory. This
            is a local file system path, or a file URI.
        forceAbsolute : `bool`, optional
            If `True`, a relative path is joined to ``root`` to make it
            absolute. If `False` a relative path stays relative. In
            neither case does this method change the scheme of a path
            that was given as relative.
        forceDirectory : `bool`, optional
            If `True` forces the URI to end with a separator, otherwise given
            URI is interpreted as is.

        Returns
        -------
        modified : `~urllib.parse.ParseResult`
            Update result if a URI is being handled. A path that is
            absolute as given (after tilde expansion) is returned with
            the ``file`` scheme and a quoted POSIX path.
        dirLike : `bool`
            `True` if given parsed URI has a trailing separator, is
            empty, refers to an existing directory, or forceDirectory
            is True. Otherwise `False`.

        Raises
        ------
        RuntimeError
            Raised if ``root`` is a `ButlerURI` with a scheme other than
            ``file``.

        Notes
        -----
        Relative paths are explicitly not supported by RFC8089 but `urllib`
        does accept URIs of the form ``file:relative/path.ext``. They need
        to be turned into absolute paths before they can be used. This is
        always done regardless of the ``forceAbsolute`` parameter.

        Scheme-less paths are normalized.
        """
        # assume we are not dealing with a directory URI
        dirLike = False

        # Replacement values for the URI
        replacements = {}

        if root is None:
            root = os.path.abspath(os.path.curdir)
        elif isinstance(root, ButlerURI):
            if root.scheme and root.scheme != "file":
                raise RuntimeError(f"The override root must be a file URI not {root.scheme}")
            root = os.path.abspath(root.ospath)

        # this is a local OS file path which can support tilde expansion.
        # we quoted it in the constructor so unquote here
        expandedPath = os.path.expanduser(urllib.parse.unquote(parsed.path))

        # Ensure that this becomes a file URI if it is already absolute
        if os.path.isabs(expandedPath):
            replacements["scheme"] = "file"
            # Keep in OS form for now to simplify later logic
            replacements["path"] = os.path.normpath(expandedPath)
        elif forceAbsolute:
            # This can stay in OS path form, do not change to file
            # scheme.
            replacements["path"] = os.path.normpath(os.path.join(root, expandedPath))
        else:
            # No change needed for relative local path staying relative
            # except normalization
            replacements["path"] = os.path.normpath(expandedPath)
            # normalization of empty path returns "." so we are dirLike
            if expandedPath == "":
                dirLike = True

        # normpath strips trailing "/" which makes it hard to keep
        # track of directory vs file when calling replaceFile

        # For local file system we can explicitly check to see if this
        # really is a directory. The URI might point to a location that
        # does not exists yet but all that matters is if it is a directory
        # then we make sure use that fact. No need to do the check if
        # we are already being told.
        if not forceDirectory and os.path.isdir(replacements["path"]):
            forceDirectory = True

        # add the trailing separator only if explicitly required or
        # if it was stripped by normpath. Acknowledge that trailing
        # separator exists.
        endsOnSep = expandedPath.endswith(os.sep) and not replacements["path"].endswith(os.sep)
        if forceDirectory or endsOnSep or dirLike:
            dirLike = True
            if not replacements["path"].endswith(os.sep):
                replacements["path"] += os.sep

        if "scheme" in replacements:
            # This is now meant to be a URI path so force to posix
            # and quote
            replacements["path"] = urllib.parse.quote(os2posix(replacements["path"]))

        # ParseResult is a NamedTuple so _replace is standard API
        parsed = parsed._replace(**replacements)

        if parsed.params or parsed.fragment or parsed.query:
            log.warning("Additional items unexpectedly encountered in schemeless URI: %s", parsed.geturl())

        return parsed, dirLike

1631 return parsed, dirLike