Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ("ButlerURI",) 

25 

26import contextlib 

27import os 

28import os.path 

29import shutil 

30import urllib 

31import pkg_resources 

32import posixpath 

33from pathlib import Path, PurePath, PurePosixPath 

34import requests 

35import tempfile 

36import copy 

37import logging 

38import re 

39 

40from typing import ( 

41 TYPE_CHECKING, 

42 Any, 

43 Callable, 

44 cast, 

45 Iterator, 

46 Optional, 

47 Tuple, 

48 Type, 

49 Union, 

50) 

51 

52from .utils import safeMakeDir 

53 

54if TYPE_CHECKING: 54 ↛ 55line 54 didn't jump to line 55, because the condition on line 54 was never true

55 try: 

56 import boto3 

57 except ImportError: 

58 pass 

59 from .datastore import DatastoreTransaction 

60 

61 

62log = logging.getLogger(__name__) 

63 

64# Determine if the path separator for the OS looks like POSIX 

65IS_POSIX = os.sep == posixpath.sep 

66 

67# Root path for this operating system 

68OS_ROOT_PATH = Path().resolve().root 

69 

70# Regex for looking for URI escapes 

71ESCAPES_RE = re.compile(r"%[A-F0-9]{2}") 

72 

73 

74def os2posix(ospath: str) -> str: 

75 """Convert a local path description to a POSIX path description. 

76 

77 Parameters 

78 ---------- 

79 ospath : `str` 

80 Path using the local path separator. 

81 

82 Returns 

83 ------- 

84 posix : `str` 

85 Path using POSIX path separator 

86 """ 

87 if IS_POSIX: 

88 return ospath 

89 

90 posix = PurePath(ospath).as_posix() 

91 

92 # PurePath strips trailing "/" from paths such that you can no 

93 # longer tell if a path is meant to be referring to a directory 

94 # Try to fix this. 

95 if ospath.endswith(os.sep) and not posix.endswith(posixpath.sep): 

96 posix += posixpath.sep 

97 

98 return posix 

99 

100 

101def posix2os(posix: Union[PurePath, str]) -> str: 

102 """Convert a POSIX path description to a local path description. 

103 

104 Parameters 

105 ---------- 

106 posix : `str`, `PurePath` 

107 Path using the POSIX path separator. 

108 

109 Returns 

110 ------- 

111 ospath : `str` 

112 Path using OS path separator 

113 """ 

114 if IS_POSIX: 

115 return str(posix) 

116 

117 posixPath = PurePosixPath(posix) 

118 paths = list(posixPath.parts) 

119 

120 # Have to convert the root directory after splitting 

121 if paths[0] == posixPath.root: 

122 paths[0] = OS_ROOT_PATH 

123 

124 # Trailing "/" is stripped so we need to add back an empty path 

125 # for consistency 

126 if str(posix).endswith(posixpath.sep): 

127 paths.append("") 

128 

129 return os.path.join(*paths) 

130 

131 

132class NoTransaction: 

133 """A simple emulation of the `DatastoreTransaction` class. 

134 

135 Does nothing. 

136 """ 

137 

138 def __init__(self) -> None: 

139 return 

140 

141 @contextlib.contextmanager 

142 def undoWith(self, name: str, undoFunc: Callable, *args: Any, **kwargs: Any) -> Iterator[None]: 

143 """No-op context manager to replace `DatastoreTransaction` 

144 """ 

145 yield None 

146 

147 

148class ButlerURI: 

149 """Convenience wrapper around URI parsers. 

150 

151 Provides access to URI components and can convert file 

152 paths into absolute path URIs. Scheme-less URIs are treated as if 

153 they are local file system paths and are converted to absolute URIs. 

154 

155 A specialist subclass is created for each supported URI scheme. 

156 

157 Parameters 

158 ---------- 

159 uri : `str` or `urllib.parse.ParseResult` 

160 URI in string form. Can be scheme-less if referring to a local 

161 filesystem path. 

162 root : `str` or `ButlerURI`, optional 

163 When fixing up a relative path in a ``file`` scheme or if scheme-less, 

164 use this as the root. Must be absolute. If `None` the current 

165 working directory will be used. Can be a file URI. 

166 forceAbsolute : `bool`, optional 

167 If `True`, scheme-less relative URI will be converted to an absolute 

168 path using a ``file`` scheme. If `False` scheme-less URI will remain 

169 scheme-less and will not be updated to ``file`` or absolute path. 

170 forceDirectory: `bool`, optional 

171 If `True` forces the URI to end with a separator, otherwise given URI 

172 is interpreted as is. 

173 """ 

174 

175 _pathLib: Type[PurePath] = PurePosixPath 

176 """Path library to use for this scheme.""" 

177 

178 _pathModule = posixpath 

179 """Path module to use for this scheme.""" 

180 

181 transferModes: Tuple[str, ...] = ("copy", "auto", "move") 

182 """Transfer modes supported by this implementation. 

183 

184 Move is special in that it is generally a copy followed by an unlink. 

185 Whether that unlink works depends critically on whether the source URI 

186 implements unlink. If it does not the move will be reported as a failure. 

187 """ 

188 

189 transferDefault: str = "copy" 

190 """Default mode to use for transferring if ``auto`` is specified.""" 

191 

192 quotePaths = True 

193 """True if path-like elements modifying a URI should be quoted. 

194 

195 All non-schemeless URIs have to internally use quoted paths. Therefore 

196 if a new file name is given (e.g. to updateFile or join) a decision must 

197 be made whether to quote it to be consistent. 

198 """ 

199 

200 # This is not an ABC with abstract methods because the __new__ being 

201 # a factory confuses mypy such that it assumes that every constructor 

202 # returns a ButlerURI and then determines that all the abstract methods 

203 # are still abstract. If they are not marked abstract but just raise 

204 # mypy is fine with it. 

205 

206 # mypy is confused without this 

207 _uri: urllib.parse.ParseResult 

208 

209 def __new__(cls, uri: Union[str, urllib.parse.ParseResult, ButlerURI], 

210 root: Optional[Union[str, ButlerURI]] = None, forceAbsolute: bool = True, 

211 forceDirectory: bool = False) -> ButlerURI: 

212 parsed: urllib.parse.ParseResult 

213 dirLike: bool 

214 subclass: Optional[Type] = None 

215 

216 # Record if we need to post process the URI components 

217 # or if the instance is already fully configured 

218 if isinstance(uri, str): 

219 # Since local file names can have special characters in them 

220 # we need to quote them for the parser but we can unquote 

221 # later. Assume that all other URI schemes are quoted. 

222 # Since sometimes people write file:/a/b and not file:///a/b 

223 # we should not quote in the explicit case of file: 

224 if "://" not in uri and not uri.startswith("file:"): 

225 if ESCAPES_RE.search(uri): 

226 log.warning("Possible double encoding of %s", uri) 

227 else: 

228 uri = urllib.parse.quote(uri) 

229 parsed = urllib.parse.urlparse(uri) 

230 elif isinstance(uri, urllib.parse.ParseResult): 

231 parsed = copy.copy(uri) 

232 elif isinstance(uri, ButlerURI): 

233 parsed = copy.copy(uri._uri) 

234 dirLike = uri.dirLike 

235 # No further parsing required and we know the subclass 

236 subclass = type(uri) 

237 else: 

238 raise ValueError(f"Supplied URI must be string, ButlerURI, or ParseResult but got '{uri!r}'") 

239 

240 if subclass is None: 

241 # Work out the subclass from the URI scheme 

242 if not parsed.scheme: 

243 subclass = ButlerSchemelessURI 

244 elif parsed.scheme == "file": 

245 subclass = ButlerFileURI 

246 elif parsed.scheme == "s3": 

247 subclass = ButlerS3URI 

248 elif parsed.scheme.startswith("http"): 

249 subclass = ButlerHttpURI 

250 elif parsed.scheme == "resource": 

251 # Rules for scheme names disasllow pkg_resource 

252 subclass = ButlerPackageResourceURI 

253 elif parsed.scheme == "mem": 

254 # in-memory datastore object 

255 subclass = ButlerInMemoryURI 

256 else: 

257 raise NotImplementedError(f"No URI support for scheme: '{parsed.scheme}'" 

258 " in {parsed.geturl()}") 

259 

260 parsed, dirLike = subclass._fixupPathUri(parsed, root=root, 

261 forceAbsolute=forceAbsolute, 

262 forceDirectory=forceDirectory) 

263 

264 # It is possible for the class to change from schemeless 

265 # to file so handle that 

266 if parsed.scheme == "file": 

267 subclass = ButlerFileURI 

268 

269 # Now create an instance of the correct subclass and set the 

270 # attributes directly 

271 self = object.__new__(subclass) 

272 self._uri = parsed 

273 self.dirLike = dirLike 

274 return self 

275 

276 @property 

277 def scheme(self) -> str: 

278 """The URI scheme (``://`` is not part of the scheme).""" 

279 return self._uri.scheme 

280 

281 @property 

282 def netloc(self) -> str: 

283 """The URI network location.""" 

284 return self._uri.netloc 

285 

286 @property 

287 def path(self) -> str: 

288 """The path component of the URI.""" 

289 return self._uri.path 

290 

291 @property 

292 def unquoted_path(self) -> str: 

293 """The path component of the URI with any URI quoting reversed.""" 

294 return urllib.parse.unquote(self._uri.path) 

295 

296 @property 

297 def ospath(self) -> str: 

298 """Path component of the URI localized to current OS.""" 

299 raise AttributeError(f"Non-file URI ({self}) has no local OS path.") 

300 

301 @property 

302 def relativeToPathRoot(self) -> str: 

303 """Returns path relative to network location. 

304 

305 Effectively, this is the path property with posix separator stripped 

306 from the left hand side of the path. 

307 

308 Always unquotes. 

309 """ 

310 p = self._pathLib(self.path) 

311 relToRoot = str(p.relative_to(p.root)) 

312 if self.dirLike and not relToRoot.endswith("/"): 

313 relToRoot += "/" 

314 return urllib.parse.unquote(relToRoot) 

315 

316 @property 

317 def fragment(self) -> str: 

318 """The fragment component of the URI.""" 

319 return self._uri.fragment 

320 

321 @property 

322 def params(self) -> str: 

323 """Any parameters included in the URI.""" 

324 return self._uri.params 

325 

326 @property 

327 def query(self) -> str: 

328 """Any query strings included in the URI.""" 

329 return self._uri.query 

330 

331 def geturl(self) -> str: 

332 """Return the URI in string form. 

333 

334 Returns 

335 ------- 

336 url : `str` 

337 String form of URI. 

338 """ 

339 return self._uri.geturl() 

340 

341 def split(self) -> Tuple[ButlerURI, str]: 

342 """Splits URI into head and tail. Equivalent to os.path.split where 

343 head preserves the URI components. 

344 

345 Returns 

346 ------- 

347 head: `ButlerURI` 

348 Everything leading up to tail, expanded and normalized as per 

349 ButlerURI rules. 

350 tail : `str` 

351 Last `self.path` component. Tail will be empty if path ends on a 

352 separator. Tail will never contain separators. It will be 

353 unquoted. 

354 """ 

355 head, tail = self._pathModule.split(self.path) 

356 headuri = self._uri._replace(path=head) 

357 

358 # The file part should never include quoted metacharacters 

359 tail = urllib.parse.unquote(tail) 

360 

361 # Schemeless is special in that it can be a relative path 

362 # We need to ensure that it stays that way. All other URIs will 

363 # be absolute already. 

364 forceAbsolute = self._pathModule.isabs(self.path) 

365 return ButlerURI(headuri, forceDirectory=True, forceAbsolute=forceAbsolute), tail 

366 

367 def basename(self) -> str: 

368 """Returns the base name, last element of path, of the URI. If URI ends 

369 on a slash returns an empty string. This is the second element returned 

370 by split(). 

371 

372 Equivalent of os.path.basename(). 

373 

374 Returns 

375 ------- 

376 tail : `str` 

377 Last part of the path attribute. Trail will be empty if path ends 

378 on a separator. 

379 """ 

380 return self.split()[1] 

381 

382 def dirname(self) -> ButlerURI: 

383 """Returns a ButlerURI containing all the directories of the path 

384 attribute. 

385 

386 Equivalent of os.path.dirname() 

387 

388 Returns 

389 ------- 

390 head : `ButlerURI` 

391 Everything except the tail of path attribute, expanded and 

392 normalized as per ButlerURI rules. 

393 """ 

394 return self.split()[0] 

395 

396 def parent(self) -> ButlerURI: 

397 """Returns a ButlerURI containing all the directories of the path 

398 attribute, minus the last one. 

399 

400 Returns 

401 ------- 

402 head : `ButlerURI` 

403 Everything except the tail of path attribute, expanded and 

404 normalized as per ButlerURI rules. 

405 """ 

406 # When self is file-like, return self.dirname() 

407 if not self.dirLike: 

408 return self.dirname() 

409 # When self is dir-like, return its parent directory, 

410 # regardless of the presence of a trailing separator 

411 originalPath = self._pathLib(self.path) 

412 parentPath = originalPath.parent 

413 parentURI = self._uri._replace(path=str(parentPath)) 

414 

415 return ButlerURI(parentURI, forceDirectory=True) 

416 

417 def replace(self, **kwargs: Any) -> ButlerURI: 

418 """Replace components in a URI with new values and return a new 

419 instance. 

420 

421 Returns 

422 ------- 

423 new : `ButlerURI` 

424 New `ButlerURI` object with updated values. 

425 """ 

426 return self.__class__(self._uri._replace(**kwargs)) 

427 

428 def updateFile(self, newfile: str) -> None: 

429 """Update in place the final component of the path with the supplied 

430 file name. 

431 

432 Parameters 

433 ---------- 

434 newfile : `str` 

435 File name with no path component. 

436 

437 Notes 

438 ----- 

439 Updates the URI in place. 

440 Updates the ButlerURI.dirLike attribute. The new file path will 

441 be quoted if necessary. 

442 """ 

443 if self.quotePaths: 

444 newfile = urllib.parse.quote(newfile) 

445 dir, _ = self._pathModule.split(self.path) 

446 newpath = self._pathModule.join(dir, newfile) 

447 

448 self.dirLike = False 

449 self._uri = self._uri._replace(path=newpath) 

450 

451 def getExtension(self) -> str: 

452 """Return the file extension(s) associated with this URI path. 

453 

454 Returns 

455 ------- 

456 ext : `str` 

457 The file extension (including the ``.``). Can be empty string 

458 if there is no file extension. Usually returns only the last 

459 file extension unless there is a special extension modifier 

460 indicating file compression, in which case the combined 

461 extension (e.g. ``.fits.gz``) will be returned. 

462 """ 

463 special = {".gz", ".bz2", ".xz", ".fz"} 

464 

465 extensions = self._pathLib(self.path).suffixes 

466 

467 if not extensions: 

468 return "" 

469 

470 ext = extensions.pop() 

471 

472 # Multiple extensions, decide whether to include the final two 

473 if extensions and ext in special: 

474 ext = f"{extensions[-1]}{ext}" 

475 

476 return ext 

477 

478 def join(self, path: str) -> ButlerURI: 

479 """Create a new `ButlerURI` with additional path components including 

480 a file. 

481 

482 Parameters 

483 ---------- 

484 path : `str` 

485 Additional file components to append to the current URI. Assumed 

486 to include a file at the end. Will be quoted depending on the 

487 associated URI scheme. 

488 

489 Returns 

490 ------- 

491 new : `ButlerURI` 

492 New URI with any file at the end replaced with the new path 

493 components. 

494 

495 Notes 

496 ----- 

497 Schemeless URIs assume local path separator but all other URIs assume 

498 POSIX separator if the supplied path has directory structure. It 

499 may be this never becomes a problem but datastore templates assume 

500 POSIX separator is being used. 

501 """ 

502 new = self.dirname() # By definition a directory URI 

503 

504 # new should be asked about quoting, not self, since dirname can 

505 # change the URI scheme for schemeless -> file 

506 if new.quotePaths: 

507 path = urllib.parse.quote(path) 

508 

509 newpath = self._pathModule.normpath(self._pathModule.join(new.path, path)) 

510 new._uri = new._uri._replace(path=newpath) 

511 # Declare the new URI not be dirLike unless path ended in / 

512 if not path.endswith(self._pathModule.sep): 

513 new.dirLike = False 

514 return new 

515 

516 def relative_to(self, other: ButlerURI) -> Optional[str]: 

517 """Return the relative path from this URI to the other URI. 

518 

519 Parameters 

520 ---------- 

521 other : `ButlerURI` 

522 URI to use to calculate the relative path. Must be a parent 

523 of this URI. 

524 

525 Returns 

526 ------- 

527 subpath : `str` 

528 The sub path of this URI relative to the supplied other URI. 

529 Returns `None` if there is no parent child relationship. 

530 Scheme and netloc must match. 

531 """ 

532 if self.scheme != other.scheme or self.netloc != other.netloc: 

533 return None 

534 

535 enclosed_path = self._pathLib(self.relativeToPathRoot) 

536 parent_path = other.relativeToPathRoot 

537 subpath: Optional[str] 

538 try: 

539 subpath = str(enclosed_path.relative_to(parent_path)) 

540 except ValueError: 

541 subpath = None 

542 else: 

543 subpath = urllib.parse.unquote(subpath) 

544 return subpath 

545 

546 def exists(self) -> bool: 

547 """Indicate that the resource is available. 

548 

549 Returns 

550 ------- 

551 exists : `bool` 

552 `True` if the resource exists. 

553 """ 

554 raise NotImplementedError() 

555 

556 def remove(self) -> None: 

557 """Remove the resource.""" 

558 raise NotImplementedError() 

559 

560 def isabs(self) -> bool: 

561 """Indicate that the resource is fully specified. 

562 

563 For non-schemeless URIs this is always true. 

564 

565 Returns 

566 ------- 

567 isabs : `bool` 

568 `True` in all cases except schemeless URI. 

569 """ 

570 return True 

571 

572 def as_local(self) -> Tuple[str, bool]: 

573 """Return the location of the (possibly remote) resource in the 

574 local file system. 

575 

576 Returns 

577 ------- 

578 path : `str` 

579 If this is a remote resource, it will be a copy of the resource 

580 on the local file system, probably in a temporary directory. 

581 For a local resource this should be the actual path to the 

582 resource. 

583 is_temporary : `bool` 

584 Indicates if the local path is a temporary file or not. 

585 """ 

586 raise NotImplementedError() 

587 

588 def read(self, size: int = -1) -> bytes: 

589 """Open the resource and return the contents in bytes. 

590 

591 Parameters 

592 ---------- 

593 size : `int`, optional 

594 The number of bytes to read. Negative or omitted indicates 

595 that all data should be read. 

596 """ 

597 raise NotImplementedError() 

598 

599 def write(self, data: bytes, overwrite: bool = True) -> None: 

600 """Write the supplied bytes to the new resource. 

601 

602 Parameters 

603 ---------- 

604 data : `bytes` 

605 The bytes to write to the resource. The entire contents of the 

606 resource will be replaced. 

607 overwrite : `bool`, optional 

608 If `True` the resource will be overwritten if it exists. Otherwise 

609 the write will fail. 

610 """ 

611 raise NotImplementedError() 

612 

613 def mkdir(self) -> None: 

614 """For a dir-like URI, create the directory resource if it does not 

615 already exist. 

616 """ 

617 raise NotImplementedError() 

618 

619 def size(self) -> int: 

620 """For non-dir-like URI, return the size of the resource. 

621 

622 Returns 

623 ------- 

624 sz : `int` 

625 The size in bytes of the resource associated with this URI. 

626 Returns 0 if dir-like. 

627 """ 

628 raise NotImplementedError() 

629 

630 def __str__(self) -> str: 

631 return self.geturl() 

632 

633 def __repr__(self) -> str: 

634 return f'ButlerURI("{self.geturl()}")' 

635 

636 def __eq__(self, other: Any) -> bool: 

637 if not isinstance(other, ButlerURI): 

638 return False 

639 return self.geturl() == other.geturl() 

640 

641 def __copy__(self) -> ButlerURI: 

642 # Implement here because the __new__ method confuses things 

643 return type(self)(str(self)) 

644 

645 def __deepcopy__(self, memo: Any) -> ButlerURI: 

646 # Implement here because the __new__ method confuses things 

647 return self.__copy__() 

648 

649 def __getnewargs__(self) -> Tuple: 

650 return (str(self),) 

651 

652 @staticmethod 

653 def _fixupPathUri(parsed: urllib.parse.ParseResult, root: Optional[Union[str, ButlerURI]] = None, 

654 forceAbsolute: bool = False, 

655 forceDirectory: bool = False) -> Tuple[urllib.parse.ParseResult, bool]: 

656 """Correct any issues with the supplied URI. 

657 

658 Parameters 

659 ---------- 

660 parsed : `~urllib.parse.ParseResult` 

661 The result from parsing a URI using `urllib.parse`. 

662 root : `str` or `ButlerURI`, ignored 

663 Not used by the this implementation since all URIs are 

664 absolute except for those representing the local file system. 

665 forceAbsolute : `bool`, ignored. 

666 Not used by this implementation. URIs are generally always 

667 absolute. 

668 forceDirectory : `bool`, optional 

669 If `True` forces the URI to end with a separator, otherwise given 

670 URI is interpreted as is. Specifying that the URI is conceptually 

671 equivalent to a directory can break some ambiguities when 

672 interpreting the last element of a path. 

673 

674 Returns 

675 ------- 

676 modified : `~urllib.parse.ParseResult` 

677 Update result if a URI is being handled. 

678 dirLike : `bool` 

679 `True` if given parsed URI has a trailing separator or 

680 forceDirectory is True. Otherwise `False`. 

681 

682 Notes 

683 ----- 

684 Relative paths are explicitly not supported by RFC8089 but `urllib` 

685 does accept URIs of the form ``file:relative/path.ext``. They need 

686 to be turned into absolute paths before they can be used. This is 

687 always done regardless of the ``forceAbsolute`` parameter. 

688 

689 AWS S3 differentiates between keys with trailing POSIX separators (i.e 

690 `/dir` and `/dir/`) whereas POSIX does not neccessarily. 

691 

692 Scheme-less paths are normalized. 

693 """ 

694 # assume we are not dealing with a directory like URI 

695 dirLike = False 

696 

697 # URI is dir-like if explicitly stated or if it ends on a separator 

698 endsOnSep = parsed.path.endswith(posixpath.sep) 

699 if forceDirectory or endsOnSep: 

700 dirLike = True 

701 # only add the separator if it's not already there 

702 if not endsOnSep: 

703 parsed = parsed._replace(path=parsed.path+posixpath.sep) 

704 

705 return parsed, dirLike 

706 

707 def transfer_from(self, src: ButlerURI, transfer: str, 

708 overwrite: bool = False, 

709 transaction: Optional[Union[DatastoreTransaction, NoTransaction]] = None) -> None: 

710 """Transfer the current resource to a new location. 

711 

712 Parameters 

713 ---------- 

714 src : `ButlerURI` 

715 Source URI. 

716 transfer : `str` 

717 Mode to use for transferring the resource. Generically there are 

718 many standard options: copy, link, symlink, hardlink, relsymlink. 

719 Not all URIs support all modes. 

720 overwrite : `bool`, optional 

721 Allow an existing file to be overwritten. Defaults to `False`. 

722 transaction : `DatastoreTransaction`, optional 

723 A transaction object that can (depending on implementation) 

724 rollback transfers on error. Not guaranteed to be implemented. 

725 

726 Notes 

727 ----- 

728 Conceptually this is hard to scale as the number of URI schemes 

729 grow. The destination URI is more important than the source URI 

730 since that is where all the transfer modes are relevant (with the 

731 complication that "move" deletes the source). 

732 

733 Local file to local file is the fundamental use case but every 

734 other scheme has to support "copy" to local file (with implicit 

735 support for "move") and copy from local file. 

736 All the "link" options tend to be specific to local file systems. 

737 

738 "move" is a "copy" where the remote resource is deleted at the end. 

739 Whether this works depends on the source URI rather than the 

740 destination URI. Reverting a move on transaction rollback is 

741 expected to be problematic if a remote resource was involved. 

742 """ 

743 raise NotImplementedError(f"No transfer modes supported by URI scheme {self.scheme}") 

744 

745 

746class ButlerFileURI(ButlerURI): 

747 """URI for explicit ``file`` scheme.""" 

748 

749 transferModes = ("copy", "link", "symlink", "hardlink", "relsymlink", "auto", "move") 

750 transferDefault: str = "link" 

751 

752 @property 

753 def ospath(self) -> str: 

754 """Path component of the URI localized to current OS. 

755 

756 Will unquote URI path since a formal URI must include the quoting. 

757 """ 

758 return urllib.parse.unquote(posix2os(self._uri.path)) 

759 

760 def exists(self) -> bool: 

761 # Uses os.path.exists so if there is a soft link that points 

762 # to a file that no longer exists this will return False 

763 return os.path.exists(self.ospath) 

764 

765 def size(self) -> int: 

766 if not os.path.isdir(self.ospath): 

767 stat = os.stat(self.ospath) 

768 sz = stat.st_size 

769 else: 

770 sz = 0 

771 return sz 

772 

773 def remove(self) -> None: 

774 """Remove the resource.""" 

775 os.remove(self.ospath) 

776 

777 def as_local(self) -> Tuple[str, bool]: 

778 """Return the local path of the file. 

779 

780 Returns 

781 ------- 

782 path : `str` 

783 The local path to this file. 

784 temporary : `bool` 

785 Always returns `False` (this is not a temporary file). 

786 """ 

787 return self.ospath, False 

788 

789 def _force_to_file(self) -> ButlerFileURI: 

790 """Force a schemeless URI to a file URI and returns a new URI. 

791 

792 Returns 

793 ------- 

794 file : `ButlerFileURI` 

795 A copy of the URI using file scheme. If already a file scheme 

796 the copy will be identical. 

797 

798 Raises 

799 ------ 

800 ValueError 

801 Raised if this URI is schemeless and relative path and so can 

802 not be forced to file absolute path without context. 

803 """ 

804 # This is always a file scheme so always return copy 

805 return copy.copy(self) 

806 

807 def relative_to(self, other: ButlerURI) -> Optional[str]: 

808 """Return the relative path from this URI to the other URI. 

809 

810 Parameters 

811 ---------- 

812 other : `ButlerURI` 

813 URI to use to calculate the relative path. Must be a parent 

814 of this URI. 

815 

816 Returns 

817 ------- 

818 subpath : `str` 

819 The sub path of this URI relative to the supplied other URI. 

820 Returns `None` if there is no parent child relationship. 

821 Scheme and netloc must match but for file URIs schemeless 

822 is also used. If this URI is a relative URI but the other is 

823 absolute, it is assumed to be in the parent completely unless it 

824 starts with ".." (in which case the path is combined and tested). 

825 If both URIs are relative, the relative paths are compared 

826 for commonality. 

827 

828 Notes 

829 ----- 

830 By definition a relative path will be relative to the enclosing 

831 absolute parent URI. It will be returned unchanged if it does not 

832 use a parent directory specification. 

833 """ 

834 # We know self is a file so check the other. Anything other than 

835 # file or schemeless means by definition these have no paths in common 

836 if other.scheme and other.scheme != "file": 

837 return None 

838 

839 # for case where both URIs are relative use the normal logic 

840 # where a/b/c.txt and a/b/ returns c.txt. 

841 if not self.isabs() and not other.isabs(): 

842 return super().relative_to(other) 

843 

844 # if we have a relative path convert it to absolute 

845 # relative to the supplied parent. This is solely to handle 

846 # the case where the relative path includes ".." but somehow 

847 # then goes back inside the directory of the parent 

848 if not self.isabs(): 

849 childUri = other.join(self.path) 

850 return childUri.relative_to(other) 

851 

852 # By this point if the schemes are identical we can use the 

853 # base class implementation. 

854 if self.scheme == other.scheme: 

855 return super().relative_to(other) 

856 

857 # if one is schemeless and the other is not the base implementation 

858 # will fail so we need to fix that -- they are both absolute so 

859 # forcing to file is fine. 

860 # Use a cast to convince mypy that other has to be a ButlerFileURI 

861 # in order to get to this part of the code. 

862 return self._force_to_file().relative_to(cast(ButlerFileURI, other)._force_to_file()) 

863 

864 def read(self, size: int = -1) -> bytes: 

865 # Docstring inherits 

866 with open(self.ospath, "rb") as fh: 

867 return fh.read(size) 

868 

869 def write(self, data: bytes, overwrite: bool = True) -> None: 

870 dir = os.path.dirname(self.ospath) 

871 if not os.path.exists(dir): 

872 safeMakeDir(dir) 

873 if overwrite: 

874 mode = "wb" 

875 else: 

876 mode = "xb" 

877 with open(self.ospath, mode) as f: 

878 f.write(data) 

879 

880 def mkdir(self) -> None: 

881 if not os.path.exists(self.ospath): 

882 safeMakeDir(self.ospath) 

883 elif not os.path.isdir(self.ospath): 

884 raise FileExistsError(f"URI {self} exists but is not a directory!") 

885 

886 def transfer_from(self, src: ButlerURI, transfer: str, 

887 overwrite: bool = False, 

888 transaction: Optional[Union[DatastoreTransaction, NoTransaction]] = None) -> None: 

889 """Transfer the current resource to a local file. 

890 

891 Parameters 

892 ---------- 

893 src : `ButlerURI` 

894 Source URI. 

895 transfer : `str` 

896 Mode to use for transferring the resource. Supports the following 

897 options: copy, link, symlink, hardlink, relsymlink. 

898 overwrite : `bool`, optional 

899 Allow an existing file to be overwritten. Defaults to `False`. 

900 transaction : `DatastoreTransaction`, optional 

901 If a transaction is provided, undo actions will be registered. 

902 """ 

903 # Fail early to prevent delays if remote resources are requested 

904 if transfer not in self.transferModes: 

905 raise ValueError(f"Transfer mode '{transfer}' not supported by URI scheme {self.scheme}") 

906 

907 log.debug(f"Transferring {src} [exists: {src.exists()}] -> " 

908 f"{self} [exists: {self.exists()}] (transfer={transfer})") 

909 

910 # We do not have to special case ButlerFileURI here because 

911 # as_local handles that. 

912 local_src, is_temporary = src.as_local() 

913 

914 # Default transfer mode depends on whether we have a temporary 

915 # file or not. 

916 if transfer == "auto": 

917 transfer = self.transferDefault if not is_temporary else "copy" 

918 

919 # Follow soft links 

920 local_src = os.path.realpath(os.path.normpath(local_src)) 

921 

922 if not os.path.exists(local_src): 

923 raise FileNotFoundError(f"Source URI {src} does not exist") 

924 

925 # All the modes involving linking use "link" somewhere 

926 if "link" in transfer and is_temporary: 

927 raise RuntimeError("Can not use local file system transfer mode" 

928 f" {transfer} for remote resource ({src})") 

929 

930 # For temporary files we can own them 

931 requested_transfer = transfer 

932 if is_temporary and transfer == "copy": 

933 transfer = "move" 

934 

935 # The output location should not exist 

936 dest_exists = self.exists() 

937 if not overwrite and dest_exists: 

938 raise FileExistsError(f"Destination path '{self}' already exists. Transfer " 

939 f"from {src} cannot be completed.") 

940 

941 # Make the path absolute (but don't follow links since that 

942 # would possibly cause us to end up in the wrong place if the 

943 # file existed already as a soft link) 

944 newFullPath = os.path.abspath(self.ospath) 

945 outputDir = os.path.dirname(newFullPath) 

946 if not os.path.isdir(outputDir): 

947 # Must create the directory -- this can not be rolled back 

948 # since another transfer running concurrently may 

949 # be relying on this existing. 

950 safeMakeDir(outputDir) 

951 

952 if transaction is None: 

953 # Use a no-op transaction to reduce code duplication 

954 transaction = NoTransaction() 

955 

956 # For links the OS doesn't let us overwrite so if something does 

957 # exist we have to remove it before we do the actual "transfer" below 

958 if "link" in transfer and overwrite and dest_exists: 

959 try: 

960 self.remove() 

961 except Exception: 

962 # If this fails we ignore it since it's a problem 

963 # that will manifest immediately below with a more relevant 

964 # error message 

965 pass 

966 

967 if transfer == "move": 

968 with transaction.undoWith(f"move from {local_src}", shutil.move, newFullPath, local_src): 

969 shutil.move(local_src, newFullPath) 

970 elif transfer == "copy": 

971 with transaction.undoWith(f"copy from {local_src}", os.remove, newFullPath): 

972 shutil.copy(local_src, newFullPath) 

973 elif transfer == "link": 

974 # Try hard link and if that fails use a symlink 

975 with transaction.undoWith(f"link to {local_src}", os.remove, newFullPath): 

976 try: 

977 os.link(local_src, newFullPath) 

978 except OSError: 

979 # Read through existing symlinks 

980 os.symlink(local_src, newFullPath) 

981 elif transfer == "hardlink": 

982 with transaction.undoWith(f"hardlink to {local_src}", os.remove, newFullPath): 

983 os.link(local_src, newFullPath) 

984 elif transfer == "symlink": 

985 # Read through existing symlinks 

986 with transaction.undoWith(f"symlink to {local_src}", os.remove, newFullPath): 

987 os.symlink(local_src, newFullPath) 

988 elif transfer == "relsymlink": 

989 # This is a standard symlink but using a relative path 

990 # Need the directory name to give to relative root 

991 # A full file path confuses it into an extra ../ 

992 newFullPathRoot = os.path.dirname(newFullPath) 

993 relPath = os.path.relpath(local_src, newFullPathRoot) 

994 with transaction.undoWith(f"relsymlink to {local_src}", os.remove, newFullPath): 

995 os.symlink(relPath, newFullPath) 

996 else: 

997 raise NotImplementedError("Transfer type '{}' not supported.".format(transfer)) 

998 

999 # This was an explicit move requested from a remote resource 

1000 # try to remove that resource. We check is_temporary because 

1001 # the local file would have been moved by shutil.move already. 

1002 if requested_transfer == "move" and is_temporary: 

1003 # Transactions do not work here 

1004 src.remove() 

1005 

1006 if is_temporary and os.path.exists(local_src): 

1007 # This should never happen since we have moved it above 

1008 os.remove(local_src) 

1009 

1010 @staticmethod 

1011 def _fixupPathUri(parsed: urllib.parse.ParseResult, root: Optional[Union[str, ButlerURI]] = None, 

1012 forceAbsolute: bool = False, 

1013 forceDirectory: bool = False) -> Tuple[urllib.parse.ParseResult, bool]: 

1014 """Fix up relative paths in URI instances. 

1015 

1016 Parameters 

1017 ---------- 

1018 parsed : `~urllib.parse.ParseResult` 

1019 The result from parsing a URI using `urllib.parse`. 

1020 root : `str` or `ButlerURI`, optional 

1021 Path to use as root when converting relative to absolute. 

1022 If `None`, it will be the current working directory. This 

1023 is a local file system path, or a file URI. It is only used if 

1024 a file-scheme is used incorrectly with a relative path. 

1025 forceAbsolute : `bool`, ignored 

1026 Has no effect for this subclass. ``file`` URIs are always 

1027 absolute. 

1028 forceDirectory : `bool`, optional 

1029 If `True` forces the URI to end with a separator, otherwise given 

1030 URI is interpreted as is. 

1031 

1032 Returns 

1033 ------- 

1034 modified : `~urllib.parse.ParseResult` 

1035 Update result if a URI is being handled. 

1036 dirLike : `bool` 

1037 `True` if given parsed URI has a trailing separator or 

1038 forceDirectory is True. Otherwise `False`. 

1039 

1040 Notes 

1041 ----- 

1042 Relative paths are explicitly not supported by RFC8089 but `urllib` 

1043 does accept URIs of the form ``file:relative/path.ext``. They need 

1044 to be turned into absolute paths before they can be used. This is 

1045 always done regardless of the ``forceAbsolute`` parameter. 

1046 """ 

1047 # assume we are not dealing with a directory like URI 

1048 dirLike = False 

1049 

1050 # file URI implies POSIX path separators so split as POSIX, 

1051 # then join as os, and convert to abspath. Do not handle 

1052 # home directories since "file" scheme is explicitly documented 

1053 # to not do tilde expansion. 

1054 sep = posixpath.sep 

1055 

1056 # For local file system we can explicitly check to see if this 

1057 # really is a directory. The URI might point to a location that 

1058 # does not exists yet but all that matters is if it is a directory 

1059 # then we make sure use that fact. No need to do the check if 

1060 # we are already being told. 

1061 if not forceDirectory and posixpath.isdir(parsed.path): 

1062 forceDirectory = True 

1063 

1064 # For an absolute path all we need to do is check if we need 

1065 # to force the directory separator 

1066 if posixpath.isabs(parsed.path): 

1067 if forceDirectory: 

1068 if not parsed.path.endswith(sep): 

1069 parsed = parsed._replace(path=parsed.path+sep) 

1070 dirLike = True 

1071 return copy.copy(parsed), dirLike 

1072 

1073 # Relative path so must fix it to be compliant with the standard 

1074 

1075 # Replacement values for the URI 

1076 replacements = {} 

1077 

1078 if root is None: 

1079 root = os.path.abspath(os.path.curdir) 

1080 elif isinstance(root, ButlerURI): 

1081 if root.scheme and root.scheme != "file": 

1082 raise RuntimeError(f"The override root must be a file URI not {root.scheme}") 

1083 root = os.path.abspath(root.ospath) 

1084 

1085 replacements["path"] = posixpath.normpath(posixpath.join(os2posix(root), parsed.path)) 

1086 

1087 # normpath strips trailing "/" so put it back if necessary 

1088 # Acknowledge that trailing separator exists. 

1089 if forceDirectory or (parsed.path.endswith(sep) and not replacements["path"].endswith(sep)): 

1090 replacements["path"] += sep 

1091 dirLike = True 

1092 

1093 # ParseResult is a NamedTuple so _replace is standard API 

1094 parsed = parsed._replace(**replacements) 

1095 

1096 if parsed.params or parsed.query: 

1097 log.warning("Additional items unexpectedly encountered in file URI: %s", parsed.geturl()) 

1098 

1099 return parsed, dirLike 

1100 

1101 

1102class ButlerS3URI(ButlerURI): 

1103 """S3 URI""" 

1104 

1105 @property 

1106 def client(self) -> boto3.client: 

1107 """Client object to address remote resource.""" 

1108 # Defer import for circular dependencies 

1109 from .s3utils import getS3Client 

1110 return getS3Client() 

1111 

1112 def exists(self) -> bool: 

1113 # s3utils itself imports ButlerURI so defer this import 

1114 from .s3utils import s3CheckFileExists 

1115 exists, _ = s3CheckFileExists(self, client=self.client) 

1116 return exists 

1117 

1118 def size(self) -> int: 

1119 # s3utils itself imports ButlerURI so defer this import 

1120 from .s3utils import s3CheckFileExists 

1121 if self.dirLike: 

1122 return 0 

1123 _, sz = s3CheckFileExists(self, client=self.client) 

1124 return sz 

1125 

1126 def remove(self) -> None: 

1127 """Remove the resource.""" 

1128 

1129 # https://github.com/boto/boto3/issues/507 - there is no 

1130 # way of knowing if the file was actually deleted except 

1131 # for checking all the keys again, reponse is HTTP 204 OK 

1132 # response all the time 

1133 self.client.delete_object(Bucket=self.netloc, Key=self.relativeToPathRoot) 

1134 

1135 def read(self, size: int = -1) -> bytes: 

1136 args = {} 

1137 if size > 0: 

1138 args["Range"] = f"bytes=0-{size-1}" 

1139 try: 

1140 response = self.client.get_object(Bucket=self.netloc, 

1141 Key=self.relativeToPathRoot, 

1142 **args) 

1143 except (self.client.exceptions.NoSuchKey, self.client.exceptions.NoSuchBucket) as err: 

1144 raise FileNotFoundError(f"No such resource: {self}") from err 

1145 body = response["Body"].read() 

1146 response["Body"].close() 

1147 return body 

1148 

1149 def write(self, data: bytes, overwrite: bool = True) -> None: 

1150 if not overwrite: 

1151 if self.exists(): 

1152 raise FileExistsError(f"Remote resource {self} exists and overwrite has been disabled") 

1153 self.client.put_object(Bucket=self.netloc, Key=self.relativeToPathRoot, 

1154 Body=data) 

1155 

1156 def mkdir(self) -> None: 

1157 # Defer import for circular dependencies 

1158 from .s3utils import bucketExists 

1159 if not bucketExists(self.netloc): 

1160 raise ValueError(f"Bucket {self.netloc} does not exist for {self}!") 

1161 

1162 if not self.dirLike: 

1163 raise ValueError(f"Can not create a 'directory' for file-like URI {self}") 

1164 

1165 # don't create S3 key when root is at the top-level of an Bucket 

1166 if not self.path == "/": 

1167 self.client.put_object(Bucket=self.netloc, Key=self.relativeToPathRoot) 

1168 

1169 def as_local(self) -> Tuple[str, bool]: 

1170 """Download object from S3 and place in temporary directory. 

1171 

1172 Returns 

1173 ------- 

1174 path : `str` 

1175 Path to local temporary file. 

1176 temporary : `bool` 

1177 Always returns `True`. This is always a temporary file. 

1178 """ 

1179 with tempfile.NamedTemporaryFile(suffix=self.getExtension(), delete=False) as tmpFile: 

1180 self.client.download_fileobj(self.netloc, self.relativeToPathRoot, tmpFile) 

1181 return tmpFile.name, True 

1182 

1183 def transfer_from(self, src: ButlerURI, transfer: str = "copy", 

1184 overwrite: bool = False, 

1185 transaction: Optional[Union[DatastoreTransaction, NoTransaction]] = None) -> None: 

1186 """Transfer the current resource to an S3 bucket. 

1187 

1188 Parameters 

1189 ---------- 

1190 src : `ButlerURI` 

1191 Source URI. 

1192 transfer : `str` 

1193 Mode to use for transferring the resource. Supports the following 

1194 options: copy. 

1195 overwrite : `bool`, optional 

1196 Allow an existing file to be overwritten. Defaults to `False`. 

1197 transaction : `DatastoreTransaction`, optional 

1198 Currently unused. 

1199 """ 

1200 # Fail early to prevent delays if remote resources are requested 

1201 if transfer not in self.transferModes: 

1202 raise ValueError(f"Transfer mode '{transfer}' not supported by URI scheme {self.scheme}") 

1203 

1204 log.debug(f"Transferring {src} [exists: {src.exists()}] -> " 

1205 f"{self} [exists: {self.exists()}] (transfer={transfer})") 

1206 

1207 if not overwrite and self.exists(): 

1208 raise FileExistsError(f"Destination path '{self}' already exists.") 

1209 

1210 if transfer == "auto": 

1211 transfer = self.transferDefault 

1212 

1213 if isinstance(src, type(self)): 

1214 # Looks like an S3 remote uri so we can use direct copy 

1215 # note that boto3.resource.meta.copy is cleverer than the low 

1216 # level copy_object 

1217 copy_source = { 

1218 "Bucket": src.netloc, 

1219 "Key": src.relativeToPathRoot, 

1220 } 

1221 self.client.copy_object(CopySource=copy_source, Bucket=self.netloc, Key=self.relativeToPathRoot) 

1222 else: 

1223 # Use local file and upload it 

1224 local_src, is_temporary = src.as_local() 

1225 

1226 # resource.meta.upload_file seems like the right thing 

1227 # but we have a low level client 

1228 with open(local_src, "rb") as fh: 

1229 self.client.put_object(Bucket=self.netloc, 

1230 Key=self.relativeToPathRoot, Body=fh) 

1231 if is_temporary: 

1232 os.remove(local_src) 

1233 

1234 # This was an explicit move requested from a remote resource 

1235 # try to remove that resource 

1236 if transfer == "move": 

1237 # Transactions do not work here 

1238 src.remove() 

1239 

1240 

1241class ButlerPackageResourceURI(ButlerURI): 

1242 """URI referring to a Python package resource. 

1243 

1244 These URIs look like: ``resource://lsst.daf.butler/configs/file.yaml`` 

1245 where the network location is the Python package and the path is the 

1246 resource name. 

1247 """ 

1248 

1249 def exists(self) -> bool: 

1250 """Check that the python resource exists.""" 

1251 return pkg_resources.resource_exists(self.netloc, self.relativeToPathRoot) 

1252 

1253 def read(self, size: int = -1) -> bytes: 

1254 with pkg_resources.resource_stream(self.netloc, self.relativeToPathRoot) as fh: 

1255 return fh.read(size) 

1256 

1257 

1258class ButlerHttpURI(ButlerURI): 

1259 """General HTTP(S) resource.""" 

1260 

1261 @property 

1262 def session(self) -> requests.Session: 

1263 """Client object to address remote resource.""" 

1264 from .webdavutils import getHttpSession, isWebdavEndpoint 

1265 baseURL = self.scheme + "://" + self.netloc 

1266 if isWebdavEndpoint(baseURL): 

1267 log.debug("%s looks like a Webdav endpoint.", baseURL) 

1268 return getHttpSession() 

1269 

1270 log.debug("%s looks like a standard HTTP endpoint.", baseURL) 

1271 return requests.Session() 

1272 

1273 def exists(self) -> bool: 

1274 """Check that a remote HTTP resource exists.""" 

1275 log.debug("Checking if resource exists: %s", self.geturl()) 

1276 r = self.session.head(self.geturl()) 

1277 

1278 return True if r.status_code == 200 else False 

1279 

1280 def size(self) -> int: 

1281 if self.dirLike: 

1282 return 0 

1283 r = self.session.head(self.geturl()) 

1284 if r.status_code == 200: 

1285 return int(r.headers['Content-Length']) 

1286 else: 

1287 raise FileNotFoundError(f"Resource {self} does not exist") 

1288 

1289 def mkdir(self) -> None: 

1290 """For a dir-like URI, create the directory resource if it does not 

1291 already exist. 

1292 """ 

1293 if not self.dirLike: 

1294 raise ValueError(f"Can not create a 'directory' for file-like URI {self}") 

1295 

1296 if not self.exists(): 

1297 # We need to test the absence of the parent directory, 

1298 # but also if parent URL is different from self URL, 

1299 # otherwise we could be stuck in a recursive loop 

1300 # where self == parent 

1301 if not self.parent().exists() and self.parent().geturl() != self.geturl(): 

1302 self.parent().mkdir() 

1303 log.debug("Creating new directory: %s", self.geturl()) 

1304 r = self.session.request("MKCOL", self.geturl()) 

1305 if r.status_code != 201: 

1306 raise ValueError(f"Can not create directory {self}, status code: {r.status_code}") 

1307 

1308 def remove(self) -> None: 

1309 """Remove the resource.""" 

1310 log.debug("Removing resource: %s", self.geturl()) 

1311 r = self.session.delete(self.geturl()) 

1312 if r.status_code not in [200, 202, 204]: 

1313 raise FileNotFoundError(f"Unable to delete resource {self}; status code: {r.status_code}") 

1314 

1315 def as_local(self) -> Tuple[str, bool]: 

1316 """Download object over HTTP and place in temporary directory. 

1317 

1318 Returns 

1319 ------- 

1320 path : `str` 

1321 Path to local temporary file. 

1322 temporary : `bool` 

1323 Always returns `True`. This is always a temporary file. 

1324 """ 

1325 log.debug("Downloading remote resource as local file: %s", self.geturl()) 

1326 r = self.session.get(self.geturl(), stream=True) 

1327 if r.status_code != 200: 

1328 raise FileNotFoundError(f"Unable to download resource {self}; status code: {r.status_code}") 

1329 with tempfile.NamedTemporaryFile(suffix=self.getExtension(), delete=False) as tmpFile: 

1330 for chunk in r.iter_content(): 

1331 tmpFile.write(chunk) 

1332 return tmpFile.name, True 

1333 

1334 def read(self, size: int = -1) -> bytes: 

1335 """Open the resource and return the contents in bytes. 

1336 

1337 Parameters 

1338 ---------- 

1339 size : `int`, optional 

1340 The number of bytes to read. Negative or omitted indicates 

1341 that all data should be read. 

1342 """ 

1343 log.debug("Reading from remote resource: %s", self.geturl()) 

1344 stream = True if size > 0 else False 

1345 r = self.session.get(self.geturl(), stream=stream) 

1346 if r.status_code != 200: 

1347 raise FileNotFoundError(f"Unable to read resource {self}; status code: {r.status_code}") 

1348 if not stream: 

1349 return r.content 

1350 else: 

1351 return next(r.iter_content(chunk_size=size)) 

1352 

1353 def write(self, data: bytes, overwrite: bool = True) -> None: 

1354 """Write the supplied bytes to the new resource. 

1355 

1356 Parameters 

1357 ---------- 

1358 data : `bytes` 

1359 The bytes to write to the resource. The entire contents of the 

1360 resource will be replaced. 

1361 overwrite : `bool`, optional 

1362 If `True` the resource will be overwritten if it exists. Otherwise 

1363 the write will fail. 

1364 """ 

1365 log.debug("Writing to remote resource: %s", self.geturl()) 

1366 if not overwrite: 

1367 if self.exists(): 

1368 raise FileExistsError(f"Remote resource {self} exists and overwrite has been disabled") 

1369 self.session.put(self.geturl(), data=data) 

1370 

1371 def transfer_from(self, src: ButlerURI, transfer: str = "copy", 

1372 overwrite: bool = False, 

1373 transaction: Optional[Union[DatastoreTransaction, NoTransaction]] = None) -> None: 

1374 """Transfer the current resource to a Webdav repository. 

1375 

1376 Parameters 

1377 ---------- 

1378 src : `ButlerURI` 

1379 Source URI. 

1380 transfer : `str` 

1381 Mode to use for transferring the resource. Supports the following 

1382 options: copy. 

1383 transaction : `DatastoreTransaction`, optional 

1384 Currently unused. 

1385 """ 

1386 # Fail early to prevent delays if remote resources are requested 

1387 if transfer not in self.transferModes: 

1388 raise ValueError(f"Transfer mode {transfer} not supported by URI scheme {self.scheme}") 

1389 

1390 log.debug(f"Transferring {src} [exists: {src.exists()}] -> " 

1391 f"{self} [exists: {self.exists()}] (transfer={transfer})") 

1392 

1393 if self.exists(): 

1394 raise FileExistsError(f"Destination path {self} already exists.") 

1395 

1396 if transfer == "auto": 

1397 transfer = self.transferDefault 

1398 

1399 if isinstance(src, type(self)): 

1400 if transfer == "move": 

1401 self.session.request("MOVE", src.geturl(), headers={"Destination": self.geturl()}) 

1402 log.debug("Direct move via MOVE operation executed.") 

1403 else: 

1404 self.session.request("COPY", src.geturl(), headers={"Destination": self.geturl()}) 

1405 log.debug("Direct copy via COPY operation executed.") 

1406 else: 

1407 # Use local file and upload it 

1408 local_src, is_temporary = src.as_local() 

1409 f = open(local_src, "rb") 

1410 self.session.put(self.geturl(), data=f) 

1411 f.close() 

1412 if is_temporary: 

1413 os.remove(local_src) 

1414 log.debug("Indirect copy via temporary file executed.") 

1415 

1416 

1417class ButlerInMemoryURI(ButlerURI): 

1418 """Internal in-memory datastore URI (`mem://`). 

1419 

1420 Not used for any real purpose other than indicating that the dataset 

1421 is in memory. 

1422 """ 

1423 

1424 def exists(self) -> bool: 

1425 """Test for existence and always return False.""" 

1426 return True 

1427 

1428 def as_local(self) -> Tuple[str, bool]: 

1429 raise RuntimeError(f"Do not know how to retrieve data for URI '{self}'") 

1430 

1431 

1432class ButlerSchemelessURI(ButlerFileURI): 

1433 """Scheme-less URI referring to the local file system""" 

1434 

1435 _pathLib = PurePath 

1436 _pathModule = os.path 

1437 quotePaths = False 

1438 

1439 @property 

1440 def ospath(self) -> str: 

1441 """Path component of the URI localized to current OS.""" 

1442 return self.path 

1443 

1444 def isabs(self) -> bool: 

1445 """Indicate that the resource is fully specified. 

1446 

1447 For non-schemeless URIs this is always true. 

1448 

1449 Returns 

1450 ------- 

1451 isabs : `bool` 

1452 `True` if the file is absolute, `False` otherwise. 

1453 """ 

1454 return os.path.isabs(self.ospath) 

1455 

1456 def _force_to_file(self) -> ButlerFileURI: 

1457 """Force a schemeless URI to a file URI and returns a new URI. 

1458 

1459 This will include URI quoting of the path. 

1460 

1461 Returns 

1462 ------- 

1463 file : `ButlerFileURI` 

1464 A copy of the URI using file scheme. If already a file scheme 

1465 the copy will be identical. 

1466 

1467 Raises 

1468 ------ 

1469 ValueError 

1470 Raised if this URI is schemeless and relative path and so can 

1471 not be forced to file absolute path without context. 

1472 """ 

1473 if not self.isabs(): 

1474 raise RuntimeError(f"Internal error: Can not force {self} to absolute file URI") 

1475 uri = self._uri._replace(scheme="file", path=urllib.parse.quote(os2posix(self.path))) 

1476 # mypy really wants a ButlerFileURI to be returned here 

1477 return ButlerURI(uri, forceDirectory=self.dirLike) # type: ignore 

1478 

1479 @staticmethod 

1480 def _fixupPathUri(parsed: urllib.parse.ParseResult, root: Optional[Union[str, ButlerURI]] = None, 

1481 forceAbsolute: bool = False, 

1482 forceDirectory: bool = False) -> Tuple[urllib.parse.ParseResult, bool]: 

1483 """Fix up relative paths for local file system. 

1484 

1485 Parameters 

1486 ---------- 

1487 parsed : `~urllib.parse.ParseResult` 

1488 The result from parsing a URI using `urllib.parse`. 

1489 root : `str` or `ButlerURI`, optional 

1490 Path to use as root when converting relative to absolute. 

1491 If `None`, it will be the current working directory. This 

1492 is a local file system path, or a file URI. 

1493 forceAbsolute : `bool`, optional 

1494 If `True`, scheme-less relative URI will be converted to an 

1495 absolute path using a ``file`` scheme. If `False` scheme-less URI 

1496 will remain scheme-less and will not be updated to ``file`` or 

1497 absolute path. 

1498 forceDirectory : `bool`, optional 

1499 If `True` forces the URI to end with a separator, otherwise given 

1500 URI is interpreted as is. 

1501 

1502 Returns 

1503 ------- 

1504 modified : `~urllib.parse.ParseResult` 

1505 Update result if a URI is being handled. 

1506 dirLike : `bool` 

1507 `True` if given parsed URI has a trailing separator or 

1508 forceDirectory is True. Otherwise `False`. 

1509 

1510 Notes 

1511 ----- 

1512 Relative paths are explicitly not supported by RFC8089 but `urllib` 

1513 does accept URIs of the form ``file:relative/path.ext``. They need 

1514 to be turned into absolute paths before they can be used. This is 

1515 always done regardless of the ``forceAbsolute`` parameter. 

1516 

1517 Scheme-less paths are normalized. 

1518 """ 

1519 # assume we are not dealing with a directory URI 

1520 dirLike = False 

1521 

1522 # Replacement values for the URI 

1523 replacements = {} 

1524 

1525 if root is None: 

1526 root = os.path.abspath(os.path.curdir) 

1527 elif isinstance(root, ButlerURI): 

1528 if root.scheme and root.scheme != "file": 

1529 raise RuntimeError(f"The override root must be a file URI not {root.scheme}") 

1530 root = os.path.abspath(root.ospath) 

1531 

1532 # this is a local OS file path which can support tilde expansion. 

1533 # we quoted it in the constructor so unquote here 

1534 expandedPath = os.path.expanduser(urllib.parse.unquote(parsed.path)) 

1535 

1536 # Ensure that this becomes a file URI if it is already absolute 

1537 if os.path.isabs(expandedPath): 

1538 replacements["scheme"] = "file" 

1539 # Keep in OS form for now to simplify later logic 

1540 replacements["path"] = os.path.normpath(expandedPath) 

1541 elif forceAbsolute: 

1542 # This can stay in OS path form, do not change to file 

1543 # scheme. 

1544 replacements["path"] = os.path.normpath(os.path.join(root, expandedPath)) 

1545 else: 

1546 # No change needed for relative local path staying relative 

1547 # except normalization 

1548 replacements["path"] = os.path.normpath(expandedPath) 

1549 # normalization of empty path returns "." so we are dirLike 

1550 if expandedPath == "": 

1551 dirLike = True 

1552 

1553 # normpath strips trailing "/" which makes it hard to keep 

1554 # track of directory vs file when calling replaceFile 

1555 

1556 # For local file system we can explicitly check to see if this 

1557 # really is a directory. The URI might point to a location that 

1558 # does not exists yet but all that matters is if it is a directory 

1559 # then we make sure use that fact. No need to do the check if 

1560 # we are already being told. 

1561 if not forceDirectory and os.path.isdir(replacements["path"]): 

1562 forceDirectory = True 

1563 

1564 # add the trailing separator only if explicitly required or 

1565 # if it was stripped by normpath. Acknowledge that trailing 

1566 # separator exists. 

1567 endsOnSep = expandedPath.endswith(os.sep) and not replacements["path"].endswith(os.sep) 

1568 if (forceDirectory or endsOnSep or dirLike): 

1569 dirLike = True 

1570 if not replacements["path"].endswith(os.sep): 

1571 replacements["path"] += os.sep 

1572 

1573 if "scheme" in replacements: 

1574 # This is now meant to be a URI path so force to posix 

1575 # and quote 

1576 replacements["path"] = urllib.parse.quote(os2posix(replacements["path"])) 

1577 

1578 # ParseResult is a NamedTuple so _replace is standard API 

1579 parsed = parsed._replace(**replacements) 

1580 

1581 if parsed.params or parsed.fragment or parsed.query: 

1582 log.warning("Additional items unexpectedly encountered in schemeless URI: %s", parsed.geturl()) 

1583 

1584 return parsed, dirLike